Web-Rag / scraper.py
TharaKavin's picture
Update scraper.py
78a3b6c verified
raw
history blame contribute delete
608 Bytes
from scrapling.fetchers import Fetcher
def scrape_url(url: str) -> str:
    """Fetch *url* and return the text found under its ``<body>`` as one string.

    The page is retrieved with ``Fetcher.get`` and queried with the
    ``body *::text`` CSS selector. Each selected node is converted with
    ``str()``, stripped of surrounding whitespace, and the non-empty pieces
    are joined with single spaces.

    This is a best-effort helper: any ordinary failure while fetching or
    parsing is reported on stdout and an empty string is returned instead of
    raising.

    Args:
        url: Address of the page to scrape.

    Returns:
        The space-joined text fragments, or ``""`` when nothing was
        extracted or an error occurred.
    """
    try:
        page = Fetcher.get(url)

        fragments = []
        for node in page.css("body *::text"):
            try:
                text = str(node).strip()
            except Exception:
                # One unconvertible node must not abort the whole page.
                # Deliberately not a bare ``except`` so KeyboardInterrupt and
                # SystemExit still propagate.
                continue
            if text:
                fragments.append(text)

        return " ".join(fragments)
    except Exception as e:
        print("SCRAPING ERROR:", e)
        return ""