Spaces:

TharaKavin
/

Web-Rag

Running

Web-Rag / scraper.py

Update scraper.py

78a3b6c verified 10 days ago

608 Bytes

	from scrapling.fetchers import Fetcher

	def scrape_url(url: str) -> str:
	    """Fetch *url* and return the page's text as one space-joined string.

	    The page is retrieved with ``Fetcher.get`` and every node matched by
	    the ``body *::text`` selector is converted to ``str``, stripped of
	    surrounding whitespace, and kept only if something remains. The
	    surviving fragments are joined with single spaces.

	    This is a best-effort helper: any ``Exception`` raised while fetching
	    or parsing is reported on stdout and an empty string is returned
	    instead of propagating the error.

	    Args:
	        url: Address of the page to scrape.

	    Returns:
	        The concatenated text fragments, or ``""`` when scraping failed
	        or no non-blank text was found.
	    """
	    try:
	        page = Fetcher.get(url)
	        # NOTE(review): this selector looks like it also matches text inside
	        # <script>/<style> elements -- confirm that is wanted downstream.
	        nodes = page.css("body *::text")
	        fragments = []
	        for node in nodes:
	            try:
	                text = str(node).strip()
	            except Exception:
	                # Skip a node that cannot be rendered as text, but let
	                # KeyboardInterrupt/SystemExit propagate (the previous
	                # bare ``except:`` swallowed those as well).
	                continue
	            if text:
	                fragments.append(text)
	        return " ".join(fragments)
	    except Exception as e:
	        # Deliberate catch-all boundary: callers rely on getting "" back
	        # rather than an exception when a page cannot be scraped.
	        print("SCRAPING ERROR:", e)
	        return ""