TharaKavin committed on
Commit
1cc2027
·
verified ·
1 Parent(s): e008495

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +4 -7
scraper.py CHANGED
@@ -4,14 +4,11 @@ def scrape_url(url: str) -> str:
4
  try:
5
  page = Fetcher.get(url)
6
 
7
- # Try method 1 (new versions)
8
- try:
9
- texts = page.css("body *::text").getall()
10
- except:
11
- # Fallback for older versions
12
- texts = [t.get() for t in page.css("body *::text")]
13
 
14
- cleaned = [t.strip() for t in texts if t and t.strip()]
 
15
 
16
  return " ".join(cleaned)
17
 
 
4
  try:
5
  page = Fetcher.get(url)
6
 
7
+ # Extract text properly
8
+ texts = page.css("body *::text").getall()
 
 
 
 
9
 
10
+ # Clean text
11
+ cleaned = [t.strip() for t in texts if t.strip()]
12
 
13
  return " ".join(cleaned)
14