KingNish committed on
Commit
1ee12e5
1 Parent(s): 95a34b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -177,7 +177,10 @@ async def chat(
177
 
178
  def extract_text_from_webpage(html_content):
179
  """Extracts visible text from HTML content using BeautifulSoup."""
180
- return BeautifulSoup(html_content).get_text(strip=True)
 
 
 
181
 
182
  async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
183
  """Fetches a URL and extracts text asynchronously."""
@@ -242,7 +245,10 @@ async def web_search_and_extract(
242
 
243
  def extract_text_from_webpage2(html_content):
244
  """Extracts visible text from HTML content using BeautifulSoup."""
245
- return BeautifulSoup(html_content).get_text(strip=True)
 
 
 
246
 
247
  def fetch_and_extract2(url, max_chars):
248
  """Fetches a URL and extracts text using threading."""
 
177
 
178
  def extract_text_from_webpage(html_content):
179
  """Extracts visible text from HTML content using BeautifulSoup."""
180
+ soup = BeautifulSoup(html_content)
181
+ for tag in soup(["script", "style", "header", "footer"]):
182
+ tag.extract()
183
+ return soup.get_text(strip=True)
184
 
185
  async def fetch_and_extract(url, max_chars, proxy: Optional[str] = None):
186
  """Fetches a URL and extracts text asynchronously."""
 
245
 
246
  def extract_text_from_webpage2(html_content):
247
  """Extracts visible text from HTML content using BeautifulSoup."""
248
+ soup = BeautifulSoup(html_content)
249
+ for tag in soup(["script", "style", "header", "footer"]):
250
+ tag.extract()
251
+ return soup.get_text(strip=True)
252
 
253
  def fetch_and_extract2(url, max_chars):
254
  """Fetches a URL and extracts text using threading."""