import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs

# Fetch a page asynchronously; return an empty string on any error
# so callers can treat failures as "no content".
async def fetch(session, url):
    try:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=10)) as response:
            return await response.text()
    except Exception:
        return ""

# Extract the visible text of a page asynchronously.
async def get_page_text(session, url):
    html = await fetch(session, url)
    if not html:
        return "Text not found"
    soup = BeautifulSoup(html, 'html.parser')
    body = soup.find('body')
    if body:
        return body.get_text(separator='\n', strip=True)
    return "Text not found"

# Search Google asynchronously and collect result links.
async def search_info(prompt):
    query = prompt.replace(' ', '+')
    search_url = f"https://www.google.com/search?q={query}"
    # Google often blocks clients without a browser-like User-Agent,
    # so one is set here (assumes the basic HTML results page is served).
    headers = {"User-Agent": "Mozilla/5.0"}
    async with aiohttp.ClientSession(headers=headers) as session:
        html = await fetch(session, search_url)
    if not html:
        return []
    soup = BeautifulSoup(html, 'html.parser')
    links = []
    # In the basic HTML results, each <h3> title sits inside an <a> whose
    # href is a redirect of the form /url?q=<target>; extract the target.
    for item in soup.find_all('h3'):
        parent = item.find_parent('a')
        if parent and 'href' in parent.attrs:
            link = parent['href']
            parsed_url = urlparse(link)
            if parsed_url.path == '/url':
                query_params = parse_qs(parsed_url.query)
                if 'q' in query_params:
                    links.append(query_params['q'][0])
    return links

# Main asynchronous entry point.
async def main():
    prompt = input("Enter a search query: ")
    results = await search_info(prompt)
    if not results:
        print("Nothing found.")
        return
    async with aiohttp.ClientSession() as session:
        # Limit to the first 5 links for speed.
        tasks = [get_page_text(session, link) for link in results[:5]]
        texts = await asyncio.gather(*tasks)
    # Pair each fetched text with the same truncated slice of links.
    for link, text in zip(results[:5], texts):
        print(f"Link: {link}")
        print(f"Text: {text}\n")

# Run the program.
if __name__ == "__main__":
    asyncio.run(main())