NikosKprl committed on
Commit
c967ca0
·
verified ·
1 Parent(s): ba3bedb

Update ✨Entity Linking Application✨.py

Browse files
Files changed (1) hide show
  1. ✨Entity Linking Application✨.py +17 -6
✨Entity Linking Application✨.py CHANGED
@@ -13,10 +13,18 @@ import streamlit as st
13
  import time
14
  from openai import OpenAI
15
  import sys
16
- #from googlesearch import search
17
  import time
 
 
 
18
 
19
 
 
 
 
 
 
 
20
  folder_path = '/home/user/app/qids_folder'
21
 
22
  if not os.path.exists(folder_path):
@@ -72,11 +80,14 @@ async def mains(name, single, combi):
72
  qids = set()
73
 
74
  async with aiohttp.ClientSession() as session:
75
- url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&srlimit=10&srprop=&srenablerewrites=True&srinfo=suggestion&format=json"
76
- json_suggestion = await fetch_json(url, session)
77
- results = json_suggestion.get('query', {}).get('search')
78
- for i in results:
79
- data.add(i.get('title'))
 
 
 
80
 
81
  wikipedia_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&srlimit=1&srprop=&srenablerewrites=True&srinfo=suggestion&format=json"
82
  json_data = await fetch_json(wikipedia_url, session)
 
# --- Module-level setup (new version of the file's prelude) ---
# Standard library
import sys
import time  # was imported twice in the original; once is enough

# Third-party
import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
from openai import OpenAI

# Rotate a realistic browser User-Agent so the Google-scrape requests made
# later in this file are less likely to be served a bot-detection page.
ua = UserAgent()

# ua.random already yields a str, so no f-string wrapper is needed.
# NOTE(review): this header set is chosen once at import time; every request
# in this process reuses the same User-Agent string.
headers = {
    "User-Agent": ua.random,
}

# Working directory where per-entity QID files are written/read.
folder_path = '/home/user/app/qids_folder'
29
 
30
  if not os.path.exists(folder_path):
 
80
  qids = set()
81
 
82
  async with aiohttp.ClientSession() as session:
83
+ url = f"https://www.google.com/search?q={name} site:en.wikipedia.org inurl:/wiki/ -inurl:? -inurl:Category: -inurl:File: -inurl:Special: -inurl:Help:"
84
+ html = requests.get(url, headers=headers)
85
+ soup = BeautifulSoup(html.text, "html.parser")
86
+ elements_with_href = soup.find_all(href=True)
87
+ href_links = [element['href'] for element in elements_with_href]
88
+ for link in href_links:
89
+ if link.startswith('https://en.wikipedia.org/wiki/'):
90
+ data.add(link.split("/")[-1])
91
 
92
  wikipedia_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&srlimit=1&srprop=&srenablerewrites=True&srinfo=suggestion&format=json"
93
  json_data = await fetch_json(wikipedia_url, session)