Update ✨Entity Linking Application✨.py
Browse files
✨Entity Linking Application✨.py
CHANGED
@@ -13,10 +13,18 @@ import streamlit as st
|
|
13 |
import time
|
14 |
from openai import OpenAI
|
15 |
import sys
|
16 |
-
#from googlesearch import search
|
17 |
import time
|
|
|
|
|
|
|
18 |
|
19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
folder_path = '/home/user/app/qids_folder'
|
21 |
|
22 |
if not os.path.exists(folder_path):
|
@@ -72,11 +80,14 @@ async def mains(name, single, combi):
|
|
72 |
qids = set()
|
73 |
|
74 |
async with aiohttp.ClientSession() as session:
|
75 |
-
url = f"https://
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
|
|
|
|
|
|
80 |
|
81 |
wikipedia_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&srlimit=1&srprop=&srenablerewrites=True&srinfo=suggestion&format=json"
|
82 |
json_data = await fetch_json(wikipedia_url, session)
|
|
|
13 |
import time
|
14 |
from openai import OpenAI
|
15 |
import sys
|
|
|
16 |
import time
|
17 |
+
from bs4 import BeautifulSoup
|
18 |
+
from fake_useragent import UserAgent
|
19 |
+
import requests
|
20 |
|
21 |
|
22 |
+
ua = UserAgent()
|
23 |
+
|
24 |
+
headers = {
|
25 |
+
"User-Agent": f"{ua.random}"
|
26 |
+
}
|
27 |
+
|
28 |
folder_path = '/home/user/app/qids_folder'
|
29 |
|
30 |
if not os.path.exists(folder_path):
|
|
|
80 |
qids = set()
|
81 |
|
82 |
async with aiohttp.ClientSession() as session:
|
83 |
+
url = f"https://www.google.com/search?q={name} site:en.wikipedia.org inurl:/wiki/ -inurl:? -inurl:Category: -inurl:File: -inurl:Special: -inurl:Help:"
|
84 |
+
html = requests.get(url, headers=headers)
|
85 |
+
soup = BeautifulSoup(html.text, "html.parser")
|
86 |
+
elements_with_href = soup.find_all(href=True)
|
87 |
+
href_links = [element['href'] for element in elements_with_href]
|
88 |
+
for link in href_links:
|
89 |
+
if link.startswith('https://en.wikipedia.org/wiki/'):
|
90 |
+
data.add(link.split("/")[-1])
|
91 |
|
92 |
wikipedia_url = f"https://en.wikipedia.org/w/api.php?action=query&list=search&srsearch={name}&srlimit=1&srprop=&srenablerewrites=True&srinfo=suggestion&format=json"
|
93 |
json_data = await fetch_json(wikipedia_url, session)
|