DeepSoft-Tech
committed on
Commit
•
757c949
1
Parent(s):
e157fed
Update app.py
Browse files
app.py
CHANGED
@@ -16,46 +16,13 @@ from keybert import KeyBERT
|
|
16 |
from keyphrase_vectorizers import KeyphraseCountVectorizer
|
17 |
kw_model=KeyBERT(model='AI-Growth-Lab/PatentSBERTa')
|
18 |
|
19 |
-
|
20 |
-
region_name='us-east-1',
|
21 |
-
aws_access_key_id='AKIA3VGKPNV5NSVBJWEE',
|
22 |
-
aws_secret_access_key='LtdbeuggNR1hbvwwzOp0WCYaSXYmYMl7S0nOcjEx')
|
23 |
-
|
24 |
-
INDEX_API_KEY='b33ddf5d-5b1a-4d0e-9a3f-572008563791'
|
25 |
-
INDEX_DIMENSION=768
|
26 |
-
INDEX_ENV='gcp-starter'
|
27 |
-
INDEX_NAME='wiki-index'
|
28 |
-
|
29 |
-
# getting Pinecone credentials
|
30 |
-
# INDEX_DIMENSION=768
|
31 |
-
# logging.info(f"Index dimensions are:{INDEX_DIMENSION}")
|
32 |
pinecone.init(api_key=INDEX_API_KEY, environment=INDEX_ENV)
|
33 |
index = pinecone.Index(index_name=INDEX_NAME )
|
34 |
|
35 |
tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-base')
|
36 |
model = AutoModel.from_pretrained('intfloat/e5-base')
|
37 |
|
38 |
-
# data=pd.read_csv("wikicat_all.csv")
|
39 |
-
|
40 |
-
def get_pat_text(pnkc_no):
|
41 |
-
pat_data=Patent_DataCreator(pnkc_no)
|
42 |
-
bib_key,pnkc_without_kindcode,pnkc_suffix=pat_data.get_bib_key()
|
43 |
-
bib_bucket=pat_data.get_bib_bucket()
|
44 |
-
bib_data=pat_data.get_bib_data(s3)
|
45 |
-
claims_data=pat_data.get_claims_data(s3)
|
46 |
-
desc_data=pat_data.get_desc_data(s3)
|
47 |
-
df1,df2,df3=pat_data.get_patent_dfs()
|
48 |
-
dataset=pat_data.get_patent_dataset()
|
49 |
-
|
50 |
-
Title=dataset[1]['Title'][0]
|
51 |
-
Abstract=dataset[1]['Abstract'][0]
|
52 |
-
Claims=dataset[1]['Claims'][0]
|
53 |
-
Description=dataset[1]['Description'][0]
|
54 |
-
# SOI=dataset[1]['SOI'][0]
|
55 |
-
|
56 |
-
pat_text= Title+Abstract
|
57 |
-
return pat_text
|
58 |
-
|
59 |
|
60 |
# Function to fetch categories, title, and related text from a Wikipedia page
|
61 |
def fetch_wikipedia_data(article_title):
|
@@ -100,24 +67,7 @@ def get_wiki_category_aprch_1(pat_text):
|
|
100 |
res = [i for n, i in enumerate(result) if i not in result[:n]]
|
101 |
return titles,res
|
102 |
|
103 |
-
|
104 |
-
# print(pat_text)
|
105 |
-
# keywords=kw_model.extract_keywords(pat_text,keyphrase_ngram_range=(1, 3),top_n=10,vectorizer=KeyphraseCountVectorizer())
|
106 |
-
# titles=[]
|
107 |
-
# for i in range(len(keywords)):
|
108 |
-
# title=keywords[i][0]
|
109 |
-
# titles.append(title)
|
110 |
-
# data = []
|
111 |
-
# for i in titles:
|
112 |
-
# results = fetch_wikipedia_data(i)
|
113 |
-
# data.append(results)
|
114 |
-
# cats=[]
|
115 |
-
# for i in range(len(data)):
|
116 |
-
# if data[i] is not None:
|
117 |
-
# cat=data[i]['categories']
|
118 |
-
# cats.append(cat)
|
119 |
-
# result=[j for i in cats for j in i]
|
120 |
-
# res = [i for n, i in enumerate(result) if i not in result[:n]]
|
121 |
# return res
|
122 |
|
123 |
def average_pool(last_hidden_states: Tensor,
|
@@ -161,16 +111,9 @@ def get_wiki_category(pat_text):
|
|
161 |
def main():
|
162 |
st.title('Wiki Classifier')
|
163 |
|
164 |
-
|
165 |
-
pat_text = st.text_area("Enter a text paragraph:")
|
166 |
|
167 |
if st.button('Get Wiki categories'):
|
168 |
-
|
169 |
-
if pnkc_no:
|
170 |
-
text = get_pat_text(pnkc_no)
|
171 |
-
else:
|
172 |
-
text=pat_text
|
173 |
-
|
174 |
st.write("Predicting Wiki Categories for text:",text[:200])
|
175 |
start_time = time.time()
|
176 |
titles,wiki_categories=get_wiki_category_aprch_1(text)
|
|
|
16 |
from keyphrase_vectorizers import KeyphraseCountVectorizer
|
17 |
kw_model=KeyBERT(model='AI-Growth-Lab/PatentSBERTa')
|
18 |
|
19 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
pinecone.init(api_key=INDEX_API_KEY, environment=INDEX_ENV)
|
21 |
index = pinecone.Index(index_name=INDEX_NAME )
|
22 |
|
23 |
tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-base')
|
24 |
model = AutoModel.from_pretrained('intfloat/e5-base')
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Function to fetch categories, title, and related text from a Wikipedia page
|
28 |
def fetch_wikipedia_data(article_title):
|
|
|
67 |
res = [i for n, i in enumerate(result) if i not in result[:n]]
|
68 |
return titles,res
|
69 |
|
70 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
# return res
|
72 |
|
73 |
def average_pool(last_hidden_states: Tensor,
|
|
|
111 |
def main():
|
112 |
st.title('Wiki Classifier')
|
113 |
|
114 |
+
text = st.text_area("Enter a text paragraph:")
|
|
|
115 |
|
116 |
if st.button('Get Wiki categories'):
|
|
|
|
|
|
|
|
|
|
|
|
|
117 |
st.write("Predicting Wiki Categories for text:",text[:200])
|
118 |
start_time = time.time()
|
119 |
titles,wiki_categories=get_wiki_category_aprch_1(text)
|