DeepSoft-Tech committed on
Commit
757c949
1 Parent(s): e157fed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -60
app.py CHANGED
@@ -16,46 +16,13 @@ from keybert import KeyBERT
16
  from keyphrase_vectorizers import KeyphraseCountVectorizer
17
  kw_model=KeyBERT(model='AI-Growth-Lab/PatentSBERTa')
18
 
19
- s3 = boto3.resource('s3',
20
- region_name='us-east-1',
21
- aws_access_key_id='AKIA3VGKPNV5NSVBJWEE',
22
- aws_secret_access_key='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')
23
-
24
- INDEX_API_KEY='b33ddf5d-5b1a-4d0e-9a3f-572008563791'
25
- INDEX_DIMENSION=768
26
- INDEX_ENV='gcp-starter'
27
- INDEX_NAME='wiki-index'
28
-
29
- # getting Pinecone credentials
30
- # INDEX_DIMENSION=768
31
- # logging.info(f"Index dimensions are:{INDEX_DIMENSION}")
32
  pinecone.init(api_key=INDEX_API_KEY, environment=INDEX_ENV)
33
  index = pinecone.Index(index_name=INDEX_NAME )
34
 
35
  tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-base')
36
  model = AutoModel.from_pretrained('intfloat/e5-base')
37
 
38
- # data=pd.read_csv("wikicat_all.csv")
39
-
40
- def get_pat_text(pnkc_no):
41
- pat_data=Patent_DataCreator(pnkc_no)
42
- bib_key,pnkc_without_kindcode,pnkc_suffix=pat_data.get_bib_key()
43
- bib_bucket=pat_data.get_bib_bucket()
44
- bib_data=pat_data.get_bib_data(s3)
45
- claims_data=pat_data.get_claims_data(s3)
46
- desc_data=pat_data.get_desc_data(s3)
47
- df1,df2,df3=pat_data.get_patent_dfs()
48
- dataset=pat_data.get_patent_dataset()
49
-
50
- Title=dataset[1]['Title'][0]
51
- Abstract=dataset[1]['Abstract'][0]
52
- Claims=dataset[1]['Claims'][0]
53
- Description=dataset[1]['Description'][0]
54
- # SOI=dataset[1]['SOI'][0]
55
-
56
- pat_text= Title+Abstract
57
- return pat_text
58
-
59
 
60
  # Function to fetch categories, title, and related text from a Wikipedia page
61
  def fetch_wikipedia_data(article_title):
@@ -100,24 +67,7 @@ def get_wiki_category_aprch_1(pat_text):
100
  res = [i for n, i in enumerate(result) if i not in result[:n]]
101
  return titles,res
102
 
103
- # def get_wiki_category_aprch_2(pat_text):
104
- # print(pat_text)
105
- # keywords=kw_model.extract_keywords(pat_text,keyphrase_ngram_range=(1, 3),top_n=10,vectorizer=KeyphraseCountVectorizer())
106
- # titles=[]
107
- # for i in range(len(keywords)):
108
- # title=keywords[i][0]
109
- # titles.append(title)
110
- # data = []
111
- # for i in titles:
112
- # results = fetch_wikipedia_data(i)
113
- # data.append(results)
114
- # cats=[]
115
- # for i in range(len(data)):
116
- # if data[i] is not None:
117
- # cat=data[i]['categories']
118
- # cats.append(cat)
119
- # result=[j for i in cats for j in i]
120
- # res = [i for n, i in enumerate(result) if i not in result[:n]]
121
  # return res
122
 
123
  def average_pool(last_hidden_states: Tensor,
@@ -161,16 +111,9 @@ def get_wiki_category(pat_text):
161
  def main():
162
  st.title('Wiki Classifier')
163
 
164
- pnkc_no = st.text_input("Enter a pnkc number:")
165
- pat_text = st.text_area("Enter a text paragraph:")
166
 
167
  if st.button('Get Wiki categories'):
168
-
169
- if pnkc_no:
170
- text = get_pat_text(pnkc_no)
171
- else:
172
- text=pat_text
173
-
174
  st.write("Predicting Wiki Categories for text:",text[:200])
175
  start_time = time.time()
176
  titles,wiki_categories=get_wiki_category_aprch_1(text)
 
16
  from keyphrase_vectorizers import KeyphraseCountVectorizer
17
  kw_model=KeyBERT(model='AI-Growth-Lab/PatentSBERTa')
18
 
19
+
 
 
 
 
 
 
 
 
 
 
 
 
20
  pinecone.init(api_key=INDEX_API_KEY, environment=INDEX_ENV)
21
  index = pinecone.Index(index_name=INDEX_NAME )
22
 
23
  tokenizer = AutoTokenizer.from_pretrained('intfloat/e5-base')
24
  model = AutoModel.from_pretrained('intfloat/e5-base')
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
  # Function to fetch categories, title, and related text from a Wikipedia page
28
  def fetch_wikipedia_data(article_title):
 
67
  res = [i for n, i in enumerate(result) if i not in result[:n]]
68
  return titles,res
69
 
70
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # return res
72
 
73
  def average_pool(last_hidden_states: Tensor,
 
111
  def main():
112
  st.title('Wiki Classifier')
113
 
114
+ text = st.text_area("Enter a text paragraph:")
 
115
 
116
  if st.button('Get Wiki categories'):
 
 
 
 
 
 
117
  st.write("Predicting Wiki Categories for text:",text[:200])
118
  start_time = time.time()
119
  titles,wiki_categories=get_wiki_category_aprch_1(text)