Benjamin S Liang committed on
Commit
2d7ed25
1 Parent(s): 480b6a8

Updated app with patent abstract dropdown

Browse files
Files changed (2) hide show
  1. app.py +28 -6
  2. finetunehupd.ipynb +46 -4
app.py CHANGED
@@ -5,6 +5,24 @@ from transformers import pipeline, DistilBertForSequenceClassification, DistilBe
5
  MODEL_OPTS = ['finetuned', 'default', 'bertweet-base-sentiment-analysis', 'twitter-roberta-base', 'distilRoberta-financial-sentiment']
6
  FINETUNED_OPT = MODEL_OPTS[0]
7
  DEFAULT_OPT = MODEL_OPTS[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
  # returns loaded model and tokenizer, if any
10
  def load_model(opt):
@@ -37,19 +55,23 @@ def sentiment_analysis(model, tokenizer):
37
  return pipeline('text-classification', model=model, tokenizer=tokenizer)
38
  else: return pipeline('text-classification')
39
 
40
- # Title the Streamlit app 'Sentiment Analysis'
41
- st.title('Sentiment Analysis')
42
  st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
43
 
 
 
 
44
  # Take in user input
45
- user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
46
 
47
  # The user can interact with a dropdown menu to choose a sentiment analysis model.
48
- dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
49
- model, tokenizer = load_model(dropdown_value)
 
50
 
51
  # Perform sentiment analysis on the user's input
52
- result = sentiment_analysis(model, tokenizer)(user_text)
53
 
54
  # Display the sentiment analysis results
55
  st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])
 
5
  MODEL_OPTS = ['finetuned', 'default', 'bertweet-base-sentiment-analysis', 'twitter-roberta-base', 'distilRoberta-financial-sentiment']
6
  FINETUNED_OPT = MODEL_OPTS[0]
7
  DEFAULT_OPT = MODEL_OPTS[1]
8
+ abstracts = []
9
+
10
+ # Helper function
11
+ def map_decision_to_string(example):
12
+ return {'decision': decision_to_str[example['decision']]}
13
+
14
+ def load_abstracts():
15
+ dataset_dict = load_dataset('HUPD/hupd',
16
+ name='sample',
17
+ data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
18
+ icpr_label=None,
19
+ train_filing_start_date='2016-01-01',
20
+ train_filing_end_date='2016-01-31',
21
+ val_filing_start_date='2016-01-01',
22
+ val_filing_end_date='2016-01-01',
23
+ )
24
+ abstracts = dataset_dict['train']['abstract']
25
+ dataset_dict = [] # free up space
26
 
27
  # returns loaded model and tokenizer, if any
28
  def load_model(opt):
 
55
  return pipeline('text-classification', model=model, tokenizer=tokenizer)
56
  else: return pipeline('text-classification')
57
 
58
+ # Title the Streamlit app 'Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)'
59
+ st.title('Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)')
60
  st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
61
 
62
+ dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=abstracts.index(abstracts[0]))
63
+ model, tokenizer = load_model('finetuned')
64
+
65
  # Take in user input
66
+ #user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
67
 
68
  # The user can interact with a dropdown menu to choose a sentiment analysis model.
69
+ #dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
70
+ #model, tokenizer = load_model(dropdown_value)
71
+
72
 
73
  # Perform sentiment analysis on the user's input
74
+ result = sentiment_analysis(model, tokenizer)(dropdown_abstracts)
75
 
76
  # Display the sentiment analysis results
77
  st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])
finetunehupd.ipynb CHANGED
@@ -2,7 +2,7 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 1,
6
  "id": "1df3c609-62a6-49c3-bcc6-29c520f9501c",
7
  "metadata": {},
8
  "outputs": [],
@@ -19,7 +19,7 @@
19
  },
20
  {
21
  "cell_type": "code",
22
- "execution_count": 2,
23
  "id": "58167c28-eb27-4f82-b7d0-8216dbeaf650",
24
  "metadata": {},
25
  "outputs": [
@@ -33,7 +33,7 @@
33
  {
34
  "data": {
35
  "application/vnd.jupyter.widget-view+json": {
36
- "model_id": "345008775bf549b5a548948949710507",
37
  "version_major": 2,
38
  "version_minor": 0
39
  },
@@ -1262,9 +1262,51 @@
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
- "execution_count": null,
1266
  "id": "9b496593-c0de-4ce2-95d5-d5d3bf09d93c",
1267
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1268
  "outputs": [],
1269
  "source": []
1270
  }
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 2,
6
  "id": "1df3c609-62a6-49c3-bcc6-29c520f9501c",
7
  "metadata": {},
8
  "outputs": [],
 
19
  },
20
  {
21
  "cell_type": "code",
22
+ "execution_count": 3,
23
  "id": "58167c28-eb27-4f82-b7d0-8216dbeaf650",
24
  "metadata": {},
25
  "outputs": [
 
33
  {
34
  "data": {
35
  "application/vnd.jupyter.widget-view+json": {
36
+ "model_id": "a2f090474cb148548ce3eb73698fcc6c",
37
  "version_major": 2,
38
  "version_minor": 0
39
  },
 
1262
  },
1263
  {
1264
  "cell_type": "code",
1265
+ "execution_count": 5,
1266
  "id": "9b496593-c0de-4ce2-95d5-d5d3bf09d93c",
1267
  "metadata": {},
1268
+ "outputs": [
1269
+ {
1270
+ "data": {
1271
+ "text/plain": [
1272
+ "'The present invention relates to passive optical network (PON), and in particular, to an optical network terminal (ONT) in the PON system. In one embodiment, the optical network terminal includes a first interface coupled to a communications network, a second interface coupled to a network client and a processor including a memory coupled to the first interface and to the second interface, wherein the processor is capable of converting optical signals to electric signals, such that the network client can access the communications network.'"
1273
+ ]
1274
+ },
1275
+ "execution_count": 5,
1276
+ "metadata": {},
1277
+ "output_type": "execute_result"
1278
+ }
1279
+ ],
1280
+ "source": [
1281
+ "dataset_dict['train']['abstract'][0]"
1282
+ ]
1283
+ },
1284
+ {
1285
+ "cell_type": "code",
1286
+ "execution_count": 6,
1287
+ "id": "6b6ad778-15aa-492a-9484-40106269e10d",
1288
+ "metadata": {},
1289
+ "outputs": [
1290
+ {
1291
+ "data": {
1292
+ "text/plain": [
1293
+ "'Embodiments of the invention provide a method of reading and verifying a tag based on inherent disorder during a manufacturing process. The method includes using a first reader to take a first reading of an inherent disorder feature of the tag, and using a second reader to take a second reading of the inherent disorder feature of the tag. The method further includes matching the first reading with the second reading, and determining one or more acceptance criteria, wherein at least one of the acceptance criteria is based on whether the first reading and the second reading match within a predetermined threshold. If the acceptance criteria are met, then the tag is accepted, and a fingerprint for the tag is recorded. The invention further provides a method of testing and characterizing a reader of inherent disorder tags during a manufacturing process. The method includes taking a reading of a known inherent disorder tag, using the reading to measure a characteristic of the reader, and storing the measured characteristic for use when reading inherent disorder tags.'"
1294
+ ]
1295
+ },
1296
+ "execution_count": 6,
1297
+ "metadata": {},
1298
+ "output_type": "execute_result"
1299
+ }
1300
+ ],
1301
+ "source": [
1302
+ "dataset_dict['train']['abstract'][1]"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "cell_type": "code",
1307
+ "execution_count": null,
1308
+ "id": "cf9d94a7-4d7e-4f6b-88f8-46c9855289f4",
1309
+ "metadata": {},
1310
  "outputs": [],
1311
  "source": []
1312
  }