Spaces:
Runtime error
Runtime error
benliang99
committed on
Commit
•
2d7ed25
1
Parent(s):
480b6a8
Updated app with patent abstract dropdown
Browse files- app.py +28 -6
- finetunehupd.ipynb +46 -4
app.py
CHANGED
@@ -5,6 +5,24 @@ from transformers import pipeline, DistilBertForSequenceClassification, DistilBe
|
|
5 |
MODEL_OPTS = ['finetuned', 'default', 'bertweet-base-sentiment-analysis', 'twitter-roberta-base', 'distilRoberta-financial-sentiment']
|
6 |
FINETUNED_OPT = MODEL_OPTS[0]
|
7 |
DEFAULT_OPT = MODEL_OPTS[1]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
# returns loaded model and tokenizer, if any
|
10 |
def load_model(opt):
|
@@ -37,19 +55,23 @@ def sentiment_analysis(model, tokenizer):
|
|
37 |
return pipeline('text-classification', model=model, tokenizer=tokenizer)
|
38 |
else: return pipeline('text-classification')
|
39 |
|
40 |
-
# Title the Streamlit app '
|
41 |
-
st.title('
|
42 |
st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')
|
43 |
|
|
|
|
|
|
|
44 |
# Take in user input
|
45 |
-
user_text = st.text_input('Input text to perform sentiment analysis on here.', 'I love AI!')
|
46 |
|
47 |
# The user can interact with a dropdown menu to choose a sentiment analysis model.
|
48 |
-
dropdown_value = st.selectbox('Select one of the following sentiment analysis models', MODEL_OPTS, index=MODEL_OPTS.index(DEFAULT_OPT))
|
49 |
-
model, tokenizer = load_model(dropdown_value)
|
|
|
50 |
|
51 |
# Perform sentiment analysis on the user's input
|
52 |
-
result = sentiment_analysis(model, tokenizer)(
|
53 |
|
54 |
# Display the sentiment analysis results
|
55 |
st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])
|
|
|
5 |
# Names of the selectable models; the first two positions get aliases below.
MODEL_OPTS = [
    'finetuned',
    'default',
    'bertweet-base-sentiment-analysis',
    'twitter-roberta-base',
    'distilRoberta-financial-sentiment',
]
# Aliases for the first and second entries of MODEL_OPTS.
FINETUNED_OPT, DEFAULT_OPT = MODEL_OPTS[:2]
# Module-level list of patent abstracts; starts out empty.
abstracts = []
|
9 |
+
|
10 |
+
# Helper function
|
11 |
+
def map_decision_to_string(example):
    """Return a one-key dict holding the example's decision looked up in decision_to_str.

    NOTE(review): `decision_to_str` is not defined in the visible part of this
    file; presumably it maps raw decision values to strings -- confirm it is
    in scope before this helper is called.
    """
    decision_label = decision_to_str[example['decision']]
    return {'decision': decision_label}
|
13 |
+
|
14 |
+
def load_abstracts():
    """Load the HUPD 'sample' configuration and store its training abstracts.

    The abstracts are written to the module-level ``abstracts`` list and also
    returned. Previously the assignment created a function-local name, so the
    module-level list stayed empty and the loaded data was thrown away.

    Returns:
        list: the ``abstract`` column of the ``train`` split.
    """
    # Without this declaration the assignment below would only bind a local.
    global abstracts
    dataset_dict = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-31',
        val_filing_start_date='2016-01-01',
        val_filing_end_date='2016-01-01',
    )
    # Materialize the column as a plain list so callers can index it freely.
    abstracts = list(dataset_dict['train']['abstract'])
    # Drop the reference to the full dataset; only the abstracts are kept.
    del dataset_dict
    return abstracts
|
26 |
|
27 |
# returns loaded model and tokenizer, if any
|
28 |
def load_model(opt):
|
|
|
55 |
return pipeline('text-classification', model=model, tokenizer=tokenizer)
|
56 |
else: return pipeline('text-classification')
|
57 |
|
58 |
+
# Title the Streamlit app
st.title('Finetuned Harvard USPTO Patent Dataset (using DistilBert-Base-Uncased)')
st.markdown('Link to the app - [sentiment-analysis-app](https://huggingface.co/spaces/saccharinedreams/sentiment-analysis-app)')

# Load the abstracts before building the dropdown: the module-level list
# starts out empty, and indexing it unpopulated raised IndexError at startup.
# Use the returned list when load_abstracts() provides one; otherwise rely on
# whatever the module-level `abstracts` holds.
loaded_abstracts = load_abstracts()
if loaded_abstracts:
    abstracts = loaded_abstracts
if not abstracts:
    # Nothing to choose from: report it and halt this script run instead of crashing.
    st.error('No abstracts could be loaded from the HUPD dataset.')
    st.stop()

# Let the user pick one abstract from a dropdown; the first entry is preselected.
dropdown_abstracts = st.selectbox('Select one of the following abstracts from the HUPD dataset:', abstracts, index=0)
model, tokenizer = load_model(FINETUNED_OPT)

# Run the text-classification pipeline on the selected abstract
result = sentiment_analysis(model, tokenizer)(dropdown_abstracts)

# Display the classification results
st.write('Sentiment:', result[0]['label'], '; Score:', result[0]['score'])
|
finetunehupd.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "1df3c609-62a6-49c3-bcc6-29c520f9501c",
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
@@ -19,7 +19,7 @@
|
|
19 |
},
|
20 |
{
|
21 |
"cell_type": "code",
|
22 |
-
"execution_count":
|
23 |
"id": "58167c28-eb27-4f82-b7d0-8216dbeaf650",
|
24 |
"metadata": {},
|
25 |
"outputs": [
|
@@ -33,7 +33,7 @@
|
|
33 |
{
|
34 |
"data": {
|
35 |
"application/vnd.jupyter.widget-view+json": {
|
36 |
-
"model_id": "
|
37 |
"version_major": 2,
|
38 |
"version_minor": 0
|
39 |
},
|
@@ -1262,9 +1262,51 @@
|
|
1262 |
},
|
1263 |
{
|
1264 |
"cell_type": "code",
|
1265 |
-
"execution_count":
|
1266 |
"id": "9b496593-c0de-4ce2-95d5-d5d3bf09d93c",
|
1267 |
"metadata": {},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1268 |
"outputs": [],
|
1269 |
"source": []
|
1270 |
}
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 2,
|
6 |
"id": "1df3c609-62a6-49c3-bcc6-29c520f9501c",
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
|
|
19 |
},
|
20 |
{
|
21 |
"cell_type": "code",
|
22 |
+
"execution_count": 3,
|
23 |
"id": "58167c28-eb27-4f82-b7d0-8216dbeaf650",
|
24 |
"metadata": {},
|
25 |
"outputs": [
|
|
|
33 |
{
|
34 |
"data": {
|
35 |
"application/vnd.jupyter.widget-view+json": {
|
36 |
+
"model_id": "a2f090474cb148548ce3eb73698fcc6c",
|
37 |
"version_major": 2,
|
38 |
"version_minor": 0
|
39 |
},
|
|
|
1262 |
},
|
1263 |
{
|
1264 |
"cell_type": "code",
|
1265 |
+
"execution_count": 5,
|
1266 |
"id": "9b496593-c0de-4ce2-95d5-d5d3bf09d93c",
|
1267 |
"metadata": {},
|
1268 |
+
"outputs": [
|
1269 |
+
{
|
1270 |
+
"data": {
|
1271 |
+
"text/plain": [
|
1272 |
+
"'The present invention relates to passive optical network (PON), and in particular, to an optical network terminal (ONT) in the PON system. In one embodiment, the optical network terminal includes a first interface coupled to a communications network, a second interface coupled to a network client and a processor including a memory coupled to the first interface and to the second interface, wherein the processor is capable of converting optical signals to electric signals, such that the network client can access the communications network.'"
|
1273 |
+
]
|
1274 |
+
},
|
1275 |
+
"execution_count": 5,
|
1276 |
+
"metadata": {},
|
1277 |
+
"output_type": "execute_result"
|
1278 |
+
}
|
1279 |
+
],
|
1280 |
+
"source": [
|
1281 |
+
"dataset_dict['train']['abstract'][0]"
|
1282 |
+
]
|
1283 |
+
},
|
1284 |
+
{
|
1285 |
+
"cell_type": "code",
|
1286 |
+
"execution_count": 6,
|
1287 |
+
"id": "6b6ad778-15aa-492a-9484-40106269e10d",
|
1288 |
+
"metadata": {},
|
1289 |
+
"outputs": [
|
1290 |
+
{
|
1291 |
+
"data": {
|
1292 |
+
"text/plain": [
|
1293 |
+
"'Embodiments of the invention provide a method of reading and verifying a tag based on inherent disorder during a manufacturing process. The method includes using a first reader to take a first reading of an inherent disorder feature of the tag, and using a second reader to take a second reading of the inherent disorder feature of the tag. The method further includes matching the first reading with the second reading, and determining one or more acceptance criteria, wherein at least one of the acceptance criteria is based on whether the first reading and the second reading match within a predetermined threshold. If the acceptance criteria are met, then the tag is accepted, and a fingerprint for the tag is recorded. The invention further provides a method of testing and characterizing a reader of inherent disorder tags during a manufacturing process. The method includes taking a reading of a known inherent disorder tag, using the reading to measure a characteristic of the reader, and storing the measured characteristic for use when reading inherent disorder tags.'"
|
1294 |
+
]
|
1295 |
+
},
|
1296 |
+
"execution_count": 6,
|
1297 |
+
"metadata": {},
|
1298 |
+
"output_type": "execute_result"
|
1299 |
+
}
|
1300 |
+
],
|
1301 |
+
"source": [
|
1302 |
+
"dataset_dict['train']['abstract'][1]"
|
1303 |
+
]
|
1304 |
+
},
|
1305 |
+
{
|
1306 |
+
"cell_type": "code",
|
1307 |
+
"execution_count": null,
|
1308 |
+
"id": "cf9d94a7-4d7e-4f6b-88f8-46c9855289f4",
|
1309 |
+
"metadata": {},
|
1310 |
"outputs": [],
|
1311 |
"source": []
|
1312 |
}
|