Spaces:
Runtime error
Runtime error
moonahhyun
committed on
Commit
•
b1fe96d
1
Parent(s):
164142a
Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,8 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
3 |
from datasets import load_dataset
|
4 |
-
|
|
|
5 |
dataset = load_dataset('HUPD/hupd',
|
6 |
name='sample',
|
7 |
data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
|
@@ -11,7 +12,9 @@ dataset = load_dataset('HUPD/hupd',
|
|
11 |
val_filing_start_date='2016-01-30',
|
12 |
val_filing_end_date='2016-01-31',
|
13 |
)
|
|
|
14 |
exclude_pending = dataset.filter(lambda example: example["decision"] != "PENDING")
|
|
|
15 |
data = exclude_pending["validation"]
|
16 |
p_number = data["patent_number"]
|
17 |
p_abstract = data["abstract"]
|
@@ -20,7 +23,7 @@ p_decision = data["decision"]
|
|
20 |
abstract = ""
|
21 |
claims = ""
|
22 |
decision = ""
|
23 |
-
|
24 |
st.title("Patentability Score")
|
25 |
st.write("Select a patent ID and click submit to get patentability score.")
|
26 |
selected_id = st.selectbox("Patent ID:", p_number, index=len(p_number)-1)
|
@@ -28,26 +31,26 @@ selected_index = p_number.index(selected_id)
|
|
28 |
abstract = p_abstract[selected_index]
|
29 |
claims = p_claims[selected_index]
|
30 |
decision = p_decision[selected_index]
|
31 |
-
|
32 |
st.text_area("Abstract:", abstract)
|
33 |
st.text_area("Claims:", claims)
|
34 |
-
|
35 |
def get_pipeline():
|
36 |
model = AutoModelForSequenceClassification.from_pretrained("moonahhyun/project-uspto")
|
37 |
tokenizer = AutoTokenizer.from_pretrained("moonahhyun/project-uspto")
|
38 |
pl = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
39 |
return pl
|
40 |
-
|
41 |
-
# Load the model and perform sentiment analysis
|
42 |
if st.button("Submit"):
|
43 |
-
with st.spinner("Analyzing the
|
44 |
pl = get_pipeline()
|
45 |
result = pl(abstract)
|
46 |
label = result[0]['label']
|
47 |
score = result[0]['score']
|
|
|
48 |
if label == "ACCEPTED":
|
49 |
st.write(f"Patentability Score: {score}")
|
50 |
st.write(f"Higher % of being: {label}")
|
|
|
51 |
else:
|
52 |
score = 1 - score
|
53 |
st.write(f"Patentability Score: {score}")
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
|
3 |
from datasets import load_dataset
|
4 |
+
### Load HUPD dataset
|
5 |
+
# Sample a small subset of the dataset that corresponds to all patent applications submitted in Jan 2016.
|
6 |
dataset = load_dataset('HUPD/hupd',
|
7 |
name='sample',
|
8 |
data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
|
|
|
12 |
val_filing_start_date='2016-01-30',
|
13 |
val_filing_end_date='2016-01-31',
|
14 |
)
|
15 |
+
### Exclude pending applications (they were also excluded when the model was fine-tuned)
|
16 |
exclude_pending = dataset.filter(lambda example: example["decision"] != "PENDING")
|
17 |
+
### Extract necessary data lists
|
18 |
data = exclude_pending["validation"]
|
19 |
p_number = data["patent_number"]
|
20 |
p_abstract = data["abstract"]
|
|
|
23 |
abstract = ""
|
24 |
claims = ""
|
25 |
decision = ""
|
26 |
+
### Streamlit app
|
27 |
st.title("Patentability Score")
|
28 |
st.write("Select a patent ID and click submit to get patentability score.")
|
29 |
selected_id = st.selectbox("Patent ID:", p_number, index=len(p_number)-1)
|
|
|
31 |
abstract = p_abstract[selected_index]
|
32 |
claims = p_claims[selected_index]
|
33 |
decision = p_decision[selected_index]
|
|
|
34 |
st.text_area("Abstract:", abstract)
|
35 |
st.text_area("Claims:", claims)
|
36 |
+
### Load model and tokenizer
|
37 |
def get_pipeline():
|
38 |
model = AutoModelForSequenceClassification.from_pretrained("moonahhyun/project-uspto")
|
39 |
tokenizer = AutoTokenizer.from_pretrained("moonahhyun/project-uspto")
|
40 |
pl = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
41 |
return pl
|
42 |
+
### Perform patentability analysis
|
|
|
43 |
if st.button("Submit"):
|
44 |
+
with st.spinner("Analyzing the patentability..."):
|
45 |
pl = get_pipeline()
|
46 |
result = pl(abstract)
|
47 |
label = result[0]['label']
|
48 |
score = result[0]['score']
|
49 |
+
# Print score when label = accepted
|
50 |
if label == "ACCEPTED":
|
51 |
st.write(f"Patentability Score: {score}")
|
52 |
st.write(f"Higher % of being: {label}")
|
53 |
+
# For any other label (e.g. rejected), print 1 - score as the patentability score
|
54 |
else:
|
55 |
score = 1 - score
|
56 |
st.write(f"Patentability Score: {score}")
|