|
import streamlit as st |
|
from datasets import load_dataset |
|
from transformers import pipeline |
|
import pandas as pd |
|
import torch |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline |
|
from datasets import load_dataset |
|
|
|
# Options forwarded verbatim to the HUPD loading call: the 'sample'
# configuration, the metadata feather file, no IPCR label restriction, and the
# filing-date windows that delimit the train and validation splits.
# NOTE(review): the validation window is in 2017 while the train window is in
# 2016 -- confirm this is intended for the 'sample' configuration.
_hupd_load_options = {
    'name': 'sample',
    'data_files': "https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    'icpr_label': None,
    'train_filing_start_date': '2016-01-01',
    'train_filing_end_date': '2016-01-31',
    'val_filing_start_date': '2017-01-22',
    'val_filing_end_date': '2017-01-31',
}

# Only the "train" entry of the returned mapping is read later in this script.
dataset_dict = load_dataset('HUPD/hupd', **_hupd_load_options)
|
|
|
# Columns the app needs from the training split; everything else is dropped.
_APP_COLUMNS = ['patent_number', 'decision', 'abstract', 'claims', 'filing_date']

# Materialise the training split as a DataFrame, then narrow it to the
# columns above.
_train_frame = pd.DataFrame.from_dict(dataset_dict["train"])
df = pd.DataFrame(_train_frame, columns=_APP_COLUMNS)

# Distinct values offered in the selection widget.
# NOTE(review): the UI elsewhere in this script labels these as "Patent
# Application Number" -- confirm that 'patent_number' is the intended key.
PAN = df.loc[:, 'patent_number'].drop_duplicates()
|
|
|
# Page heading.
st.title('Harvard USPTO Patentability Score')

# Selection form, written in object notation: the select box and the submit
# button are attached to the form container directly rather than through a
# `with` block.
patent_form = st.form("patent-form")
make_choice = patent_form.selectbox('Select the Patent Application Number:', PAN)
submitted = patent_form.form_submit_button(label='submit')
|
|
|
if submitted:
    # Rows of the training frame that carry the selected number. The selector
    # is built from de-duplicated values, so more than one row may match; the
    # mask is evaluated once and reused for every column below.
    selected = df.loc[df['patent_number'] == make_choice]

    if selected.empty:
        # Nothing to score or display (e.g. the frame is empty and the select
        # box therefore returned no value).
        st.warning('No record found for the selected patent number.')
    else:
        # Build a text-classification pipeline from the SST-2 sentiment
        # checkpoint.
        # NOTE(review): the model and tokenizer are re-instantiated on every
        # submission; consider caching them if reload latency matters.
        model_name = "distilbert-base-uncased-finetuned-sst-2-english"
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

        # Classify the decision text only. The default Series.to_string()
        # output also embeds the row index, which would leak an arbitrary
        # number into the model input.
        X_train = selected['decision'].to_string(index=False).strip()
        results = classifier(X_train, truncation=True)

        # NOTE(review): `score` is the confidence the pipeline reports for
        # whichever label it predicted; the label itself is not inspected
        # here -- confirm this is the intended "patentability" measure.
        for result in results:
            score = result['score']
            st.write("The Patentability Score is:", score)

        # Retained from the original script: raises pandas' display column
        # width so any pandas object rendered as text later in this run is
        # not truncated.
        pd.options.display.max_colwidth = 100000

        # Show the field text itself rather than the Series repr (which adds
        # the row index and a "Name: ..., dtype: ..." footer). Multiple
        # matching rows are separated by a blank line.
        abstract = "\n\n".join(selected["abstract"].astype(str))
        st.subheader(':red[Patent Application]')
        st.subheader(':red[Abstract:]')
        st.info(abstract)

        claims = "\n\n".join(selected["claims"].astype(str))
        st.subheader(':red[Claim:]')
        st.info(claims)
|
|
|
|