"""Streamlit app: pick a HUPD patent application and display a classifier score.

The script loads the HUPD ``sample`` configuration, lets the user choose a
patent number in a form, runs a Hugging Face text-classification pipeline on
the selected record when the form is submitted, and shows the abstract and
claims of the selected application.
"""
import pandas as pd
import streamlit as st
import torch  # noqa: F401  (imported by the original script; kept in scope)
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Checkpoint used to build the "sentiment-analysis" pipeline below.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"


def _rows_for(frame, column, patent_number):
    """Return the values of *column* for every row of *frame* whose
    ``patent_number`` equals *patent_number* (a Series, possibly empty)."""
    return frame[column].loc[frame["patent_number"] == patent_number]


def _as_text(values):
    """Join the selected values into plain text, one paragraph per matching row.

    Passing a Series straight to ``st.info`` renders its repr (index label and
    the ``Name:``/``dtype:`` footer) rather than the text itself.
    """
    return "\n\n".join(values.astype(str))


dataset_dict = load_dataset(
    'HUPD/hupd',
    name='sample',
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date='2016-01-01',
    train_filing_end_date='2016-01-31',
    val_filing_start_date='2017-01-22',
    val_filing_end_date='2017-01-31',
)

# Keep only the columns the app reads.
df = pd.DataFrame.from_dict(dataset_dict["train"])
df = pd.DataFrame(
    df,
    columns=['patent_number', 'decision', 'abstract', 'claims', 'filing_date'],
)

# Distinct patent numbers offered in the select box.
PAN = df['patent_number'].drop_duplicates()

st.title('Harvard USPTO Patentability Score')

with st.form("patent-form"):
    make_choice = st.selectbox('Select the Patent Application Number:', PAN)
    submitted = st.form_submit_button(label='submit')

if submitted:
    # NOTE(review): the model and tokenizer are re-created on every submit.
    # If the installed Streamlit provides a resource cache, wrapping this
    # construction in it would avoid the reload - confirm the version first.
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

    # NOTE(review): the classifier input is the string form of the selected
    # ``decision`` rows, which includes the index label of each row. This is
    # kept as-is because changing the input would change the reported score;
    # confirm whether the abstract text was the intended input.
    decision = _rows_for(df, 'decision', make_choice)
    X_train = decision.to_string()

    results = classifier(X_train, truncation=True)
    for result in results:
        print(result)
        score = result['score']
        print(score)
        # Written inside the loop so ``score`` is always bound when it is shown.
        st.write("The Patentability Score is:", score)

# NOTE(review): the indentation of the original script was lost; this section
# is placed at top level, so the selected application is shown on every run,
# including before the first submit. Confirm against the intended layout.

# Kept so any pandas object that is still rendered through its repr elsewhere
# is not truncated.
pd.options.display.max_colwidth = 100000

abstract = _rows_for(df, "abstract", make_choice)
st.subheader(':red[Patent Application]')
st.subheader(':red[Abstract:]')
st.info(_as_text(abstract))

claims = _rows_for(df, "claims", make_choice)
st.subheader(':red[Claim:]')
st.info(_as_text(claims))