juliaannjose committed on
Commit
2d5fb99
1 Parent(s): c841eb4

patent classifier code

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit demo: pick a patent application from HUPD and classify it.

The app loads the "sample" configuration of the HUPD dataset, lets the user
choose one application from the training split by its patent number, shows
that application's abstract and claims, and, when "Submit" is pressed, runs
the fine-tuned model on the concatenated abstract + claims text.
"""
import streamlit as st
from transformers import pipeline
from datasets import load_dataset, Dataset, DatasetDict

# Hugging Face Hub id of the fine-tuned language model used for prediction.
language_model_path = "juliaannjose/finetuned_model"


@st.cache_resource
def _load_hupd_sample():
    """Load the HUPD sample splits once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    keeps the dataset from being rebuilt on each rerun.
    """
    return load_dataset(
        "HUPD/hupd",
        name="sample",
        data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
        icpr_label=None,
        train_filing_start_date="2016-01-01",
        train_filing_end_date="2016-01-21",
        val_filing_start_date="2016-01-22",
        val_filing_end_date="2016-01-31",
    )


@st.cache_resource
def _load_classifier(model_path):
    """Build the transformers pipeline for *model_path* once and reuse it."""
    return pipeline(model=model_path)


# load the dataset; the patent number, abstract and claims columns feed the UI
dataset_dict = _load_hupd_sample()

# pass the model to transformers pipeline - model selection component.
classifier_model = _load_classifier(language_model_path)

train_split = dataset_dict["train"]
# list() gives plain positional indexing regardless of how the installed
# `datasets` version represents a column.
patent_numbers = list(train_split["patent_number"])

# drop down menu with patent numbers.
# The options are row positions (labelled with the patent number) so the
# chosen entry maps to exactly one row even if a patent number repeats.
_row_index = st.selectbox(
    "Select the Patent Number",
    range(len(patent_numbers)),
    format_func=lambda i: str(patent_numbers[i]),
)

# An empty split leaves the selectbox without a value; nothing to display.
if _row_index is None:
    st.stop()

# get abstract and claims corresponding to the selected row.
# (The previous lookup, `[["patent_number"] == _patent_id]`, compared a list
# literal with the id, which is always False, so row 0 was always returned.)
_selected_row = train_split[_row_index]
_patent_id = _selected_row["patent_number"]
_abstract = _selected_row["abstract"]
_claim = _selected_row["claims"]  # NOTE(review): HUPD names this field "claims" - confirm

# display abstract and claims
st.write(_abstract)
st.write(_claim)

# when submit button clicked, run the model and get result
if st.button("Submit"):
    # Abstract + claims is usually longer than the model's maximum sequence
    # length; let the tokenizer truncate instead of failing on long inputs.
    # NOTE(review): assumes the hub model resolves to a text-classification
    # pipeline, which forwards tokenizer kwargs - confirm.
    results = classifier_model([_abstract + _claim], truncation=True)
    st.write(results)