theresatvan committed on
Commit
8da68fd
1 Parent(s): 81414ba

Add dropdown options

Browse files
Files changed (1) hide show
  1. app.py +31 -0
app.py CHANGED
@@ -1,10 +1,32 @@
1
  import streamlit as st
 
 
 
2
  from datasets import load_dataset
 
3
  from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
4
 
5
  decision_to_str = {'REJECTED': 0, 'ACCEPTED': 1, 'PENDING': 2, 'CONT-REJECTED': 3, 'CONT-ACCEPTED': 4, 'CONT-PENDING': 5}
6
 
7
  dataset_dict = load_dataset('HUPD/hupd',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  name='all',
9
  data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
10
  icpr_label=None,
@@ -22,6 +44,7 @@ tokenizer_abstract = DistilBertTokenizer('theresatvan/hupd-distilbert-abstract')
22
 
23
  model_claims = DistilBertForSequenceClassification('theresatvan/hupd-distilbert-claims')
24
  tokenizer_claims = DistilBertTokenizer('theresatvan/hupd-distilbert-claims')
 
25
 
26
 
27
  def predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input):
@@ -52,13 +75,21 @@ if __name__ == '__main__':
52
  st.title = "Can I Patent This?"
53
 
54
  form = st.form('patent-prediction-form')
 
 
 
55
  dropdown = []
 
56
 
57
  input_application = form.selectbox('Select a patent\'s application number', patents_dropdown)
58
  submit = form.form_submit_button("Submit")
59
 
60
  if submit:
 
 
 
61
  input = dataset.filter(lambda e: e['application_number'] == input_application)
 
62
 
63
  label, prob = predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input)
64
 
 
1
  import streamlit as st
2
+ <<<<<<< HEAD
3
+ from datasets import load_dataset, Features, Value, Sequence
4
+ =======
5
  from datasets import load_dataset
6
+ >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
7
  from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
8
 
9
  decision_to_str = {'REJECTED': 0, 'ACCEPTED': 1, 'PENDING': 2, 'CONT-REJECTED': 3, 'CONT-ACCEPTED': 4, 'CONT-PENDING': 5}
10
 
11
  dataset_dict = load_dataset('HUPD/hupd',
12
+ <<<<<<< HEAD
13
+ name='sample',
14
+ data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
15
+ icpr_label=None,
16
+ train_filing_start_date='2016-01-01',
17
+ train_filing_end_date='2016-01-21',
18
+ val_filing_start_date='2016-01-22',
19
+ val_filing_end_date='2016-01-31',
20
+ )
21
+
22
+ dataset = dataset_dict['validation']
23
+
24
+ model_abstract = DistilBertForSequenceClassification.from_pretrained('theresatvan/hupd-distilbert-abstract')
25
+ tokenizer_abstract = DistilBertTokenizer.from_pretrained('theresatvan/hupd-distilbert-abstract')
26
+
27
+ model_claims = DistilBertForSequenceClassification.from_pretrained('theresatvan/hupd-distilbert-claims')
28
+ tokenizer_claims = DistilBertTokenizer.from_pretrained('theresatvan/hupd-distilbert-claims')
29
+ =======
30
  name='all',
31
  data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
32
  icpr_label=None,
 
44
 
45
  model_claims = DistilBertForSequenceClassification('theresatvan/hupd-distilbert-claims')
46
  tokenizer_claims = DistilBertTokenizer('theresatvan/hupd-distilbert-claims')
47
+ >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
48
 
49
 
50
  def predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input):
 
75
  st.title = "Can I Patent This?"
76
 
77
  form = st.form('patent-prediction-form')
78
+ <<<<<<< HEAD
79
+ dropdown = [example['application_number'] for example in dataset]
80
+ =======
81
  dropdown = []
82
+ >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
83
 
84
  input_application = form.selectbox('Select a patent\'s application number', patents_dropdown)
85
  submit = form.form_submit_button("Submit")
86
 
87
  if submit:
88
+ <<<<<<< HEAD
89
+ input = dataset.filter(lambda e: e['patent_number'] == input_application)
90
+ =======
91
  input = dataset.filter(lambda e: e['application_number'] == input_application)
92
+ >>>>>>> 81414ba96ac55f927033c62ee5c2db6c6a22349c
93
 
94
  label, prob = predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input)
95