theresatvan committed on
Commit
2a11a7e
1 Parent(s): 068cd45

Update app.py & requirements

Browse files
Files changed (2) hide show
  1. app.py +60 -21
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,29 +1,68 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
- from huggingface_hub import HfApi, ModelFilter
4
 
5
- # Set up Hugging Face Hub API client
6
- api = HfApi()
7
 
8
- # Display title
9
- st.title("Text Sentiment Analyzer")
 
 
 
 
 
 
 
 
10
 
11
- # Retrieve all text classification models
12
- models = api.list_models(filter=ModelFilter(task="text-classification"))[:10]
13
- model_ids = [model.modelId for model in models]
14
 
15
- # Create submission form
16
- form = st.form("sentiment-form")
17
- select_model = form.selectbox("Select a pretrained model", model_ids)
18
- input = form.text_area('Enter your text here.')
19
- submit = form.form_submit_button("Submit")
20
 
21
- if submit:
22
- # Create pipeline to user's selected pre-trained model
23
- classifier = pipeline(task="sentiment-analysis", model=select_model)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
- # Extract prediction from the results
26
- pred = classifier(input)
27
 
28
- # Display prediction
29
- st.write(pred)
 
 
 
 
 
 
 
1
import streamlit as st
import torch
from datasets import load_dataset
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
4
 
5
# Integer class ids keyed by HUPD decision string.
# NOTE(review): despite its name this maps str -> int, not int -> str; the
# name is kept so existing references keep working.
decision_to_str = {
    'REJECTED': 0,
    'ACCEPTED': 1,
    'PENDING': 2,
    'CONT-REJECTED': 3,
    'CONT-ACCEPTED': 4,
    'CONT-PENDING': 5,
}

# Load the HUPD patent corpus. Only the 'validation' split is used below.
# NOTE(review): the train window starts and ends on the same day, presumably
# to keep the unused train split as small as possible -- confirm.
dataset_dict = load_dataset(
    'HUPD/hupd',
    name='all',
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    force_extract=True,
    train_filing_start_date='2016-01-01',
    train_filing_end_date='2016-01-01',
    val_filing_start_date='2017-01-01',
    val_filing_end_date='2017-05-31',
)

# Keep only applications whose decision is a plain accept or reject.
dataset = dataset_dict['validation'].filter(lambda e: e['decision'] in ['REJECTED', 'ACCEPTED'])

# Load the fine-tuned checkpoints from the Hub. The bare class constructors
# expect a config object / vocabulary file path, not a repository id, so the
# weights and vocabulary must be fetched with `from_pretrained`.
model_abstract = DistilBertForSequenceClassification.from_pretrained('theresatvan/hupd-distilbert-abstract')
tokenizer_abstract = DistilBertTokenizer.from_pretrained('theresatvan/hupd-distilbert-abstract')

model_claims = DistilBertForSequenceClassification.from_pretrained('theresatvan/hupd-distilbert-claims')
tokenizer_claims = DistilBertTokenizer.from_pretrained('theresatvan/hupd-distilbert-claims')
25
+
26
+
27
def predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, input):
    """Classify a patent application by averaging two models' probabilities.

    The abstract and the claims are scored by their own model; the two
    softmax distributions are averaged and the arg-max class is returned.

    Args:
        model_abstract: Sequence classifier applied to the abstract text.
        model_claims: Sequence classifier applied to the claims text.
        tokenizer_abstract: Tokenizer called on the abstract text.
        tokenizer_claims: Tokenizer called on the claims text.
        input: Mapping with ``'abstract'`` and ``'claims'`` entries (a string
            each, or equally long lists of strings for a batch). The name
            shadows the builtin but is kept for caller compatibility.

    Returns:
        A ``(label, combined_prob)`` tuple: ``label`` holds the arg-max class
        index per example and ``combined_prob`` the averaged class
        probabilities, one row per example.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    model_abstract.to(device)
    model_claims.to(device)

    model_abstract.eval()
    model_claims.eval()

    abstract, claims = input['abstract'], input['claims']

    # Truncate to the model's maximum length (claims text can be very long),
    # pad so a batch of unequal lengths still forms a tensor, and move the
    # encodings to the same device as the models.
    input_abstract = tokenizer_abstract(
        abstract, return_tensors='pt', truncation=True, padding=True
    ).to(device)
    input_claims = tokenizer_claims(
        claims, return_tensors='pt', truncation=True, padding=True
    ).to(device)

    with torch.no_grad():
        outputs_abstract = model_abstract(**input_abstract)
        outputs_claims = model_claims(**input_claims)

    prob_abstract = outputs_abstract.logits.softmax(dim=1)
    prob_claims = outputs_claims.logits.softmax(dim=1)

    combined_prob = (prob_abstract + prob_claims) / 2
    label = torch.argmax(combined_prob, dim=1)

    return label, combined_prob
49
+
50
+
51
if __name__ == '__main__':
    # st.title is a function; assigning to it would replace the API instead
    # of rendering a heading.
    st.title("Can I Patent This?")

    form = st.form('patent-prediction-form')

    # Offer every application in the filtered dataset as a choice.
    # NOTE(review): assumes the dataset exposes an 'application_number'
    # column, as the lookup below does -- confirm against the HUPD schema.
    patents_dropdown = list(dataset['application_number'])

    input_application = form.selectbox('Select a patent\'s application number', patents_dropdown)
    submit = form.form_submit_button("Submit")

    if submit:
        matches = dataset.filter(lambda e: e['application_number'] == input_application)

        if len(matches) == 0:
            st.error('No patent found for the selected application number.')
        else:
            # Score a single record so predict() receives plain strings
            # rather than whole dataset columns.
            record = matches[0]

            label, prob = predict(model_abstract, model_claims, tokenizer_abstract, tokenizer_claims, record)

            st.write(label)
            st.write(prob)
            st.write(record['decision'])
68
+
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
  streamlit
2
  transformers
3
- torch
 
1
  streamlit
2
  transformers
3
+ torch
4
+ datasets