moonahhyun committed on
Commit
b1fe96d
1 Parent(s): 164142a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -7
app.py CHANGED
@@ -1,7 +1,8 @@
1
  import streamlit as st
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
  from datasets import load_dataset
4
-
 
5
  dataset = load_dataset('HUPD/hupd',
6
  name='sample',
7
  data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
@@ -11,7 +12,9 @@ dataset = load_dataset('HUPD/hupd',
11
  val_filing_start_date='2016-01-30',
12
  val_filing_end_date='2016-01-31',
13
  )
 
14
  exclude_pending = dataset.filter(lambda example: example["decision"] != "PENDING")
 
15
  data = exclude_pending["validation"]
16
  p_number = data["patent_number"]
17
  p_abstract = data["abstract"]
@@ -20,7 +23,7 @@ p_decision = data["decision"]
20
  abstract = ""
21
  claims = ""
22
  decision = ""
23
- # Streamlit app
24
  st.title("Patentability Score")
25
  st.write("Select a patent ID and click submit to get patentability score.")
26
  selected_id = st.selectbox("Patent ID:", p_number, index=len(p_number)-1)
@@ -28,26 +31,26 @@ selected_index = p_number.index(selected_id)
28
  abstract = p_abstract[selected_index]
29
  claims = p_claims[selected_index]
30
  decision = p_decision[selected_index]
31
-
32
  st.text_area("Abstract:", abstract)
33
  st.text_area("Claims:", claims)
34
-
35
  def get_pipeline():
36
  model = AutoModelForSequenceClassification.from_pretrained("moonahhyun/project-uspto")
37
  tokenizer = AutoTokenizer.from_pretrained("moonahhyun/project-uspto")
38
  pl = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
39
  return pl
40
-
41
- # Load the model and perform sentiment analysis
42
  if st.button("Submit"):
43
- with st.spinner("Analyzing the patent claims..."):
44
  pl = get_pipeline()
45
  result = pl(abstract)
46
  label = result[0]['label']
47
  score = result[0]['score']
 
48
  if label == "ACCEPTED":
49
  st.write(f"Patentability Score: {score}")
50
  st.write(f"Higher % of being: {label}")
 
51
  else:
52
  score = 1 - score
53
  st.write(f"Patentability Score: {score}")
 
1
  import streamlit as st
2
  from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
3
  from datasets import load_dataset
4
+ ### Load HUPD dataset
5
+ # Sample a small subset of the dataset that corresponds to all patent applications submitted in Jan 2016.
6
  dataset = load_dataset('HUPD/hupd',
7
  name='sample',
8
  data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
 
12
  val_filing_start_date='2016-01-30',
13
  val_filing_end_date='2016-01-31',
14
  )
15
+ ### Exclude pending applications (also removed in finetuned model)
16
  exclude_pending = dataset.filter(lambda example: example["decision"] != "PENDING")
17
+ ### Extract necessary data lists
18
  data = exclude_pending["validation"]
19
  p_number = data["patent_number"]
20
  p_abstract = data["abstract"]
 
23
  abstract = ""
24
  claims = ""
25
  decision = ""
26
+ ### Streamlit app
27
  st.title("Patentability Score")
28
  st.write("Select a patent ID and click submit to get patentability score.")
29
  selected_id = st.selectbox("Patent ID:", p_number, index=len(p_number)-1)
 
31
  abstract = p_abstract[selected_index]
32
  claims = p_claims[selected_index]
33
  decision = p_decision[selected_index]
 
34
  st.text_area("Abstract:", abstract)
35
  st.text_area("Claims:", claims)
36
+ ### Load model and tokenizer
37
  def get_pipeline():
38
  model = AutoModelForSequenceClassification.from_pretrained("moonahhyun/project-uspto")
39
  tokenizer = AutoTokenizer.from_pretrained("moonahhyun/project-uspto")
40
  pl = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
41
  return pl
42
+ ### Perform patentability analysis
 
43
  if st.button("Submit"):
44
+ with st.spinner("Analyzing the patentability..."):
45
  pl = get_pipeline()
46
  result = pl(abstract)
47
  label = result[0]['label']
48
  score = result[0]['score']
49
+ # Print score when label = accepted
50
  if label == "ACCEPTED":
51
  st.write(f"Patentability Score: {score}")
52
  st.write(f"Higher % of being: {label}")
53
+ # Print 1 - score when label = rejected
54
  else:
55
  score = 1 - score
56
  st.write(f"Patentability Score: {score}")