rb757 committed on
Commit
0765bcf
·
1 Parent(s): c2519f2

Add Streamlit app for patentability score prediction

Browse files
Files changed (1) hide show
  1. app.py +69 -23
app.py CHANGED
@@ -29,28 +29,86 @@ val_df = pd.DataFrame(dataset_dict['validation'])
29
  print("Train set columns:", train_df.columns.tolist())
30
  print("Validation set columns:", val_df.columns.tolist())
31
 
32
- # Title and description
33
- st.title("Milestone Patent 🐨")
34
- st.write("Select a patent application to evaluate its patentability.")
 
 
 
 
35
 
36
- # Dropdown for patent numbers
37
- patent_numbers = train_df['patent_number'].unique()
38
- selected_patent = st.selectbox("Select Patent Number", patent_numbers)
39
 
40
- # Retrieve abstract and claims
41
- if selected_patent:
42
  patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
 
43
  abstract = patent_info['abstract']
44
  claims = patent_info['claims']
 
 
 
 
 
 
 
 
 
45
 
46
- # Display the abstract and claims
 
 
 
 
 
 
47
  st.text_area("Abstract", abstract, height=150)
 
 
 
 
48
  st.text_area("Claims", claims, height=150)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  # Submit button
51
  if st.button("Get Patentability Score"):
52
  # Prepare the input text
53
- input_text = f"{abstract} {claims}"
54
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
55
 
56
  # Get the model prediction
@@ -61,16 +119,4 @@ if selected_patent:
61
  # Display the patentability score
62
  decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
63
  score = decision_labels[predictions.item()]
64
- st.write(f"Patentability Score: **{score}**")
65
-
66
- # Additional button to evaluate the model on the validation set
67
- if st.button("Evaluate Model"):
68
- eval_logits = []
69
- for _, row in val_df.iterrows():
70
- input_text = f"{row['abstract']} {row['claims']}"
71
- inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding='max_length', max_length=512)
72
- with torch.no_grad():
73
- logits = model(**inputs).logits
74
- eval_logits.append(logits)
75
-
76
- st.write("Evaluation complete.")
 
29
  print("Train set columns:", train_df.columns.tolist())
30
  print("Validation set columns:", val_df.columns.tolist())
31
 
32
+ # Check if 'patent_number' exists
33
+ if 'patent_number' not in train_df.columns:
34
+ st.error("Column 'patent_number' not found in the training dataset.")
35
+ else:
36
+ # Title and description
37
+ st.title("📜 Milestone Patent Evaluation 🐨")
38
+ st.write("Select a patent application to evaluate its patentability.")
39
 
40
+ # Dropdown for patent numbers
41
+ patent_numbers = train_df['patent_number'].unique()
42
+ selected_patent = st.selectbox("Select Patent Number", patent_numbers)
43
 
44
+ # Retrieve relevant information
 
45
  patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
46
+ title = patent_info['title']
47
  abstract = patent_info['abstract']
48
  claims = patent_info['claims']
49
+ background = patent_info['background']
50
+ summary = patent_info['summary']
51
+ description = patent_info['description']
52
+ cpc_label = patent_info['cpc_label']
53
+ ipc_label = patent_info['ipc_label']
54
+ filing_date = patent_info['filing_date']
55
+ patent_issue_date = patent_info['patent_issue_date']
56
+ date_published = patent_info['date_published']
57
+ examiner_id = patent_info['examiner_id']
58
 
59
+ # Display the information
60
+ st.markdown("### Title")
61
+ st.markdown(f"**{title}**")
62
+
63
+ st.markdown("---")
64
+
65
+ st.markdown("### Abstract")
66
  st.text_area("Abstract", abstract, height=150)
67
+
68
+ st.markdown("---")
69
+
70
+ st.markdown("### Claims")
71
  st.text_area("Claims", claims, height=150)
72
+
73
+ st.markdown("---")
74
+
75
+ st.markdown("### Background")
76
+ st.text_area("Background", background, height=150)
77
+
78
+ st.markdown("---")
79
+
80
+ st.markdown("### Summary")
81
+ st.text_area("Summary", summary, height=150)
82
+
83
+ st.markdown("---")
84
+
85
+ st.markdown("### Description")
86
+ st.text_area("Description", description, height=150)
87
+
88
+ st.markdown("---")
89
+
90
+ st.markdown("### CPC Label")
91
+ st.markdown(f"**{cpc_label}**")
92
+
93
+ st.markdown("### IPC Label")
94
+ st.markdown(f"**{ipc_label}**")
95
+
96
+ st.markdown("### Filing Date")
97
+ st.markdown(f"**{filing_date}**")
98
+
99
+ st.markdown("### Patent Issue Date")
100
+ st.markdown(f"**{patent_issue_date}**")
101
+
102
+ st.markdown("### Date Published")
103
+ st.markdown(f"**{date_published}**")
104
+
105
+ st.markdown("### Examiner ID")
106
+ st.markdown(f"**{examiner_id}**")
107
 
108
  # Submit button
109
  if st.button("Get Patentability Score"):
110
  # Prepare the input text
111
+ input_text = f"{title} {abstract} {claims} {background} {summary} {description}"
112
  inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
113
 
114
  # Get the model prediction
 
119
  # Display the patentability score
120
  decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
121
  score = decision_labels[predictions.item()]
122
+ st.success(f"Patentability Score: **{score}**")