Spaces: Sleeping
Add Streamlit app for patentability score prediction
Browse files
app.py
CHANGED
@@ -29,28 +29,86 @@ val_df = pd.DataFrame(dataset_dict['validation'])
|
|
29 |
print("Train set columns:", train_df.columns.tolist())
|
30 |
print("Validation set columns:", val_df.columns.tolist())
|
31 |
|
32 |
-
#
|
33 |
-
|
34 |
-
st.…  [remainder of this removed line was lost in extraction]
|
|
|
|
|
|
|
|
|
35 |
|
36 |
-
# Dropdown for patent numbers
|
37 |
-
patent_numbers = train_df['patent_number'].unique()
|
38 |
-
selected_patent = st.selectbox("Select Patent Number", patent_numbers)
|
39 |
|
40 |
-
# Retrieve
|
41 |
-
if selected_patent:
|
42 |
patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
|
|
|
43 |
abstract = patent_info['abstract']
|
44 |
claims = patent_info['claims']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
-
# Display the
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
st.text_area("Abstract", abstract, height=150)
|
|
|
|
|
|
|
|
|
48 |
st.text_area("Claims", claims, height=150)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
# Submit button
|
51 |
if st.button("Get Patentability Score"):
|
52 |
# Prepare the input text
|
53 |
-
input_text = f"{abstract} {claims}"
|
54 |
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
|
55 |
|
56 |
# Get the model prediction
|
@@ -61,16 +119,4 @@ if selected_patent:
|
|
61 |
# Display the patentability score
|
62 |
decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
|
63 |
score = decision_labels[predictions.item()]
|
64 |
-
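st.…  [remainder of this removed line was lost in extraction; replaced in the new version by the st.success(...) call]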
|
65 |
-
|
66 |
-
# Additional button to evaluate the model on the validation set
|
67 |
-
if st.button("Evaluate Model"):
|
68 |
-
eval_logits = []
|
69 |
-
for _, row in val_df.iterrows():
|
70 |
-
input_text = f"{row['abstract']} {row['claims']}"
|
71 |
-
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding='max_length', max_length=512)
|
72 |
-
with torch.no_grad():
|
73 |
-
logits = model(**inputs).logits
|
74 |
-
eval_logits.append(logits)
|
75 |
-
|
76 |
-
st.write("Evaluation complete.")
|
|
|
29 |
print("Train set columns:", train_df.columns.tolist())
|
30 |
print("Validation set columns:", val_df.columns.tolist())
|
31 |
|
32 |
+
# Check if 'patent_number' exists
|
33 |
+
if 'patent_number' not in train_df.columns:
|
34 |
+
st.error("Column 'patent_number' not found in the training dataset.")
|
35 |
+
else:
|
36 |
+
# Title and description
|
37 |
+
st.title("📜 Milestone Patent Evaluation 🐨")
|
38 |
+
st.write("Select a patent application to evaluate its patentability.")
|
39 |
|
40 |
+
# Dropdown for patent numbers
|
41 |
+
patent_numbers = train_df['patent_number'].unique()
|
42 |
+
selected_patent = st.selectbox("Select Patent Number", patent_numbers)
|
43 |
|
44 |
+
# Retrieve relevant information
|
|
|
45 |
patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
|
46 |
+
title = patent_info['title']
|
47 |
abstract = patent_info['abstract']
|
48 |
claims = patent_info['claims']
|
49 |
+
background = patent_info['background']
|
50 |
+
summary = patent_info['summary']
|
51 |
+
description = patent_info['description']
|
52 |
+
cpc_label = patent_info['cpc_label']
|
53 |
+
ipc_label = patent_info['ipc_label']
|
54 |
+
filing_date = patent_info['filing_date']
|
55 |
+
patent_issue_date = patent_info['patent_issue_date']
|
56 |
+
date_published = patent_info['date_published']
|
57 |
+
examiner_id = patent_info['examiner_id']
|
58 |
|
59 |
+
# Display the information
|
60 |
+
st.markdown("### Title")
|
61 |
+
st.markdown(f"**{title}**")
|
62 |
+
|
63 |
+
st.markdown("---")
|
64 |
+
|
65 |
+
st.markdown("### Abstract")
|
66 |
st.text_area("Abstract", abstract, height=150)
|
67 |
+
|
68 |
+
st.markdown("---")
|
69 |
+
|
70 |
+
st.markdown("### Claims")
|
71 |
st.text_area("Claims", claims, height=150)
|
72 |
+
|
73 |
+
st.markdown("---")
|
74 |
+
|
75 |
+
st.markdown("### Background")
|
76 |
+
st.text_area("Background", background, height=150)
|
77 |
+
|
78 |
+
st.markdown("---")
|
79 |
+
|
80 |
+
st.markdown("### Summary")
|
81 |
+
st.text_area("Summary", summary, height=150)
|
82 |
+
|
83 |
+
st.markdown("---")
|
84 |
+
|
85 |
+
st.markdown("### Description")
|
86 |
+
st.text_area("Description", description, height=150)
|
87 |
+
|
88 |
+
st.markdown("---")
|
89 |
+
|
90 |
+
st.markdown("### CPC Label")
|
91 |
+
st.markdown(f"**{cpc_label}**")
|
92 |
+
|
93 |
+
st.markdown("### IPC Label")
|
94 |
+
st.markdown(f"**{ipc_label}**")
|
95 |
+
|
96 |
+
st.markdown("### Filing Date")
|
97 |
+
st.markdown(f"**{filing_date}**")
|
98 |
+
|
99 |
+
st.markdown("### Patent Issue Date")
|
100 |
+
st.markdown(f"**{patent_issue_date}**")
|
101 |
+
|
102 |
+
st.markdown("### Date Published")
|
103 |
+
st.markdown(f"**{date_published}**")
|
104 |
+
|
105 |
+
st.markdown("### Examiner ID")
|
106 |
+
st.markdown(f"**{examiner_id}**")
|
107 |
|
108 |
# Submit button
|
109 |
if st.button("Get Patentability Score"):
|
110 |
# Prepare the input text
|
111 |
+
input_text = f"{title} {abstract} {claims} {background} {summary} {description}"
|
112 |
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)
|
113 |
|
114 |
# Get the model prediction
|
|
|
119 |
# Display the patentability score
|
120 |
decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
|
121 |
score = decision_labels[predictions.item()]
|
122 |
+
st.success(f"Patentability Score: **{score}**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|