Spaces:
Sleeping
Sleeping
DrishtiSharma
commited on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit app: extract technical measurements from patents.

The user supplies a date range, a number of patents, and an OpenAI model.
The script calls ``patentwiz.preprocess_data.parse_and_save_patents`` for the
date range and then ``patentwiz.qa_agent.call_QA_to_json`` once per selected
patent, displaying each returned output and the summed cost.
"""

import os
from datetime import datetime

import chromadb
import streamlit as st

from patentwiz import preprocess_data, qa_agent

# Check if the API key is loaded; without it the app cannot proceed.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    st.error(
        "OPENAI_API_KEY not found! Please set it in the environment variables "
        "or Hugging Face Secrets."
    )
    st.stop()

# Clear ChromaDB cache to fix tenant issue
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Instruction text handed to qa_agent.call_QA_to_json for every patent.
# NOTE(review): the prompt lines are reproduced flush-left exactly as they
# appear in the reviewed source; any nested indentation the author may have
# used inside the string could not be confirmed — verify against the repo.
PROMPT = """
Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:
1. **Physical Measurements**:
- Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
- For each measurement, provide the following details:
- Substance or component being measured.
- Specific value or range of the measurement.
- Unit of measurement (if provided).
- Measurement type or context (e.g., frequency, impedance, gain, etc.).
2. **Patent Metadata**:
- Title of the patent.
- Abstract summarizing the technical focus.
- Metadata, including:
- Patent number.
- Filing date.
- Inventors.
- Assignee (if applicable).
### Output Format:
The response should be formatted as a structured JSON object, as shown below:
{
"Patent_Title": "Title",
"Patent_Abstract": "Abstract",
"Patent_Metadata": {
"Patent_Number": "Number",
"Filing_Date": "Date",
"Inventors": ["Name1", "Name2"],
"Assignee": "Assignee Name"
},
"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
}
// Additional measurements
]
}
### Example:
If the patent discusses "A novel RF power amplifier operating at 2.4 GHz with a bandwidth of 20 MHz and an output power of 30 dBm," the output should be:
{
"Patent_Title": "High-Efficiency RF Power Amplifier",
"Patent_Abstract": "A novel RF power amplifier with improved impedance matching for wireless communication devices.",
"Patent_Metadata": {
"Patent_Number": "US12345678B2",
"Filing_Date": "2024-06-20",
"Inventors": ["Jane Doe", "John Smith"],
"Assignee": "TechWave Inc."
},
"Content": [
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "2.4",
"Measured_unit": "GHz",
"measurement_type": "operating frequency"
},
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "20",
"Measured_unit": "MHz",
"measurement_type": "bandwidth"
},
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "30",
"Measured_unit": "dBm",
"measurement_type": "output power"
}
]
}
### Key Considerations:
- Extract as much detailed information as possible based on the text.
- Retain accuracy: Avoid inferring data not explicitly mentioned.
- Follow the structured JSON format strictly for consistency.
- Exclude any irrelevant or redundant information. Focus only on RF and related hardware technical data.
"""


# Title and description
st.title("Technical Measurements Extractor for Patents")
st.write(
    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
    "Provide a date range to download patents and analyze them using GPT models."
)

# User Input Section
st.header("Enter Date Range for Patent Analysis")
start_date_input = st.text_input("Enter the start date (YYYY-MM-DD):", value="2024-06-20")
end_date_input = st.text_input("Enter the end date (YYYY-MM-DD):", value="2024-06-27")

num_patents_to_analyze = st.number_input(
    "Number of patents to analyze:",
    min_value=1,
    value=3,
    step=1,
    help="Specify how many patents you want to analyze.",
)

model_choice = st.selectbox(
    "Select a model for analysis:",
    ["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)

logging_enabled = st.checkbox(
    "Enable logging?", value=False, help="Toggle logging for debugging purposes."
)

# Run Analysis Button
if st.button("Analyze Patents"):
    # Tolerate stray whitespace around the typed dates.
    start_text = start_date_input.strip()
    end_text = end_date_input.strip()
    if not start_text or not end_text:
        st.error("Please enter both start and end dates!")
        st.stop()

    # Only the date parsing is guarded by the ValueError handler, so a
    # ValueError raised later by the download/analysis code is not
    # misreported to the user as a malformed date.
    try:
        start_date = datetime.strptime(start_text, "%Y-%m-%d")
        end_date = datetime.strptime(end_text, "%Y-%m-%d")
    except ValueError as ve:
        st.error(f"Invalid date format: {ve}")
        st.stop()

    # Validate date range
    if start_date > end_date:
        st.error("End date must be after start date!")
        st.stop()

    # Step 1: Download and preprocess patents
    try:
        with st.spinner("Downloading and extracting patents..."):
            saved_patent_names = preprocess_data.parse_and_save_patents(
                start_date, end_date, logging_enabled
            )
    except Exception as e:  # top-level UI boundary: report instead of crashing
        st.error(f"An unexpected error occurred: {e}")
        st.stop()

    if not saved_patent_names:
        st.error("No patents found for the given date range.")
        st.stop()
    st.success(f"{len(saved_patent_names)} patents found and processed!")

    # Step 2: Analyze patents using GPT
    # The first N saved patents are analyzed (N is clamped to what was found).
    patent_count = min(num_patents_to_analyze, len(saved_patent_names))
    total_cost = 0
    results = []

    st.write("Starting patent analysis...")
    try:
        for patent_index in range(patent_count):
            # call_QA_to_json receives the full name list plus the index of
            # the patent to process, and returns a (cost, output) pair.
            cost, output = qa_agent.call_QA_to_json(
                PROMPT,
                start_date.year,  # Pass start_date year, month, and day
                start_date.month,
                start_date.day,
                saved_patent_names,
                patent_index,
                logging_enabled,
                model_choice,
            )
            total_cost += cost
            results.append(output)

        # Step 3: Display results
        st.write(f"**Total Cost:** ${total_cost:.4f}")
        st.write("### Analysis Results:")
        for position, result in enumerate(results, start=1):
            st.subheader(f"Patent {position}")
            st.json(result)
    except Exception as e:  # top-level UI boundary: report instead of crashing
        st.error(f"An unexpected error occurred: {e}")
|