DrishtiSharma committed on
Commit
dfbdb14
·
verified ·
1 Parent(s): 3491a63

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +171 -0
app.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import chromadb
3
+ from datetime import datetime
4
+ import streamlit as st
5
+ from patentwiz import preprocess_data, qa_agent
6
+
# Abort early when no OpenAI credentials are configured: an error is shown
# and the script run is halted before any further work is attempted.
api_key = os.environ.get("OPENAI_API_KEY")
if api_key is None or api_key == "":
    st.error("OPENAI_API_KEY not found! Please set it in the environment variables or Hugging Face Secrets.")
    st.stop()

# Reset ChromaDB's shared system cache (workaround for the tenant issue).
chromadb.api.client.SharedSystemClient.clear_system_cache()

# Instruction prompt handed to the QA agent. It asks for RF-related
# measurements and patent metadata, returned as a structured JSON object.
PROMPT = """
Task: Carefully review the provided patent text and extract all relevant technical information, specifically for RF devices, antennas, and related hardware. Focus on the following parameters:
1. **Physical Measurements**:
- Frequency, impedance, voltage, current, power, gain, bandwidth, radiation efficiency, and other measurable RF parameters.
- For each measurement, provide the following details:
- Substance or component being measured.
- Specific value or range of the measurement.
- Unit of measurement (if provided).
- Measurement type or context (e.g., frequency, impedance, gain, etc.).
2. **Patent Metadata**:
- Title of the patent.
- Abstract summarizing the technical focus.
- Metadata, including:
- Patent number.
- Filing date.
- Inventors.
- Assignee (if applicable).
### Output Format:
The response should be formatted as a structured JSON object, as shown below:
{
"Patent_Title": "Title",
"Patent_Abstract": "Abstract",
"Patent_Metadata": {
"Patent_Number": "Number",
"Filing_Date": "Date",
"Inventors": ["Name1", "Name2"],
"Assignee": "Assignee Name"
},
"Content": [
{
"Measurement_substance": "substance",
"Measured_value": "value",
"Measured_unit": "unit",
"measurement_type": "type"
}
// Additional measurements
]
}
### Example:
If the patent discusses "A novel RF power amplifier operating at 2.4 GHz with a bandwidth of 20 MHz and an output power of 30 dBm," the output should be:
{
"Patent_Title": "High-Efficiency RF Power Amplifier",
"Patent_Abstract": "A novel RF power amplifier with improved impedance matching for wireless communication devices.",
"Patent_Metadata": {
"Patent_Number": "US12345678B2",
"Filing_Date": "2024-06-20",
"Inventors": ["Jane Doe", "John Smith"],
"Assignee": "TechWave Inc."
},
"Content": [
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "2.4",
"Measured_unit": "GHz",
"measurement_type": "operating frequency"
},
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "20",
"Measured_unit": "MHz",
"measurement_type": "bandwidth"
},
{
"Measurement_substance": "RF power amplifier",
"Measured_value": "30",
"Measured_unit": "dBm",
"measurement_type": "output power"
}
]
}
### Key Considerations:
- Extract as much detailed information as possible based on the text.
- Retain accuracy: Avoid inferring data not explicitly mentioned.
- Follow the structured JSON format strictly for consistency.
- Exclude any irrelevant or redundant information. Focus only on RF and related hardware technical data.
"""
92
+
93
+
# Page heading and a short explanation of what the app does.
st.title("Technical Measurements Extractor for Patents")
st.write(
    "Analyze patents to extract physical measurements such as frequency, bandwidth, and more. "
    + "Provide a date range to download patents and analyze them using GPT models."
)

# --- Date range -----------------------------------------------------------
# Dates are collected as free text; they are parsed later, when the
# analysis button is pressed.
st.header("Enter Date Range for Patent Analysis")
start_date_input = st.text_input(
    "Enter the start date (YYYY-MM-DD):",
    value="2024-06-20",
)
end_date_input = st.text_input(
    "Enter the end date (YYYY-MM-DD):",
    value="2024-06-27",
)

# --- Analysis options -----------------------------------------------------
# How many of the downloaded patents are sent for analysis.
num_patents_to_analyze = st.number_input(
    "Number of patents to analyze:",
    min_value=1,
    value=3,
    step=1,
    help="Specify how many patents you want to analyze.",
)

# Which OpenAI model performs the extraction.
model_choice = st.selectbox(
    "Select a model for analysis:",
    options=["gpt-3.5-turbo", "gpt-4"],
    help="Choose the OpenAI GPT model for the analysis.",
)

# Flag forwarded to the patentwiz helpers; off by default.
logging_enabled = st.checkbox(
    "Enable logging?",
    value=False,
    help="Toggle logging for debugging purposes.",
)
115
+
def _parse_date_range(start_text: str, end_text: str):
    """Parse two ``YYYY-MM-DD`` strings into a ``(start, end)`` datetime pair.

    Raises:
        ValueError: if either string does not match ``%Y-%m-%d``.
    """
    start = datetime.strptime(start_text, "%Y-%m-%d")
    end = datetime.strptime(end_text, "%Y-%m-%d")
    return start, end


def _analyze_and_display(start_date, end_date, num_patents, model, log_enabled) -> None:
    """Download patents for the date range, analyze the first few, show results.

    Args:
        start_date: Parsed start of the range (datetime).
        end_date: Parsed end of the range (datetime).
        num_patents: Upper bound on how many saved patents are analyzed.
        model: Model name forwarded to ``qa_agent.call_QA_to_json``.
        log_enabled: Logging flag forwarded to the patentwiz helpers.

    Exceptions raised by the patentwiz helpers propagate to the caller.
    """
    # Step 1: Download and preprocess patents
    with st.spinner("Downloading and extracting patents..."):
        saved_patent_names = preprocess_data.parse_and_save_patents(
            start_date, end_date, log_enabled
        )
    if not saved_patent_names:
        st.error("No patents found for the given date range.")
        return
    st.success(f"{len(saved_patent_names)} patents found and processed!")

    # Step 2: Analyze patents using GPT.
    # Only the first `num_patents` saved patents are analyzed (a prefix of the
    # list, not a random sample), so position `i` in the selection is also the
    # position in `saved_patent_names`.
    selected_patents = saved_patent_names[:num_patents]
    total_cost = 0
    results = []

    st.write("Starting patent analysis...")
    for i, _patent_file in enumerate(selected_patents):
        # NOTE(review): presumably `i` tells call_QA_to_json which entry of
        # saved_patent_names to analyze — confirm against patentwiz.
        cost, output = qa_agent.call_QA_to_json(
            PROMPT,
            start_date.year,  # Pass start_date year, month, and day
            start_date.month,
            start_date.day,
            saved_patent_names,
            i,
            log_enabled,
            model,
        )
        total_cost += cost
        results.append(output)

    # Step 3: Display results
    st.write(f"**Total Cost:** ${total_cost:.4f}")
    st.write("### Analysis Results:")
    for position, result in enumerate(results, start=1):
        st.subheader(f"Patent {position}")
        st.json(result)


# Run Analysis Button
if st.button("Analyze Patents"):
    # Strip surrounding whitespace so blank-looking input counts as missing
    # and a padded date such as " 2024-06-20 " still parses.
    start_text = start_date_input.strip()
    end_text = end_date_input.strip()

    if not start_text or not end_text:
        st.error("Please enter both start and end dates!")
    else:
        # Only date parsing is guarded by the ValueError handler; a ValueError
        # raised later by the download/analysis pipeline must not be reported
        # as a date-format problem.
        try:
            start_date, end_date = _parse_date_range(start_text, end_text)
        except ValueError as ve:
            st.error(f"Invalid date format: {ve}")
        else:
            if start_date > end_date:
                # Equal dates are accepted, so the message says "on or after".
                st.error("End date must be on or after start date!")
            else:
                try:
                    _analyze_and_display(
                        start_date,
                        end_date,
                        num_patents_to_analyze,
                        model_choice,
                        logging_enabled,
                    )
                except Exception as e:
                    # Top-level UI boundary: surface the failure to the user.
                    st.error(f"An unexpected error occurred: {e}")