Spaces:
Running
Running
regraded01
committed on
Commit
•
5f61498
1
Parent(s):
23d0f06
change to JSON output
Browse files
app.py
CHANGED
@@ -7,11 +7,14 @@ try:
|
|
7 |
api_key = st.secrets.hf_credentials.hf_api
|
8 |
except:
|
9 |
api_key = st.secrets.hf_api
|
10 |
-
model_id = "meta-llama/Llama-2-
|
11 |
system_message = """
|
12 |
-
Your role is to take PDF documents and extract their raw text into a
|
13 |
-
Return the
|
14 |
-
|
|
|
|
|
|
|
15 |
"""
|
16 |
|
17 |
|
@@ -72,26 +75,35 @@ if st.session_state.key_inputs:
|
|
72 |
keys_title = st.write("\nKeys/Columns for extraction:")
|
73 |
keys_values = st.write(st.session_state.key_inputs)
|
74 |
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
api_key = st.secrets.hf_credentials.hf_api
|
8 |
except:
|
9 |
api_key = st.secrets.hf_api
|
10 |
+
model_id = "meta-llama/Llama-2-70b-chat-hf"
|
11 |
system_message = """
|
12 |
+
Your role is to take PDF documents and extract their raw text into a JSON format that can be uploaded into a database.
|
13 |
+
Return the JSON only.
|
14 |
+
For example if you need to extract information about a report written on 2nd February 2011 with an author called Jane Mary then return this only:
|
15 |
+
{'report_written_date': '02/02/2011', 'author_name': 'Jane Mary'}
|
16 |
+
Another example would be a clinical exam passed by a student on the 3rd of July 2022 would return this only:
|
17 |
+
{'result' : 'pass', 'date_of_exam' : '03/07/2022'}
|
18 |
"""
|
19 |
|
20 |
|
|
|
75 |
keys_title = st.write("\nKeys/Columns for extraction:")
|
76 |
keys_values = st.write(st.session_state.key_inputs)
|
77 |
|
78 |
+
with st.spinner("Extracting requested data"):
|
79 |
+
if st.button("Extract data!"):
|
80 |
+
user_message = f"""
|
81 |
+
Use the text provided and denoted by 3 backticks ```{pdf_text}```.
|
82 |
+
Extract the following columns and return a table that could be uploaded to an SQL database.
|
83 |
+
{'; '.join([key + ': ' + st.session_state.key_inputs[key] for key in st.session_state.key_inputs])}
|
84 |
+
"""
|
85 |
+
the_prompt = prompt_generator(
|
86 |
+
system_message=system_message, user_message=user_message
|
87 |
+
)
|
88 |
+
response = query(
|
89 |
+
{
|
90 |
+
"inputs": the_prompt,
|
91 |
+
"parameters": {"max_new_tokens": 500, "temperature": 0.1},
|
92 |
+
},
|
93 |
+
model_id,
|
94 |
+
)
|
95 |
+
try:
|
96 |
+
match = re.search(
|
97 |
+
pattern, response[0]["generated_text"], re.MULTILINE | re.DOTALL
|
98 |
+
)
|
99 |
+
if match:
|
100 |
+
response = match.group(1).strip()
|
101 |
+
|
102 |
+
response = eval(response)
|
103 |
+
|
104 |
+
st.success("Data Extracted Successfully!")
|
105 |
+
st.write(response)
|
106 |
+
except:
|
107 |
+
st.error("Unable to connect to model. Please try again later.")
|
108 |
+
|
109 |
+
# st.success(f"Data Extracted!")
|