regraded01 committed on
Commit
5f61498
1 Parent(s): 23d0f06

change to JSON output

Browse files
Files changed (1) hide show
  1. app.py +39 -27
app.py CHANGED
@@ -7,11 +7,14 @@ try:
7
  api_key = st.secrets.hf_credentials.hf_api
8
  except:
9
  api_key = st.secrets.hf_api
10
- model_id = "meta-llama/Llama-2-13b-chat-hf"
11
  system_message = """
12
- Your role is to take PDF documents and extract their raw text into a table format that can be uploaded into a database.
13
- Return the table only. For example if you need to extract information about a report written on 2nd February 2011 with an author called Jane Mary then return this only:
14
- | report_written_date | author_name | \n | --- | --- | \n | 02/02/2011 | Jane Mary |
 
 
 
15
  """
16
 
17
 
@@ -72,26 +75,35 @@ if st.session_state.key_inputs:
72
  keys_title = st.write("\nKeys/Columns for extraction:")
73
  keys_values = st.write(st.session_state.key_inputs)
74
 
75
- if st.button("Extract data!"):
76
- user_message = f"""
77
- Use the text provided and denoted by 3 backticks ```{pdf_text}```.
78
- Extract the following columns and return a table that could be uploaded to an SQL database.
79
- {'; '.join([key + ': ' + st.session_state.key_inputs[key] for key in st.session_state.key_inputs])}
80
- """
81
- the_prompt = prompt_generator(
82
- system_message=system_message, user_message=user_message
83
- )
84
- response = query(
85
- {
86
- "inputs": the_prompt,
87
- "parameters": {"max_new_tokens": 500, "temperature": 0.1},
88
- },
89
- model_id,
90
- )
91
- match = re.search(
92
- pattern, response[0]["generated_text"], re.MULTILINE | re.DOTALL
93
- )
94
- if match:
95
- response = match.group(1).strip()
96
-
97
- st.markdown(f"Data Extracted!\n{response}")
 
 
 
 
 
 
 
 
 
 
7
  api_key = st.secrets.hf_credentials.hf_api
8
  except:
9
  api_key = st.secrets.hf_api
10
+ model_id = "meta-llama/Llama-2-70b-chat-hf"
11
  system_message = """
12
+ Your role is to take PDF documents and extract their raw text into a JSON format that can be uploaded into a database.
13
+ Return the JSON only.
14
+ For example if you need to extract information about a report written on 2nd February 2011 with an author called Jane Mary then return this only:
15
+ {'report_written_date': '02/02/2011', 'author_name': 'Jane Mary'}
16
+ Another example would be a clinical exam passed by a student on the 3rd of July 2022 would return this only:
17
+ {'result' : 'pass', 'date_of_exam' : '03/07/2022'}
18
  """
19
 
20
 
 
75
  keys_title = st.write("\nKeys/Columns for extraction:")
76
  keys_values = st.write(st.session_state.key_inputs)
77
 
78
+ with st.spinner("Extracting requested data"):
79
+ if st.button("Extract data!"):
80
+ user_message = f"""
81
+ Use the text provided and denoted by 3 backticks ```{pdf_text}```.
82
+ Extract the following columns and return a table that could be uploaded to an SQL database.
83
+ {'; '.join([key + ': ' + st.session_state.key_inputs[key] for key in st.session_state.key_inputs])}
84
+ """
85
+ the_prompt = prompt_generator(
86
+ system_message=system_message, user_message=user_message
87
+ )
88
+ response = query(
89
+ {
90
+ "inputs": the_prompt,
91
+ "parameters": {"max_new_tokens": 500, "temperature": 0.1},
92
+ },
93
+ model_id,
94
+ )
95
+ try:
96
+ match = re.search(
97
+ pattern, response[0]["generated_text"], re.MULTILINE | re.DOTALL
98
+ )
99
+ if match:
100
+ response = match.group(1).strip()
101
+
102
+ response = eval(response)
103
+
104
+ st.success("Data Extracted Successfully!")
105
+ st.write(response)
106
+ except:
107
+ st.error("Unable to connect to model. Please try again later.")
108
+
109
+ # st.success(f"Data Extracted!")