AeternumS committed on
Commit
cb4d28e
·
1 Parent(s): 70deb6a
Files changed (1) hide show
  1. app.py +31 -39
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain_core.prompts import PromptTemplate
9
  import re
10
  import json
11
 
 
12
  api_key = os.environ.get("HFBearer")
13
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
14
 
@@ -17,38 +18,31 @@ API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
17
 
18
  # Function to extract text from image
19
  def extract_text_from_image(image):
20
- text = pytesseract.image_to_string(image)
21
- return text
22
 
23
  # Function to extract JSON from text
24
  def extract_json(text):
25
- # Use regex to find the JSON between <JSON> and </JSON>
26
  match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
27
-
28
  if match:
29
- json_str = match.group(1) # Get the JSON string
30
  try:
31
- # Load the JSON string into a Python dictionary
32
- json_data = json.loads(json_str)
33
- return json_data
34
  except json.JSONDecodeError:
35
- return "Erreur de décodage JSON"
36
- else:
37
- return "Aucun JSON trouvé"
38
 
39
  # Function to get metadata title from image
40
  def get_image_metadata(image):
41
- # You can customize this function to extract other metadata as needed
42
- title = image.name.split('.')[0] # Simple title extraction from file name without extension
43
- return title
44
 
45
  def count_tokens(text):
46
  return len(text.split())
47
 
 
48
  image_params = {
49
  "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
50
  "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
51
- "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute), valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
52
  "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
53
  "echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
54
  "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
@@ -58,34 +52,31 @@ image_params = {
58
  }
59
 
60
  # Streamlit app layout
61
- st.title("API Query App")
62
- st.write("This app allows you to query the API and retrieve responses.")
63
 
 
64
  user_input = """
65
- Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
66
- Liste des paramètres : {parameters}
67
 
68
- Voici un exemple de réponse valide :
69
  <JSON>
70
  {{"date_naissance": "", "prenom": "", "nom": ""}}
71
  </JSON>
72
 
73
- Voici le texte à partir duquel vous devez extraire les paramètres :
74
  {texte}
75
  """
76
-
77
  prompt = PromptTemplate.from_template(user_input)
78
 
79
- llm = HuggingFaceEndpoint(
80
- endpoint_url=API_URL,
81
- )
82
-
83
  llm_chain = prompt | llm
84
 
85
  # File uploader for multiple images
86
  uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
87
 
88
- # Modify the Streamlit section to extract the JSON for multiple images
89
  if st.button("Submit"):
90
  if uploaded_images:
91
  all_json_data = {} # Dictionary to store JSON data for each image
@@ -93,24 +84,25 @@ if st.button("Submit"):
93
  with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
94
  image = Image.open(uploaded_image)
95
  extracted_text = extract_text_from_image(image)
 
96
 
97
- max_text_length = 500 # Adjust as needed to keep total tokens under 1024
98
  if count_tokens(extracted_text) > max_text_length:
99
  extracted_text = " ".join(extracted_text.split()[:max_text_length])
100
 
 
 
 
101
  with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
102
- # Get metadata title from the image
103
- title = get_image_metadata(uploaded_image)
104
- parameters = image_params[title]
105
- output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
106
  st.success(f"Response received for {uploaded_image.name}!")
107
 
108
  # Extract JSON from the API output
109
- json_data = extract_json(output) # Extract JSON from the API output
110
- all_json_data[title] = json_data # Store JSON data with title as key
111
- st.write(title, json_data)
112
-
113
- # Display all extracted JSON data
114
- st.write("Extracted JSON Data for all images.")
115
  else:
116
- st.warning("Please upload at least one image to extract text.")
 
9
  import re
10
  import json
11
 
12
+ # Set up the Hugging Face API key
13
  api_key = os.environ.get("HFBearer")
14
  os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
15
 
 
18
 
19
  # Function to extract text from image
20
  def extract_text_from_image(image):
21
+ return pytesseract.image_to_string(image)
 
22
 
23
  # Function to extract JSON from text
24
  def extract_json(text):
 
25
  match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
 
26
  if match:
27
+ json_str = match.group(1)
28
  try:
29
+ return json.loads(json_str)
 
 
30
  except json.JSONDecodeError:
31
+ return "Error decoding JSON"
32
+ return "No JSON found"
 
33
 
34
  # Function to get metadata title from image
35
  def get_image_metadata(image):
36
+ return image.name.split('.')[0]
 
 
37
 
38
  def count_tokens(text):
39
  return len(text.split())
40
 
41
+ # Mapping of image parameters to expected fields
42
  image_params = {
43
  "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
44
  "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
45
+ "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
46
  "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
47
  "echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
48
  "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
 
52
  }
53
 
54
  # Streamlit app layout
55
+ st.title("Medical Patient Data Extractor")
56
+ st.write("This app extracts medical patient data from uploaded images.")
57
 
58
+ # User prompt template
59
  user_input = """
60
+ You will extract parameters from a text inside a JSON object, written between <JSON> and </JSON>.
61
+ List of parameters: {parameters}
62
 
63
+ Here is an example of a valid response:
64
  <JSON>
65
  {{"date_naissance": "", "prenom": "", "nom": ""}}
66
  </JSON>
67
 
68
+ Here is the text from which you need to extract the parameters:
69
  {texte}
70
  """
 
71
  prompt = PromptTemplate.from_template(user_input)
72
 
73
+ # Initialize Hugging Face LLM
74
+ llm = HuggingFaceEndpoint(endpoint_url=API_URL)
 
 
75
  llm_chain = prompt | llm
76
 
77
  # File uploader for multiple images
78
  uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
79
 
 
80
  if st.button("Submit"):
81
  if uploaded_images:
82
  all_json_data = {} # Dictionary to store JSON data for each image
 
84
  with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
85
  image = Image.open(uploaded_image)
86
  extracted_text = extract_text_from_image(image)
87
+ st.text_area(f"Extracted Text from {uploaded_image.name}", value=extracted_text, height=200)
88
 
89
+ max_text_length = 500 # Adjust as needed
90
  if count_tokens(extracted_text) > max_text_length:
91
  extracted_text = " ".join(extracted_text.split()[:max_text_length])
92
 
93
+ title = get_image_metadata(uploaded_image)
94
+ parameters = image_params.get(title, "Unknown parameters")
95
+
96
  with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
97
+ output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
 
 
 
98
  st.success(f"Response received for {uploaded_image.name}!")
99
 
100
  # Extract JSON from the API output
101
+ json_data = extract_json(output)
102
+ all_json_data[title] = json_data
103
+ st.write(f"**{title} JSON Data:**")
104
+ st.json(json_data) # Display JSON nicely
105
+ st.write("All extracted JSON Data:")
106
+ st.json(all_json_data) # Display all extracted JSON data together
107
  else:
108
+ st.warning("Please upload at least one image to extract text.")