AeternumS committed on
Commit
70deb6a
1 Parent(s): 315b363

added all parsers

Browse files
Files changed (2) hide show
  1. app.py +89 -37
  2. requirements.txt +4 -1
app.py CHANGED
@@ -2,63 +2,115 @@ import streamlit as st
2
  import requests
3
  from PIL import Image
4
  import pytesseract
5
-
6
- import os
 
 
 
 
7
 
8
  api_key = os.environ.get("HFBearer")
 
9
 
10
  # API URL and headers
11
  API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
12
- headers = {
13
- "Accept": "application/json",
14
- "Authorization": api_key, # Replace with your actual token
15
- "Content-Type": "application/json"
16
- }
17
-
18
- # Function to query the API
19
- def query(payload):
20
- response = requests.post(API_URL, headers=headers, json=payload)
21
- return response.json()
22
 
23
  # Function to extract text from image
24
- def extract_text_from_image(image_path):
25
- image = Image.open(image_path)
26
  text = pytesseract.image_to_string(image)
27
  return text
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  # Streamlit app layout
30
  st.title("API Query App")
31
  st.write("This app allows you to query the API and retrieve responses.")
32
 
33
  user_input = """
34
- Extrais les paramètres suivants dans un json:
 
 
 
 
 
 
35
 
36
- - Date de naissance
37
- - Prénom
38
- - Nom du patient
39
 
40
- Dans ta réponse, le json (uniquement) doit apparaitre entre <JSON> et </JSON>.
41
- Ne répond que par le json entre les balises, si les paramètres n'existent pas, laisse les champs vides.
42
 
43
- Voici le texte qui contient les paramètres à extraire:
 
 
44
 
45
- """
46
 
47
- # File uploader for the image
48
- uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
49
 
50
- # Submit button
51
  if st.button("Submit"):
52
- if uploaded_image is not None:
53
- with st.spinner("Extracting text from image..."):
54
- # Extract text from the uploaded image
55
- extracted_text = extract_text_from_image(uploaded_image)
56
- st.write("Extracted text from image.")
57
-
58
- with st.spinner("Fetching response from API..."):
59
- # Query the API with user input
60
- llm_input = user_input + extracted_text + "\n Donne uniquement le json entre balises, pas le texte:"
61
- output = query({"inputs": llm_input, "parameters": {}})
62
- st.success("Response received!")
63
- st.write(output) # Display the response
 
 
 
 
 
 
 
 
 
 
64
 
 
 
 
 
 
2
  import requests
3
  from PIL import Image
4
  import pytesseract
5
+ import os
6
+ from langchain_huggingface import HuggingFaceEndpoint
7
+ from langchain.chains import LLMChain
8
+ from langchain_core.prompts import PromptTemplate
9
+ import re
10
+ import json
11
 
12
# Bearer token for the Hugging Face endpoint, read from the "HFBearer"
# environment variable. ``os.environ.get`` returns None when it is unset.
api_key = os.environ.get("HFBearer")
if api_key:
    # os.environ only accepts str values: assigning None would raise a
    # TypeError while the module is still loading. Only export the token
    # when one is actually configured.
    # NOTE(review): HuggingFaceEndpoint is expected to pick the token up
    # from this variable -- confirm against the installed version.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

# URL of the hosted Hugging Face inference endpoint queried by the app
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
 
 
 
 
 
 
 
 
 
 
17
 
18
  # Function to extract text from image
19
def extract_text_from_image(image):
    """Run OCR on ``image`` and return the recognised text.

    ``image`` is handed unchanged to ``pytesseract.image_to_string``; the
    submit handler in this file passes an image opened with ``Image.open``.
    """
    return pytesseract.image_to_string(image)
22
 
23
+ # Function to extract JSON from text
24
def extract_json(text):
    """Extract and decode the JSON payload enclosed in ``<JSON>`` tags.

    Args:
        text: Raw model output expected to contain ``<JSON> ... </JSON>``.

    Returns:
        The decoded JSON value when a tagged block is found and parses.
        Otherwise one of two sentinel strings: ``"Aucun JSON trouvé"`` when
        no tagged block exists (or ``text`` is not a string), or
        ``"Erreur de décodage JSON"`` when the block is not valid JSON.
    """
    # A non-string output (e.g. None) cannot contain a block; report it the
    # same way as a missing block instead of letting re.search raise.
    if not isinstance(text, str):
        return "Aucun JSON trouvé"

    # Non-greedy so only the first tagged block is captured; DOTALL lets the
    # payload span several lines; IGNORECASE tolerates <json> ... </json>.
    match = re.search(
        r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL | re.IGNORECASE
    )
    if match is None:
        return "Aucun JSON trouvé"

    json_str = match.group(1)
    # Models sometimes wrap the payload in a Markdown code fence
    # (```json ... ```); drop the fence so the payload itself is parsed.
    if json_str.startswith("```"):
        json_str = re.sub(r"^```[A-Za-z]*\s*|\s*```$", "", json_str)

    try:
        return json.loads(json_str)
    except json.JSONDecodeError:
        return "Erreur de décodage JSON"
38
+
39
+ # Function to get metadata title from image
40
def get_image_metadata(image):
    """Return the title of an uploaded image, derived from its file name.

    The title is the part of ``image.name`` that precedes the FIRST dot, so
    ``"ECG.png"`` gives ``"ECG"`` and ``"a.b.png"`` gives ``"a"``. A name
    without any dot is returned unchanged.
    """
    # Extend here if other metadata needs to be extracted.
    stem, _, _ = image.name.partition('.')
    return stem
44
+
45
def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``.

    Despite the name this is a plain word count, not a model tokenizer
    count; it only serves as a rough size estimate.
    """
    words = text.split()
    return len(words)
47
+
48
# Maps a document title (the uploaded file name up to its first dot) to the
# comma-separated list of parameters the model is asked to extract for that
# kind of document. The names are inserted verbatim into the prompt, so they
# are kept as clean snake_case identifiers: a stray ")" was removed from the
# ECG list and "score calcique" was aligned with "score_calcique".
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
59
+
60
  # Streamlit app layout
61
  st.title("API Query App")
62
  st.write("This app allows you to query the API and retrieve responses.")
63
 
64
# Prompt template sent to the model. ``{parameters}`` and ``{texte}`` are the
# two template variables supplied on each invocation; the doubled braces keep
# the example JSON object literal instead of being read as a variable.
user_input = """
Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
Liste des paramètres : {parameters}

Voici un exemple de réponse valide :
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>

Voici le texte à partir duquel vous devez extraire les paramètres :
{texte}
"""

prompt = PromptTemplate.from_template(user_input)

# Client for the hosted inference endpoint at API_URL.
llm = HuggingFaceEndpoint(endpoint_url=API_URL)

# Pipeline: fill in the prompt template, then send the result to the model.
llm_chain = prompt | llm
84
 
85
+ # File uploader for multiple images
86
uploaded_images = st.file_uploader(
    "Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True
)

# For every uploaded image: OCR the text, ask the model for the parameters
# configured for that document type, and show the JSON it returns.
if st.button("Submit"):
    if uploaded_images:
        all_json_data = {}  # Extracted JSON (or error string) per image title
        max_text_length = 500  # Adjust as needed to keep total tokens under 1024

        for uploaded_image in uploaded_images:
            # The title (file name up to its first dot) selects the parameter
            # list. An unknown title used to raise KeyError and abort the
            # whole run; now that single file is reported and skipped.
            title = get_image_metadata(uploaded_image)
            parameters = image_params.get(title)
            if parameters is None:
                st.warning(
                    f"No parameter list is configured for '{title}' "
                    f"({uploaded_image.name}); skipping this image."
                )
                continue

            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                image = Image.open(uploaded_image)
                extracted_text = extract_text_from_image(image)

            # Truncate long OCR output to the first max_text_length words.
            if count_tokens(extracted_text) > max_text_length:
                extracted_text = " ".join(extracted_text.split()[:max_text_length])

            with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
                output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
            st.success(f"Response received for {uploaded_image.name}!")

            # Extract JSON from the API output and store it under the title
            json_data = extract_json(output)
            all_json_data[title] = json_data
            st.write(title, json_data)

        # Display all extracted JSON data. Previously only the caption was
        # written and the collected dictionary was never shown.
        st.write("Extracted JSON Data for all images.")
        st.write(all_json_data)
    else:
        st.warning("Please upload at least one image to extract text.")
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
  requests
2
  pytesseract
3
- streamlit
 
 
 
 
1
  requests
2
  pytesseract
3
+ streamlit
4
+ langchain_huggingface
5
+ langchain
6
+ huggingface_hub