Spaces:

ppaihack
/

ZamaKlinik

Sleeping

App Files Community

AeternumS committed on Sep 28, 2024

Commit

70deb6a

1 Parent(s): 315b363

added all parsers

Browse files

Files changed (2) hide show

app.py +89 -37
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -2,63 +2,115 @@ import streamlit as st
 import requests
 from PIL import Image
 import pytesseract
-import os
 api_key = os.environ.get("HFBearer")
 # API URL and headers
 API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
-headers = {
-    "Accept": "application/json",
-    "Authorization": api_key,  # Replace with your actual token
-    "Content-Type": "application/json"
-}
-# Function to query the API
-def query(payload):
-    response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json()
 # Function to extract text from image
-def extract_text_from_image(image_path):
-    image = Image.open(image_path)
     text = pytesseract.image_to_string(image)
     return text
 # Streamlit app layout
 st.title("API Query App")
 st.write("This app allows you to query the API and retrieve responses.")
 user_input = """
-Extrais les paramètres suivants dans un json:
-- Date de naissance
-- Prénom
-- Nom du patient
-Dans ta réponse, le json (uniquement) doit apparaitre entre <JSON> et </JSON>.
-Ne répond que par le json entre les balises, si les paramètres n'existent pas, laisse les champs vides.
-Voici le texte qui contient les paramètres à extraire:
-"""
-# File uploader for the image
-uploaded_image = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
-# Submit button
 if st.button("Submit"):
-    if uploaded_image is not None:
-        with st.spinner("Extracting text from image..."):
-            # Extract text from the uploaded image
-            extracted_text = extract_text_from_image(uploaded_image)
-            st.write("Extracted text from image.")
-    with st.spinner("Fetching response from API..."):
-        # Query the API with user input
-        llm_input = user_input + extracted_text + "\n Donne uniquement le json entre balises, pas le texte:"
-        output = query({"inputs": llm_input, "parameters": {}})
-        st.success("Response received!")
-        st.write(output)  # Display the response

 import requests
 from PIL import Image
 import pytesseract
+import os
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain.chains import LLMChain
+from langchain_core.prompts import PromptTemplate
+import re
+import json
 api_key = os.environ.get("HFBearer")
+os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
 # API URL and headers
 API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
 # Function to extract text from image
+def extract_text_from_image(image):
     text = pytesseract.image_to_string(image)
     return text
# Function to extract JSON from text
def extract_json(text):
    """Return the first decodable JSON payload wrapped in <JSON>...</JSON>.

    Every ``<JSON> ... </JSON>`` section of ``text`` is tried in order, so a
    malformed first section does not hide a valid later one.

    Args:
        text: Raw model output. Anything that is not a ``str`` is treated as
            containing no JSON instead of raising ``TypeError``.

    Returns:
        The decoded JSON value of the first section that parses;
        ``"Erreur de décodage JSON"`` when sections exist but none parses;
        ``"Aucun JSON trouvé"`` when there is no tagged section at all.
    """
    if not isinstance(text, str):
        return "Aucun JSON trouvé"

    saw_block = False
    # Lazy match + DOTALL: each section may span several lines and stops at
    # the nearest closing tag.
    for match in re.finditer(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL):
        saw_block = True
        try:
            return json.loads(match.group(1))
        except json.JSONDecodeError:
            # Keep looking: a later section may still be valid.
            continue

    if saw_block:
        return "Erreur de décodage JSON"
    return "Aucun JSON trouvé"
# Function to get metadata title from image
def get_image_metadata(image):
    """Derive a title from ``image.name``.

    The title is everything before the first ``.`` in the name, so
    ``"ECG.png"`` gives ``"ECG"`` and a name without a dot is returned whole.
    The caller in this file uses the title as a key into ``image_params``.
    """
    # Extend this function if other metadata is needed.
    stem, _dot, _rest = image.name.partition('.')
    return stem
def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``.

    This is a word count, not a model-tokenizer count; the caller uses it as
    a rough size check before truncating OCR output.
    """
    return sum(1 for _word in text.split())
# Parameters to extract for each document type. Keys are the titles derived
# from uploaded file names; values are comma-separated field names inserted
# into the prompt's {parameters} slot.
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    # Fixed: a stray ")" was attached to ECG_repos_valeur_par_minute.
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    # Fixed: "score calcique" now uses the underscore form used elsewhere.
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
 # Streamlit app layout
 st.title("API Query App")
 st.write("This app allows you to query the API and retrieve responses.")
 user_input = """
+Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
+Liste des paramètres : {parameters}
+Voici un exemple de réponse valide :
+<JSON>
+{{"date_naissance": "", "prenom": "", "nom": ""}}
+</JSON>
+Voici le texte à partir duquel vous devez extraire les paramètres :
+{texte}
+"""
+prompt = PromptTemplate.from_template(user_input)
+llm = HuggingFaceEndpoint(
+    endpoint_url=API_URL,
+)
+llm_chain = prompt | llm
+# File uploader for multiple images
+uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)
+# Modify the Streamlit section to extract the JSON for multiple images
 if st.button("Submit"):
+    if uploaded_images:
+        all_json_data = {}  # Dictionary to store JSON data for each image
+        for uploaded_image in uploaded_images:
+            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
+                image = Image.open(uploaded_image)
+                extracted_text = extract_text_from_image(image)
+                max_text_length = 500  # Adjust as needed to keep total tokens under 1024
+                if count_tokens(extracted_text) > max_text_length:
+                    extracted_text = " ".join(extracted_text.split()[:max_text_length])
+                with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
+                    # Get metadata title from the image
+                    title = get_image_metadata(uploaded_image)
+                    parameters = image_params[title]
+                    output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
+                    st.success(f"Response received for {uploaded_image.name}!")
+                    # Extract JSON from the API output
+                    json_data = extract_json(output)  # Extract JSON from the API output
+                    all_json_data[title] = json_data  # Store JSON data with title as key
+                    st.write(title, json_data)
+        # Display all extracted JSON data
+        st.write("Extracted JSON Data for all images.")
+    else:
+        st.warning("Please upload at least one image to extract text.")

requirements.txt CHANGED Viewed

@@ -1,3 +1,6 @@
 requests
 pytesseract
-streamlit

 requests
 pytesseract
+streamlit
+langchain_huggingface
+langchain
+huggingface_hub