Spaces:
Sleeping
Sleeping
File size: 4,691 Bytes
e904228 96c3529 c346bb7 70deb6a fcbfa92 cb4d28e d8ecef0 70deb6a 96c3529 e904228 c346bb7 70deb6a cb4d28e c346bb7 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a 96c3529 cb4d28e e904228 cb4d28e d8ecef0 cb4d28e 70deb6a cb4d28e 70deb6a 315b363 cb4d28e 70deb6a 315b363 cb4d28e 70deb6a c346bb7 70deb6a e904228 96c3529 70deb6a 44ee111 70deb6a a935c1d 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a cb4d28e 70deb6a 44ee111 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
import streamlit as st
import requests
from PIL import Image
import pytesseract
import os
from langchain_huggingface import HuggingFaceEndpoint
from langchain.chains import LLMChain
from langchain_core.prompts import PromptTemplate
import re
import json
# Hugging Face credentials: the token is read from the "HFBearer" environment
# variable and re-exported as HUGGINGFACEHUB_API_TOKEN.
api_key = os.environ.get("HFBearer")
if api_key:
    # os.environ only accepts strings: assigning None (variable not set)
    # raises TypeError. Export the token only when it is configured, and
    # otherwise leave any pre-existing HUGGINGFACEHUB_API_TOKEN untouched.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key
# URL of the Hugging Face inference endpoint queried by the app
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
# OCR helper
def extract_text_from_image(image):
    """Run OCR on *image* and return whatever text pytesseract recognises.

    The argument is forwarded unchanged to ``pytesseract.image_to_string``.
    """
    recognised_text = pytesseract.image_to_string(image)
    return recognised_text
# Pattern for the <JSON>...</JSON> envelope; compiled once at import time.
# DOTALL lets the payload span several lines.
_JSON_BLOCK_RE = re.compile(r'<JSON>\s*(.*?)\s*</JSON>', re.DOTALL)


def extract_json(text):
    """Decode the first ``<JSON>...</JSON>`` section found in *text*.

    Args:
        text: Raw text to search. ``None`` or any other non-string value is
            treated as containing no JSON instead of raising ``TypeError``.

    Returns:
        The decoded JSON value when a section is found and parses cleanly;
        the string ``"Error decoding JSON"`` when a section is found but is
        not valid JSON; the string ``"No JSON found"`` when no section exists.
    """
    if not isinstance(text, str):
        return "No JSON found"
    match = _JSON_BLOCK_RE.search(text)
    if match is None:
        return "No JSON found"
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return "Error decoding JSON"
# Derive a title from an uploaded file's name
def get_image_metadata(image):
    """Return the part of ``image.name`` that precedes its first dot.

    A name that contains no dot is returned unchanged.
    """
    stem, _dot, _remainder = image.name.partition('.')
    return stem
def count_tokens(text):
    """Return the number of whitespace-separated words in *text*.

    Despite the name, no model tokenizer is involved; the count is simply
    the length of ``text.split()``.
    """
    words = text.split()
    return len(words)
# Mapping from document type (looked up with the uploaded file's name up to
# its first dot) to the comma-separated list of field names the model is asked
# to extract. Field names are snake_case throughout so the same field is
# spelled identically across document types (e.g. "score_calcique").
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
# Streamlit app layout: page title followed by a one-line description
st.title("Medical Patient Data Extractor")
st.write("This app extracts medical patient data from uploaded images.")
# Prompt template sent to the model. {parameters} and {texte} are the two
# template variables supplied when the chain is invoked; the doubled braces in
# the example response are escaped so they render as literal JSON braces.
user_input = """
You will extract parameters from a text inside a JSON object, written between <JSON> and </JSON>.
List of parameters: {parameters}
Here is an example of a valid response:
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>
Here is the text from which you need to extract the parameters:
{texte}
"""
prompt = PromptTemplate.from_template(user_input)
# Initialize the Hugging Face LLM client for the endpoint at API_URL
llm = HuggingFaceEndpoint(endpoint_url=API_URL)
# Pipe the filled-in prompt into the model; the chain is invoked later with a
# dict providing the "texte" and "parameters" values.
llm_chain = prompt | llm
# File uploader for multiple images
uploaded_images = st.file_uploader(
    "Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True
)

# On "Submit": OCR every uploaded image, ask the model to extract the fields
# configured for that document type, and show the JSON per image and combined.
if st.button("Submit"):
    if uploaded_images:
        all_json_data = {}  # Extracted JSON keyed by document title
        max_text_length = 500  # Max words sent to the model; adjust as needed
        for index, uploaded_image in enumerate(uploaded_images):
            with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
                try:
                    image = Image.open(uploaded_image)
                except OSError as exc:
                    # Pillow signals unreadable/corrupt images with OSError
                    # subclasses; report and move on so that one bad file
                    # does not abort the whole batch.
                    st.error(f"Could not read image {uploaded_image.name}: {exc}")
                    continue
                # Display the uploaded image
                # NOTE(review): recent Streamlit releases deprecate
                # use_column_width in favour of use_container_width; confirm
                # against the Streamlit version this app is pinned to.
                st.image(image, caption=f"Uploaded Image: {uploaded_image.name}", use_column_width=True)
                extracted_text = extract_text_from_image(image)
                # The upload index is part of the widget key so that two
                # files sharing a name do not produce duplicate widget keys.
                st.text_area(
                    f"Extracted Text from {uploaded_image.name}",
                    value=extracted_text,
                    height=200,
                    key=f"{index}-{uploaded_image.name}",
                )
                # Truncate long OCR output to the first max_text_length words
                if count_tokens(extracted_text) > max_text_length:
                    extracted_text = " ".join(extracted_text.split()[:max_text_length])
                title = get_image_metadata(uploaded_image)
                parameters = image_params.get(title)
                if parameters is None:
                    # Keep the original fallback text, but make the missing
                    # configuration visible instead of failing silently.
                    st.warning(
                        f"No parameter list is configured for '{title}'; "
                        "the extraction result may be unreliable."
                    )
                    parameters = "Unknown parameters"
            with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
                output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
                st.success(f"Response received for {uploaded_image.name}!")
                # Extract JSON from the API output
                json_data = extract_json(output)
                # Files such as "ECG.png" and "ECG.jpg" share a title; add a
                # numeric suffix rather than overwriting the earlier result.
                result_key = title
                duplicate_number = 2
                while result_key in all_json_data:
                    result_key = f"{title} ({duplicate_number})"
                    duplicate_number += 1
                all_json_data[result_key] = json_data
                st.write(f"**{title} JSON Data:**")
                st.json(json_data)  # Display JSON nicely
        st.write("All extracted JSON Data:")
        st.json(all_json_data)  # Display all extracted JSON data together
    else:
        st.warning("Please upload at least one image to extract text.")