# Welcome to Team Tonic's MultiMed
from gradio_client import Client
import os
import io
import tempfile
import numpy as np
import base64
import gradio as gr
import requests
import json
import dotenv
from scipy.io.wavfile import write
import PIL
import PIL.Image  # `import PIL` alone does not guarantee the Image submodule is loaded

dotenv.load_dotenv()

client = Client("facebook/seamless_m4t")

# Upper bound (seconds) for every outbound HTTP call so a stalled remote
# service cannot hang a request forever.
REQUEST_TIMEOUT_SECONDS = 60

# Vectara metadata field name -> key used in the source dictionaries we emit.
_SOURCE_METADATA_KEYS = {
    "title": "title",
    "author": "author",
    "pageNumber": "page number",
}


def process_speech(audio, source_language="French", target_language="English"):
    """Transcribe/translate speech to text with the seamless_m4t Space.

    Args:
        audio: Either a path to an audio file, or a ``(sample_rate, samples)``
            tuple where ``samples`` is a NumPy array.
        source_language: Source language name forwarded to the Space.
        target_language: Target language name forwarded to the Space.

    Returns:
        The text returned by the Space, or the error message as a string if
        the call failed.
    """
    temp_path = None
    if isinstance(audio, (str, os.PathLike)):
        audio_path = os.fspath(audio)
    else:
        sample_rate, samples = audio
        # A unique temporary file avoids the name collisions that a random
        # number in [0, 100) produces when several requests run at once.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            temp_path = tmp.name
        audio_path = temp_path

    try:
        if temp_path is not None:
            write(temp_path, sample_rate, np.asarray(samples).astype(np.int16))
        out = client.predict(
            "S2TT",
            "file",
            None,
            audio_path,
            "",
            source_language,  # source language
            target_language,  # target language
            api_name="/run",
        )
        return f"{out[1]}"  # second element holds the text
    except Exception as e:
        return f"{e}"
    finally:
        # Only remove files created here; never delete a caller-supplied path.
        if temp_path is not None:
            try:
                os.remove(temp_path)
            except OSError:
                pass


def process_image(image):
    """Describe an image with the OpenAI vision chat-completions endpoint.

    Args:
        image: NumPy array holding the image pixels.

    Returns:
        The model's description of the image, or an error message as a
        string if the request or response parsing failed.
    """
    # Encode in memory: no temporary file to collide on, leak, or leave open.
    buffer = io.BytesIO()
    PIL.Image.fromarray(image.astype("uint8")).convert("RGB").save(buffer, format="JPEG")
    base64_image = base64.b64encode(buffer.getvalue()).decode("utf-8")

    openai_api_key = os.getenv("OPENAI_API_KEY")
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {openai_api_key}",
    }
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "What's in this image?",
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 300,
    }

    try:
        response = requests.post(
            "https://api.openai.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        body = response.json()
        error = body.get("error")
        if error:
            # Surface the API's own message rather than a bare KeyError text.
            message = error.get("message") if isinstance(error, dict) else error
            return f"OpenAI API error: {message}"
        return f"{body['choices'][0]['message']['content']}"
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError) as e:
        return f"{e}"


def query_vectara(text):
    """Query the Vectara corpus and return the summary plus top sources.

    Args:
        text: The user query.

    Returns:
        On success, a JSON string of the form
        ``{"summary": str, "sources": [{"title", "author", "page number"}]}``
        (each source key present only when Vectara returned it). On failure,
        a plain-text message: ``"Error: <status>"`` or
        ``"No data found in the response."``.
    """
    user_message = text

    # Read authentication parameters from the .env file
    CUSTOMER_ID = os.getenv('CUSTOMER_ID')
    CORPUS_ID = os.getenv('CORPUS_ID')
    API_KEY = os.getenv('API_KEY')

    # Define the headers
    api_key_header = {
        "customer-id": CUSTOMER_ID,
        "x-api-key": API_KEY,
    }

    # Define the request body in the structure provided in the example
    request_body = {
        "query": [
            {
                "query": user_message,
                "queryContext": "",
                "start": 1,
                "numResults": 50,
                "contextConfig": {
                    "charsBefore": 0,
                    "charsAfter": 0,
                    "sentencesBefore": 2,
                    "sentencesAfter": 2,
                    "startTag": "%START_SNIPPET%",
                    "endTag": "%END_SNIPPET%",
                },
                "rerankingConfig": {
                    "rerankerId": 272725718,
                    "mmrConfig": {
                        "diversityBias": 0.35,
                    },
                },
                "corpusKey": [
                    {
                        "customerId": CUSTOMER_ID,
                        "corpusId": CORPUS_ID,
                        "semantics": 0,
                        "metadataFilter": "",
                        "lexicalInterpolationConfig": {
                            "lambda": 0,
                        },
                        "dim": [],
                    }
                ],
                "summary": [
                    {
                        "maxSummarizedResults": 5,
                        "responseLang": "auto",
                        "summarizerPromptName": "vectara-summary-ext-v1.2.0",
                    }
                ],
            }
        ]
    }

    # Make the API request with `requests`
    response = requests.post(
        "https://api.vectara.io/v1/query",
        json=request_body,  # Use json to automatically serialize the request body
        verify=True,
        headers=api_key_header,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )

    if response.status_code != 200:
        return f"Error: {response.status_code}"

    query_data = response.json()
    response_sets = query_data.get('responseSet') if isinstance(query_data, dict) else None
    if not response_sets:
        return "No data found in the response."

    # Extract the summary; tolerate a response set without one.
    summaries = response_sets[0].get('summary') or []
    summary = summaries[0].get('text', '') if summaries else 'No summary available'

    sources_info = []
    for response_set in response_sets:
        # Limit to top 5 sources.
        for source in (response_set.get('response') or [])[:5]:
            source_info = {}
            for metadata in source.get('metadata', []):
                key = _SOURCE_METADATA_KEYS.get(metadata.get('name', ''))
                if key is not None:
                    source_info[key] = metadata.get('value', '')
            if source_info:
                sources_info.append(source_info)

    result = {"summary": summary, "sources": sources_info}
    return f"{json.dumps(result, indent=2)}"


def convert_to_markdown(vectara_response_json):
    """Render the output of ``query_vectara`` as Markdown.

    Args:
        vectara_response_json: JSON string with ``summary`` and ``sources``
            keys, or a plain-text error message from ``query_vectara``.

    Returns:
        A Markdown string with the summary followed by a numbered source
        list. Plain-text (non-JSON) input is returned unchanged so the
        upstream error message reaches the user instead of a JSON parse
        error.
    """
    try:
        vectara_response = json.loads(vectara_response_json)
    except (TypeError, ValueError):
        # query_vectara reports failures as plain text, not JSON.
        return str(vectara_response_json)

    if not vectara_response or not isinstance(vectara_response, dict):
        return "No data found in the response."

    summary = vectara_response.get('summary', 'No summary available')
    sources_info = vectara_response.get('sources', [])

    # Format the summary as Markdown
    markdown_summary = f'**Summary:** {summary}\n\n'

    # Format the sources as a numbered list
    markdown_sources = "".join(
        "{}. {} by {}, Page {}\n".format(
            number,
            source_info.get('title', 'Unknown title'),
            source_info.get('author', 'Unknown author'),
            source_info.get('page number', 'Unknown page number'),
        )
        for number, source_info in enumerate(sources_info, start=1)
    )

    return f"{markdown_summary}**Sources:**\n{markdown_sources}"

# Main function to handle the Gradio interface logic
def _rewrite_with_openai(markdown_text):
    """Ask the OpenAI chat-completions endpoint to rewrite *markdown_text*.

    Uses ``requests`` (already a dependency of this file) and the
    ``OPENAI_API_KEY`` environment variable, so no OpenAI SDK import is
    required. Raises on HTTP errors or an unexpected response shape.
    """
    prompt = "Answer in the same language, write it better, more understandable and shorter:"
    api_key = os.getenv("OPENAI_API_KEY")
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        },
        json={
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "system", "content": prompt},
                {"role": "user", "content": markdown_text},
            ],
        },
        timeout=60,
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]


def process_and_query(text=None, image=None, audio=None):
    """Turn the user's text, image, or audio into a rewritten Vectara answer.

    An image, when given, replaces the text with its description; audio,
    when given, replaces it with the recognized speech (audio wins if both
    are present). The resulting text is sent to Vectara, the response is
    formatted as Markdown, and that Markdown is rewritten by OpenAI.

    Args:
        text: Query typed by the user.
        image: Image input from the Gradio image component.
        audio: Audio input from the Gradio audio component.

    Returns:
        The final answer as a string, or the error message if any step
        raised (this function is the UI boundary, so errors are shown to the
        user rather than propagated).
    """
    try:
        print(f"text_value : {text}")
        # If an image is provided, process it with OpenAI and use the response as the text query for Vectara
        if image is not None:
            text = process_image(image)
        print("audio_value is : ", audio)
        if audio is not None:
            text = process_speech(audio)
            # this should print in the log the text that was extracted from the audio
            print("process_speech_out : ", text)

        # Nothing to ask about: do not send an empty query to Vectara.
        if text is None or not str(text).strip():
            return "Please provide a question as text, image, or audio."

        # Now, use the text (either provided by the user or obtained from OpenAI) to query Vectara
        vectara_response_json = query_vectara(text)
        markdown_output = convert_to_markdown(vectara_response_json)
        return _rewrite_with_openai(markdown_output)
    except Exception as e:
        return str(e)


# Define the Gradio interface
iface = gr.Interface(
    fn=process_and_query,
    inputs=[
        gr.Textbox(label="Input Text"),
        gr.Image(label="Upload Image"),
        # "numpy" delivers the (sample_rate, samples) tuple that
        # process_speech unpacks; "filepath" would hand it a plain string.
        gr.Audio(label="talk", type="numpy", sources="microphone", visible=True),
    ],
    outputs=[gr.Markdown(label="Output Text")],
    title="👋🏻Welcome to ⚕🗣️😷MultiMed - Access Chat ⚕🗣️😷",
    description='''
### How To Use ⚕🗣️😷MultiMed⚕:
#### 🗣️📝Interact with ⚕🗣️😷MultiMed⚕ in any language using audio or text!
#### 🗣️📝 This is an educational and accessible conversational tool to improve wellness and sanitation in support of public health.
#### 📚🌟💼 The knowledge base is composed of publicly available medical and health sources in multiple languages. We also used [Kelvalya/MedAware](https://huggingface.co/datasets/keivalya/MedQuad-MedicalQnADataset) that we processed and converted to HTML.
The quality of the answers depends on the quality of the dataset, so if you want to see some data represented here, do [get in touch](https://discord.gg/GWpVpekp). You can also use 😷MultiMed⚕️ on your own data & in your own way by cloning this space. 🧬🔬🔍 Simply click here: Duplicate Space
#### Join us : 🌟TeamTonic🌟 is always making cool demos! Join our active builder's🛠️community on 👻Discord: [Discord](https://discord.gg/GWpVpekp) On 🤗Huggingface: [TeamTonic](https://huggingface.co/TeamTonic) & [MultiTransformer](https://huggingface.co/MultiTransformer) On 🌐Github: [Polytonic](https://github.com/tonic-ai) & contribute to 🌟 [PolyGPT](https://github.com/tonic-ai/polygpt-alpha)"
''',
    theme='ParityError/Anime',
    examples=[
        ["What is the proper treatment for buccal herpes?"],
        ["Male, 40 presenting with swollen glands and a rash"],
        ["How does cellular metabolism work TCA cycle"],
        ["What special care must be provided to children with chicken pox?"],
        ["When and how often should I wash my hands ?"],
        ["بکل ہرپس کا صحیح علاج کیا ہے؟"],
        ["구강 헤르페스의 적절한 치료법은 무엇입니까?"],
        ["Je, ni matibabu gani sahihi kwa herpes ya buccal?"],
    ],
)

iface.launch()