Spaces:

salzzyy
/

ai-doctor-assistant

Build error

App Files Files Community

salzzyy committed on Feb 13

Commit

066999f

1 Parent(s): 372e799

Deploy AI Doctor Assistant

Browse files

Files changed (9) hide show

.gitignore +171 -0
LICENSE +21 -0
app.py +7 -0
brain_of_the_doctor.py +47 -0
catract.png +0 -0
env_variables.txt +0 -0
requirements.txt +64 -0
voice_of_the_doctor.py +91 -0
voice_of_the_patient.py +67 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,171 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# UV
+#   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#uv.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# PyPI configuration file
+.pypirc

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 SALONI SINGH
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

app.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import gradio as gr
+def greet(name):
+    return "Hello " + name + "!!"
+# Wrap greet in a Gradio Interface with a single text input and a single text output.
+demo = gr.Interface(fn=greet, inputs="text", outputs="text")
+# Launch the interface; there is no __main__ guard, so this also runs when the module is imported.
+demo.launch()

brain_of_the_doctor.py ADDED Viewed

	@@ -0,0 +1,47 @@

+# setup groq api key
+import os
+# Groq API key read from the environment; os.getenv yields None when the variable is unset.
+GROQ_API_KEY = os.getenv("GROQ_API_KEY")
+# print(GROQ_API_KEY)
+#step 2- convert image to required format
+import base64
+def encode_image(image_path):
+    image_file=open(image_path, "rb")
+    return base64.b64encode(image_file.read()).decode('utf-8')
+#step 3- Setup Multimodal LLM
+from groq import Groq
+# Module-level Groq client, constructed with no explicit arguments.
+# NOTE(review): analyze_image_with_query builds its own client, so this one looks
+# unused within this file — confirm no other module imports it before removing.
+client = Groq()
+# Vision model name; presumably passed as the `model` argument of
+# analyze_image_with_query — confirm against callers.
+model = "llama-3.2-90b-vision-preview"
+# Sample question text to accompany an image.
+query="Is there something wrong with my eye?"
+def analyze_image_with_query(query, model, encoded_image):
+    client=Groq()
+    messages=[
+        {
+            "role": "user",
+            "content": [
+                {
+                    "type": "text",
+                    "text": query
+                },
+                {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{encoded_image}",
+                    },
+                },
+            ],
+        }]
+    chat_completion=client.chat.completions.create(
+        messages=messages,
+        model=model
+    )
+    return chat_completion.choices[0].message.content

catract.png ADDED Viewed

env_variables.txt ADDED Viewed

Binary file (10.2 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,64 @@

+-i https://pypi.org/simple
+aiofiles==23.2.1; python_version >= '3.7'
+annotated-types==0.7.0; python_version >= '3.8'
+anyio==4.8.0; python_version >= '3.9'
+certifi==2024.12.14; python_version >= '3.6'
+charset-normalizer==3.4.1; python_version >= '3.7'
+click==8.1.8; python_version >= '3.7'
+distro==1.9.0; python_version >= '3.6'
+elevenlabs==1.50.3; python_version >= '3.8' and python_version < '4.0'
+fastapi==0.115.6; python_version >= '3.8'
+ffmpy==0.5.0; python_version >= '3.8' and python_version < '4.0'
+filelock==3.16.1; python_version >= '3.8'
+fsspec==2024.12.0; python_version >= '3.8'
+gradio==5.12.0; python_version >= '3.10'
+gradio-client==1.5.4; python_version >= '3.10'
+groq==0.15.0; python_version >= '3.8'
+gtts==2.5.4; python_version >= '3.7'
+h11==0.14.0; python_version >= '3.7'
+httpcore==1.0.7; python_version >= '3.8'
+httpx==0.28.1; python_version >= '3.8'
+huggingface-hub==0.27.1; python_full_version >= '3.8.0'
+idna==3.10; python_version >= '3.6'
+jinja2==3.1.5; python_version >= '3.7'
+markdown-it-py==3.0.0; python_version >= '3.8'
+markupsafe==2.1.5; python_version >= '3.7'
+mdurl==0.1.2; python_version >= '3.7'
+numpy==2.2.1; python_version >= '3.10'
+orjson==3.10.14; python_version >= '3.8'
+packaging==24.2; python_version >= '3.8'
+pandas==2.2.3; python_version >= '3.9'
+pillow==11.1.0; python_version >= '3.9'
+pyaudio==0.2.14
+pydantic==2.10.5; python_version >= '3.8'
+pydantic-core==2.27.2; python_version >= '3.8'
+pydub==0.25.1
+pygments==2.19.1; python_version >= '3.8'
+python-dateutil==2.9.0.post0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+python-multipart==0.0.20; python_version >= '3.8'
+pytz==2024.2
+pyyaml==6.0.2; python_version >= '3.8'
+requests==2.32.3; python_version >= '3.8'
+rich==13.9.4; python_full_version >= '3.8.0'
+ruff==0.9.1; sys_platform != 'emscripten'
+safehttpx==0.1.6; python_version >= '3.10'
+semantic-version==2.10.0; python_version >= '2.7'
+shellingham==1.5.4; python_version >= '3.7'
+six==1.17.0; python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'
+sniffio==1.3.1; python_version >= '3.7'
+speechrecognition==3.13.0; python_version >= '3.9'
+starlette==0.41.3; sys_platform != 'emscripten'
+tomlkit==0.13.2; python_version >= '3.8'
+tqdm==4.67.1; python_version >= '3.7'
+typer==0.15.1; sys_platform != 'emscripten'
+typing-extensions==4.12.2; python_version >= '3.8'
+tzdata==2024.2; python_version >= '2'
+urllib3==2.3.0; python_version >= '3.9'
+uvicorn==0.34.0; sys_platform != 'emscripten'
+websockets==14.1; python_version >= '3.9'
+python-dotenv
+streamlit
+pydub
+requests
+openai
+gunicorn

voice_of_the_doctor.py ADDED Viewed

	@@ -0,0 +1,91 @@

+# Step 1: Set up the text-to-speech (TTS) model (gTTS and ElevenLabs)
+import os
+from gtts import gTTS
+def text_to_speech_with_gtts_old(input_text, output_filepath):
+    language = "en"
+    audioobj = gTTS(text=input_text, lang=language, slow=False)
+    audioobj.save(output_filepath)
+input_text = "Hi this is SALONI SINGH, HOPE YOU ARE DOING WELL !"
+text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3")
+# Step1b: Setup Text to Speech–TTS–model with ElevenLabs
+import elevenlabs
+from elevenlabs.client import ElevenLabs
+# ElevenLabs API key read from the environment; os.getenv yields None when the variable is unset.
+ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
+def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
+    client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
+    audio = client.generate(
+        text=input_text,
+        voice="Rachel",
+        output_format="mp3_22050_32",
+        model="eleven_turbo_v2",
+    )
+    elevenlabs.save(audio, output_filepath)
+#text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3")
+# Step2: Use Model for Text output to Voice
+import subprocess
+import platform
+def text_to_speech_with_gtts(input_text, output_filepath):
+    language="en"
+    audioobj= gTTS(
+        text=input_text,
+        lang=language,
+        slow=False
+    )
+    audioobj.save(output_filepath)
+    os_name = platform.system()
+    try:
+        if os_name == "Darwin":  # macOS
+            subprocess.run(['afplay', output_filepath])
+        elif os_name == "Windows":  # Windows
+            subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{output_filepath}").PlaySync();'])
+        elif os_name == "Linux":  # Linux
+            subprocess.run(['aplay', output_filepath])  # Alternative: use 'mpg123' or 'ffplay'
+        else:
+            raise OSError("Unsupported operating system")
+    except Exception as e:
+        print(f"An error occurred while trying to play the audio: {e}")
+input_text="Hi this a Good Day, Good Monney!"
+# text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3")
+def text_to_speech_with_elevenlabs(input_text, output_filepath):
+    client=ElevenLabs(api_key=ELEVENLABS_API_KEY)
+    audio=client.generate(
+        text= input_text,
+        voice= "Aria",
+        output_format= "mp3_22050_32",
+        model= "eleven_turbo_v2"
+    )
+    elevenlabs.save(audio, output_filepath)
+    os_name = platform.system()
+    try:
+        if os_name == "Darwin":  # macOS
+            subprocess.run(['afplay', output_filepath])
+        elif os_name == "Windows":  # Windows
+            subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{output_filepath}").PlaySync();'])
+        elif os_name == "Linux":  # Linux
+            subprocess.run(['aplay', output_filepath])  # Alternative: use 'mpg123' or 'ffplay'
+        else:
+            raise OSError("Unsupported operating system")
+    except Exception as e:
+        print(f"An error occurred while trying to play the audio: {e}")
+# text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3")

voice_of_the_patient.py ADDED Viewed

	@@ -0,0 +1,67 @@

+#Step1: Setup Audio recorder (ffmpeg & portaudio)
+# ffmpeg, portaudio, pyaudio
+import logging
+import speech_recognition as sr
+from pydub import AudioSegment
+from io import BytesIO
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+def record_audio(file_path, timeout=20, phrase_time_limit=None):
+    """
+    Simplified function to record audio from the microphone and save it as an MP3 file.
+    Args:
+    file_path (str): Path to save the recorded audio file.
+    timeout (int): Maximum time to wait for a phrase to start (in seconds).
+    phrase_time_lfimit (int): Maximum time for the phrase to be recorded (in seconds).
+    """
+    recognizer = sr.Recognizer()
+    try:
+        with sr.Microphone() as source:
+            logging.info("Adjusting for ambient noise...")
+            recognizer.adjust_for_ambient_noise(source, duration=1)
+            logging.info("Start speaking now...")
+            # Record the audio
+            audio_data = recognizer.listen(source, timeout=timeout, phrase_time_limit=phrase_time_limit)
+            logging.info("Recording complete.")
+            # Convert the recorded audio to an MP3 file
+            wav_data = audio_data.get_wav_data()
+            audio_segment = AudioSegment.from_wav(BytesIO(wav_data))
+            audio_segment.export(file_path, format="mp3", bitrate="128k")
+            logging.info(f"Audio saved to {file_path}")
+    except Exception as e:
+        logging.error(f"An error occurred: {e}")
+# Step 2: Set up the speech-to-text (STT) model for transcription.
+# Sample audio path used by the commented-out record/transcribe calls in this file.
+audio_filepath="patient_voice_test_for_patient.mp3"
+# record_audio(file_path=audio_filepath)
+#Step2: Setup Speech to text–STT–model for transcription
+import os
+from groq import Groq
+# Groq API key read from the environment; os.getenv yields None when the variable is unset.
+GROQ_API_KEY=os.getenv("GROQ_API_KEY")
+# Speech-to-text model name; the commented-out sample call passes it to transcribe_with_groq.
+stt_model="whisper-large-v3"
+def transcribe_with_groq(stt_model, audio_filepath, GROQ_API_KEY):
+    client=Groq(api_key=GROQ_API_KEY)
+    audio_file=open(audio_filepath, "rb")
+    transcription=client.audio.transcriptions.create(
+        model=stt_model,
+        file=audio_file,
+        language="en"
+    )
+    return transcription.text
+# ans  = transcribe_with_groq(stt_model, audio_filepath, GROQ_API_KEY)
+# print(ans)