ruslanmv committed on
Commit
e8173b9
β€’
1 Parent(s): 4e40bbb

First commit

Browse files
Files changed (7) hide show
  1. README.md +2 -2
  2. ai_config.py +69 -0
  3. app.py +232 -0
  4. knowledge_retrieval.py +91 -0
  5. prompt_instructions.py +162 -0
  6. requirements.txt +20 -0
  7. settings.py +245 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
  title: Medical Interviewer
3
- emoji: πŸ‘€
4
  colorFrom: pink
5
  colorTo: yellow
6
  sdk: gradio
7
- sdk_version: 4.43.0
8
  app_file: app.py
9
  pinned: false
10
  ---
 
1
  ---
2
  title: Medical Interviewer
3
+ emoji: πŸ‘©β€πŸ¦³
4
  colorFrom: pink
5
  colorTo: yellow
6
  sdk: gradio
7
+ sdk_version: 4.41.0
8
  app_file: app.py
9
  pinned: false
10
  ---
ai_config.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from io import BytesIO
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from openai import OpenAI
import tiktoken

# Load environment variables from a local .env file (supplies OPENAI_API_KEY).
load_dotenv()

# OpenAI credentials, read from the environment after .env is loaded.
openai_api_key = os.getenv("OPENAI_API_KEY")


def n_of_questions():
    """Return the total number of interview questions to ask (fixed at 25)."""
    return 25


# Chat model used for interviewing and report generation.
model = "gpt-4o-mini"


def load_model(openai_api_key):
    """Build the LangChain chat model used for the interview.

    Args:
        openai_api_key: OpenAI API key used to authenticate.

    Returns:
        A ChatOpenAI instance configured with the module-level model name
        and temperature 0.5.
    """
    return ChatOpenAI(
        model_name=model,
        openai_api_key=openai_api_key,
        temperature=0.5,
    )


# Shared OpenAI client for the text-to-speech and transcription helpers below.
client = OpenAI(api_key=openai_api_key)
32
+
33
+
34
def _write_audio_response(response, output):
    """Stream TTS audio bytes into *output* (a BytesIO buffer or a file path)."""
    if isinstance(output, BytesIO):
        # In-memory buffer: write chunks directly.
        for chunk in response.iter_bytes():
            output.write(chunk)
    else:
        # Filesystem path: open and write binary chunks.
        with open(output, 'wb') as f:
            for chunk in response.iter_bytes():
                f.write(chunk)


def convert_text_to_speech(text, output, voice):
    """Convert *text* to speech with OpenAI TTS and write the audio to *output*.

    Args:
        text: Text to synthesize.
        output: Either a BytesIO buffer or a filesystem path for the MP3 bytes.
        voice: OpenAI TTS voice name (e.g. "alloy", "onyx").

    On any synthesis/write error, falls back to speaking a short fixed phrase
    so the caller still receives playable audio.
    """
    try:
        response = client.audio.speech.create(model="tts-1-hd", voice=voice, input=text)
        _write_audio_response(response, output)
    except Exception as e:
        print(f"An error occurred: {e}")
        # Best-effort fallback so the UI still gets audio to play.
        response = client.audio.speech.create(model="tts-1-hd", voice=voice, input='Here is my Report.')
        _write_audio_response(response, output)
61
+
62
+
63
def transcribe_audio(audio):
    """Transcribe an audio file to text with OpenAI Whisper.

    Args:
        audio: Path to an audio file, opened in binary mode.

    Returns:
        The transcribed text.
    """
    # Context manager ensures the file handle is closed (the original
    # opened the file and never closed it).
    with open(audio, "rb") as audio_file:
        transcription = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
    return transcription.text
app.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tempfile
3
+ import os
4
+ from pathlib import Path
5
+ from io import BytesIO
6
+ from settings import (
7
+ respond,
8
+ generate_random_string,
9
+ reset_interview,
10
+ generate_interview_report,
11
+ generate_report_from_file,
12
+ interview_history,
13
+ question_count,
14
+ language,
15
+ )
16
+ from ai_config import convert_text_to_speech, transcribe_audio, n_of_questions
17
+ from prompt_instructions import get_interview_initial_message_sarah, get_interview_initial_message_aaron
18
+
19
+ # Global variables
20
+ temp_audio_files = []
21
+ initial_audio_path = None
22
+ selected_interviewer = "Sarah"
23
+ audio_enabled = True
24
+
25
def reset_interview_action(voice):
    """Reset interview state and produce the chosen interviewer's greeting.

    Args:
        voice: Interviewer name — "Sarah" selects the Sarah persona/voice,
            anything else selects Aaron.

    Returns:
        Tuple of (chatbot history with the greeting, hidden autoplaying
        gr.Audio of the spoken greeting, cleared message textbox).
    """
    global question_count, interview_history, selected_interviewer
    selected_interviewer = voice
    question_count = 0
    interview_history.clear()

    if voice == "Sarah":
        initial_message = get_interview_initial_message_sarah()
        voice_setting = "alloy"
    else:
        initial_message = get_interview_initial_message_aaron()
        voice_setting = "onyx"

    initial_message = str(initial_message)

    # Synthesize the greeting into memory, then persist to a temp file
    # that gr.Audio can serve.
    initial_audio_buffer = BytesIO()
    convert_text_to_speech(initial_message, initial_audio_buffer, voice_setting)
    initial_audio_buffer.seek(0)

    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        temp_audio_path = temp_file.name
        temp_file.write(initial_audio_buffer.getvalue())

    # Track the file so cleanup() can delete it later.
    temp_audio_files.append(temp_audio_path)

    return (
        # initial_message is always a str after the cast above, so the
        # original tuple check was dead code and has been removed.
        [(None, initial_message)],
        gr.Audio(value=temp_audio_path, label=voice, autoplay=True, visible=False),
        gr.Textbox(value=""),
    )
55
+
56
def create_app():
    """Build the Gradio Blocks UI for the medical interviewer.

    Returns:
        The assembled gr.Blocks demo (not yet launched).
    """
    global initial_audio_path, selected_interviewer, audio_enabled
    # Initialize without any message history
    initial_message = ""

    with gr.Blocks(title="AI Medical Interviewer") as demo:
        gr.Image(value="appendix/icon.jpeg", label='icon', width=20, scale=1, show_label=False,
                 show_fullscreen_button=False, show_download_button=False, show_share_button=False)
        gr.Markdown(
            """
            # Medical Interviewer
            This chatbot conducts medical interviews based on medical knowledge.
            The interviewer will prepare a medical report based on the interview.
            """
        )

        with gr.Tab("Interview"):
            with gr.Row():
                reset_button = gr.Button("Start Interview", size='sm', scale=1)
                end_button = gr.Button("End Interview", size='sm', scale=1)  # Added End Interview button
            audio_output = gr.Audio(
                label="Sarah",
                scale=3,
                autoplay=True,
                visible=False,  # Hides the audio but keeps it active
                show_download_button=False,
            )

            # Chatbot initialized with no messages
            chatbot = gr.Chatbot(value=[], label=f"Medical InterviewπŸ“‹")
            with gr.Row():
                msg = gr.Textbox(label="Type your message here...", scale=3)
                audio_input = gr.Audio(sources=(["microphone"]), label="Record your message", type="filepath", scale=1)
            send_button = gr.Button("Send")
            pdf_output = gr.File(label="Download Report", visible=False)

            def user(user_message, audio, history):
                """Append the user's turn (typed or transcribed audio) to the chat."""
                if audio is not None:
                    user_message = transcribe_audio(audio)
                return "", None, history + [[user_message, None]]

            def bot_response(chatbot, message):
                """Generate the interviewer's reply, its audio, and (at the end) the report."""
                global question_count, temp_audio_files, selected_interviewer, audio_enabled
                question_count += 1

                last_user_message = chatbot[-1][0] if chatbot else message

                voice = "alloy" if selected_interviewer == "Sarah" else "onyx"
                response, audio_buffer = respond(chatbot, last_user_message, voice, selected_interviewer)

                for bot_message in response:
                    chatbot.append((None, bot_message[1]))

                if isinstance(audio_buffer, BytesIO):
                    # Persist in-memory audio to a temp file so gr.Audio can play it.
                    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
                        temp_audio_path = temp_file.name
                        temp_file.write(audio_buffer.getvalue())
                    temp_audio_files.append(temp_audio_path)
                    audio_output = gr.Audio(value=temp_audio_path, label=selected_interviewer, autoplay=audio_enabled, visible=False)
                else:
                    audio_output = gr.Audio(value=audio_buffer, label=selected_interviewer, autoplay=audio_enabled, visible=False)

                if question_count >= n_of_questions():
                    # Interview is over: say goodbye and hand over the report PDF.
                    conclusion_message = "Thank you for participating in this interview. We have reached the end of our session. I hope this conversation has been helpful. Take care!"
                    chatbot.append((None, conclusion_message))

                    conclusion_audio_buffer = BytesIO()
                    convert_text_to_speech(conclusion_message, conclusion_audio_buffer, voice)
                    conclusion_audio_buffer.seek(0)

                    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
                        temp_audio_path = temp_file.name
                        temp_file.write(conclusion_audio_buffer.getvalue())
                    temp_audio_files.append(temp_audio_path)
                    audio_output = gr.Audio(value=temp_audio_path, label=selected_interviewer, autoplay=audio_enabled, visible=False)

                    report_content, pdf_path = generate_interview_report(interview_history, language)
                    chatbot.append((None, f"Interview Report:\n\n{report_content}"))

                    return chatbot, audio_output, gr.File(visible=True, value=pdf_path)

                return chatbot, audio_output, gr.File(visible=False)

            # Function to reset and start the interview, which populates the chatbot with the initial message
            def start_interview():
                global selected_interviewer
                return reset_interview_action(selected_interviewer)

            # Function to end the interview
            def end_interview(chatbot):
                chatbot.append((None, "The interview has been ended by the user."))
                return chatbot, gr.Audio(visible=False), gr.Textbox(value="")

            # Bind actions to buttons
            reset_button.click(
                start_interview,
                inputs=[],
                outputs=[chatbot, audio_output, msg]
            )

            end_button.click(
                end_interview,
                inputs=[chatbot],
                outputs=[chatbot, audio_output, msg]
            )

            msg.submit(user, [msg, audio_input, chatbot], [msg, audio_input, chatbot], queue=False).then(
                bot_response, [chatbot, msg], [chatbot, audio_output, pdf_output]
            )

            send_button.click(user, [msg, audio_input, chatbot], [msg, audio_input, chatbot], queue=False).then(
                bot_response, [chatbot, msg], [chatbot, audio_output, pdf_output]
            )

        with gr.Tab("Settings"):
            gr.Markdown('Configure your settings below:')
            audio_toggle = gr.Checkbox(label="Enable Audio", value=True)
            interviewer_radio = gr.Radio(["Sarah", "Aaron"], label="Select Interviewer", value="Sarah")

            def update_settings(audio_status, interviewer_choice):
                """Persist the audio toggle and interviewer choice into module globals."""
                global audio_enabled, selected_interviewer
                audio_enabled = audio_status
                selected_interviewer = interviewer_choice
                return f"Settings updated: Audio {'Enabled' if audio_enabled else 'Disabled'}, Interviewer: {selected_interviewer}"

            settings_button = gr.Button("Apply Settings")
            settings_message = gr.Textbox(visible=True)

            settings_button.click(
                update_settings,
                inputs=[audio_toggle, interviewer_radio],
                outputs=[settings_message]
            )

        with gr.Tab("Upload Document"):
            gr.Markdown('Please upload a document that contains content written about a patient or by the patient.')
            file_input = gr.File(label="Upload a TXT, PDF, or DOCX file")
            language_input = 'English'  # fixed report language for uploaded documents
            generate_button = gr.Button("Generate Report")
            report_output = gr.Textbox(label="Generated Report", lines=100, visible=False)
            pdf_output = gr.File(label="Download Report", visible=True)

            # BUG FIX: the click handler passes only `file_input`, but the
            # original callback required two positional arguments, so every
            # click raised TypeError. `language` now defaults to the fixed
            # language_input value above.
            def generate_report_and_pdf(file, language=language_input):
                report_content, pdf_path = generate_report_from_file(file, language)
                return report_content, pdf_path, gr.File(visible=True)

            generate_button.click(
                generate_report_and_pdf,
                inputs=[file_input],
                outputs=[report_output, pdf_output, pdf_output]
            )

        with gr.Tab("Description"):
            with open('appendix/description.txt', 'r', encoding='utf-8') as file:
                description_txt = file.read()
            gr.Markdown(description_txt)
            gr.Image(value="appendix/diagram.png", label='diagram', width=700, scale=1, show_label=False)

    return demo
215
+
216
+ # Clean up function
217
def cleanup():
    """Remove every temporary audio file created during the session."""
    global temp_audio_files, initial_audio_path

    # Drain the tracked temp files, deleting any that still exist; the
    # list is left empty afterwards.
    while temp_audio_files:
        stale_path = temp_audio_files.pop()
        if os.path.exists(stale_path):
            os.unlink(stale_path)

    # The initial greeting audio is tracked separately.
    if initial_audio_path and os.path.exists(initial_audio_path):
        os.unlink(initial_audio_path)
226
+
227
# Script entry point: build the Gradio UI, serve it, and always delete the
# session's temp audio files on shutdown (even if launch() raises).
if __name__ == "__main__":
    app = create_app()
    try:
        app.launch()
    finally:
        cleanup()
knowledge_retrieval.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import random
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.retrievers import EnsembleRetriever
from ai_config import n_of_questions, openai_api_key
from prompt_instructions import get_interview_prompt_sarah, get_interview_prompt_aaron, get_report_prompt

# NOTE(review): this rebinds the imported n_of_questions *function* to its
# int result, shadowing the callable for the rest of this module. Works, but
# renaming the int (e.g. N_OF_QUESTIONS) would be clearer.
n_of_questions = n_of_questions()
12
+
13
def setup_knowledge_retrieval(llm, language='english', voice='Sarah'):
    """Build the interview and report retrieval chains over the FAISS index.

    Args:
        llm: Chat model the chains will run on.
        language: Language the prompts should instruct the model to use.
        voice: Interviewer persona — 'Sarah' or Aaron (anything else).

    Returns:
        (interview retrieval chain, report retrieval chain, ensemble retriever).
    """
    embedding_model = OpenAIEmbeddings(openai_api_key=openai_api_key)

    # Local FAISS index of the clinical knowledge base.
    documents_faiss_index = FAISS.load_local("knowledge/faiss_index_all_documents", embedding_model,
                                             allow_dangerous_deserialization=True)
    combined_retriever = EnsembleRetriever(
        retrievers=[documents_faiss_index.as_retriever()]
    )

    # Choose the persona's system prompt, then wrap it in a chat template.
    persona_prompt = (get_interview_prompt_sarah if voice == 'Sarah'
                      else get_interview_prompt_aaron)
    interview_prompt = ChatPromptTemplate.from_messages([
        ("system", persona_prompt(language, n_of_questions)),
        ("human", "{input}")
    ])

    report_prompt = ChatPromptTemplate.from_messages([
        ("system", get_report_prompt(language)),
        ("human", "Please provide a concise clinical report based on the interview.")
    ])

    interview_retrieval_chain = create_retrieval_chain(
        combined_retriever,
        create_stuff_documents_chain(llm, interview_prompt),
    )
    report_retrieval_chain = create_retrieval_chain(
        combined_retriever,
        create_stuff_documents_chain(llm, report_prompt),
    )

    return interview_retrieval_chain, report_retrieval_chain, combined_retriever
48
+
49
+
50
def get_next_response(interview_chain, message, history, question_count):
    """Produce the interviewer's next question, or the closing line.

    Appends the resulting Q/A pair to *history* in place.

    Args:
        interview_chain: Retrieval chain exposing .invoke().
        message: The patient's latest response.
        history: Mutable list of prior interview lines.
        question_count: Number of questions already asked.

    Returns:
        The next question text, or a fixed closing message once the
        question budget (module-level n_of_questions) is exhausted.
    """
    # Interview over: no further questions.
    if question_count >= n_of_questions:
        return "Thank you for your responses. I will now prepare a report."

    # Ask the chain for a fresh, non-repeated question.
    result = interview_chain.invoke({
        "input": f"Based on the patient's last response: '{message}', and considering the full interview history, ask a specific, detailed question that hasn't been asked before and is relevant to the patient's situation.",
        "history": "\n".join(history),
        "question_number": question_count + 1  # Increment question number here
    })

    next_question = result.get("answer", "Could you provide more details on that?")

    # Record the exchange for subsequent turns.
    history.append(f"Q{question_count + 1}: {next_question}")
    history.append(f"A{question_count + 1}: {message}")

    return next_question
71
+
72
+
73
def generate_report(report_chain, history, language):
    """Run the report chain over the full interview history.

    Args:
        report_chain: Retrieval chain exposing .invoke().
        history: List of interview lines; joined with newlines for the prompt.
        language: Target language for the report.

    Returns:
        The chain's "answer" value, or a fixed fallback message when absent.
    """
    payload = {
        "input": "Please provide a clinical report based on the interview.",
        "history": "\n".join(history),
        "language": language,
    }
    outcome = report_chain.invoke(payload)
    return outcome.get("answer", "Unable to generate report due to insufficient information.")
83
+
84
+
85
def get_initial_question(interview_chain):
    """Ask the chain for the interview's opening question.

    Args:
        interview_chain: Retrieval chain exposing .invoke().

    Returns:
        The chain's "answer", or a generic opener when the chain gives none.
    """
    response = interview_chain.invoke({
        "input": "What should be the first question in a clinical psychology interview?",
        "history": "",
        "question_number": 1
    })
    fallback = "Could you tell me a little bit about yourself and what brings you here today?"
    return response.get("answer", fallback)
prompt_instructions.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from datetime import datetime
from ai_config import n_of_questions
# Date stamp interpolated into the report prompt by get_report_prompt.
current_datetime = datetime.now()
current_date = current_datetime.strftime("%Y-%m-%d")

# NOTE(review): rebinds the imported n_of_questions function to its int
# result, shadowing the callable for the rest of this module.
n_of_questions = n_of_questions()


def get_interview_initial_message_sarah():
    """Opening greeting shown/spoken by the Sarah interviewer persona."""
    return f"""Hello, I'm Sarah, an AI clinical psychologist, and I'll be conducting a clinical interview with you.
    I will ask you about {n_of_questions} questions.
    Feel free to share as much or as little as you're comfortable with.
    Could you please tell me which language you prefer to speak or conduct this interview in? """
14
+
15
def get_interview_initial_message_aaron():
    """Opening greeting shown/spoken by the Aaron interviewer persona."""
    return f"""Hello, I'm Aaron, an AI clinical psychologist. I'll be conducting a brief interview with you.
    Which language do you prefer for this interview? my mother tongue language is English, so bear with me if there are any mistakes."""
18
+
19
+
20
def get_interview_prompt_sarah(language, n_of_questions):
    """System prompt for the Sarah persona's interview chain.

    Args:
        language: Language the interview should be conducted in.
        n_of_questions: Total question budget for the interview.

    Returns:
        A system-prompt string. {{context}}, {{history}} and
        {{question_number}} are left as template placeholders for the
        retrieval chain to fill at run time.
    """
    return f"""You are Sarah, an empathic and compassionate Female Psychologist or Psychiatrist, conducting a clinical interview in {language}.

    A highly experienced and dedicated Clinical Psychologist with over 30 years of experience in clinical practice and research.
    Specializing in trauma, anxiety disorders, and family therapy, Sarah has a proven track record of successfully treating a wide range of psychological conditions.
    Her deep commitment to patient care and mental health advocacy has driven her to develop innovative therapeutic approaches and lead community mental health initiatives.
    Sarah's extensive career is marked by her unwavering dedication to giving back to the community.
    She has been actively involved in various community service efforts, including several years of work with children with disabilities and autistic children.
    Her compassionate approach and ability to connect with patients of all ages have made her a respected figure in the field of psychology.
    Sarah is not only a skilled clinician but also a passionate advocate for mental health, continuously striving to improve the lives of those she serves.

    Use the following context and interview history to guide your response:

    Context from knowledge base: {{context}}

    Previous interview history:
    {{history}}

    Current question number: {{question_number}}

    Respond to the patient's input briefly and directly in {language}.
    Ask a specific, detailed question that hasn't been asked before.
    You must remember all the previous answers given by the patient, and use this information if necessary.
    If you perceive particularly special, or unusual, or strange things in the answers that require deepening or in-depth understanding - ask about it or direct your question to get answers about it and clarify the matter - this information maybe benefitial and may hint about the patient personality or traits.
    The first question is to ask for the patient name.
    The second question is to ask for age.
    The third question is to ask where they live.
    The fourth questions is to ask what they does for work.
    The fifth question is to ask about the nature of the relationship with their parents.
    Keep in mind that you have {n_of_questions} total number of questions.
    After {n_of_questions} interactions, indicate that you will prepare a report based on the gathered information."""
51
+
52
+
53
def get_interview_prompt_aaron(language, n_of_questions):
    """System prompt for the Aaron persona's interview chain.

    Args:
        language: Language the interview should be conducted in.
        n_of_questions: Total question budget for the interview.

    Returns:
        A system-prompt string with {{context}}, {{history}} and
        {{question_number}} left as chain template placeholders.
    """
    return f"""You are Aaron, a not so much empathic, tough, and impatient Male Psychologist, Coach, and Mentor, conducting a clinical interview in {language}.

    Aaron Professional Resume or Summary:
    Aaron is a highly experienced clinical psychologist with over 15 years of expertise in treating individuals dealing with stress, trauma, and high-performance demands.
    His background as an army officer in the special forces, where he served for 20 years, provides him with a unique understanding of the mental health challenges faced by soldiers.
    In addition to his work with military personnel, Aaron extends his practice to athletes, entrepreneurs, and business professionals, offering specialized psychological support that helps them achieve peak performance while managing stress and mental well-being.
    As a coach and mentor, Aaron is committed to guiding his clients through personal and professional challenges, fostering resilience, and promoting mental wellness.

    Use the following context and interview history to guide your response:

    Context from knowledge base: {{context}}

    Previous interview history:
    {{history}}

    Current question number: {{question_number}}

    Respond to the patient's input briefly and directly in {language}.
    Ask a specific, detailed question that hasn't been asked before.
    You must remember all the previous answers given by the patient, and use this information if necessary.
    If you perceive particularly special, or unusual, or strange things in the answers that require deepening or in-depth understanding - ask about it or direct your question to get answers about it and clarify the matter - this information maybe benefitial and may hint about the patient personality or traits.
    The first question is to ask for the patient name.
    The second question is to ask for age.
    The third question is to ask where they live.
    The fourth questions is to ask what they does for work.
    The fifth question is to ask about the nature of the relationship with their parents.
    Keep in mind that you have {n_of_questions} total number of questions.
    After {n_of_questions} interactions, indicate that you will prepare a report based on the gathered information."""
82
+
83
def get_report_prompt(language):
    """System prompt for the clinical-report chain.

    Args:
        language: Language the report should be written in.

    Returns:
        A system-prompt string. {{context}} and {{history}} remain chain
        template placeholders; current_date (module level) is baked in.
    """
    return f"""You are a Psychologist or Psychiatrist preparing a clinical report in {language}.
    Use the following context and interview history to create your report.
    Keep the report concise and focused on the key observations:

    Context from knowledge base: {{context}}

    Complete interview history:
    {{history}}

    Prepare a brief clinical report in {language} based strictly on the information gathered during the interview.
    Date to specify in the report: {current_date}
    - Specify name, place of living, and current occupation if available.
    - Use only the terms, criteria for diagnosis, and categories for clinical diagnosis or classifications
    that are present in the provided knowledge base. Do not introduce any external information or terminology.
    * In your diagnosis, you must be very careful. That is, you need to have enough evidence and information to rate or diagnose a patient.
    * Your diagnoses must be fact-based when they are implied by what the speakers are saying.
    * Write technical, clinical or professional terms only in the English language.
    * As a rule, in cases where there is little information about the patient through the conversation or through
    the things they say, the diagnosis will be more difficult, and the ratings will be lower,
    because it is difficult to draw conclusions when our information about the patient is scarce.
    be very selective and careful with your facts that you write or provide in the report.
    in such a case, this also must be mentioned and taken into consideration.
    * Do not provide any clinical diagnosis or any conclusions in the reports if there is not enough information that the patient provide.
    * Any diagnosis or interpretation requires the presentation of facts, foundations, and explanations.
    * You can also give examples or quotes.
    * There are two parts for the report - main report and additional report.
    * Structure the main report to include observed symptoms, potential diagnoses (if applicable), and any other
    relevant clinical observations, all within the framework of the given knowledge.

    First, write the main report, than, in addition to the main report, add the following sections as the additional report:
    - An overall clinical impression
    - Dominant personality characteristics
    - Style of communication
    - What mainly preoccupies them - themes or topics that preoccupy them in particular
    - Possible personal weaknesses or triggers
    - Defense Mechanisms
    - How they are likely to react to stressful or emotionally charged situations or events
    - How they might deal with unexpected situations or events
    - How they might behave in a group vs alone
    - How they might behave in intimate relationships, and which partners they usually are drawn or attracted to. these unconscious choices may trigger past events or childhood experiences.
    - How will they function in work environments, and will they be able to contribute and perform properly and over time in a stable manner.
    - Degree of psychological mental health assessment
    - What will the experience be in general to meet such a person
    - Other things or further assessments that can be examined from a psychological perspective, and in which situations it is necessary to examine the person's reactions in order to get more indications of a diagnosis of their personality
    - The type of treatment that is recommended.

    Furthermore, include the following:

    Big Five Traits (ratings of 0-10):
    Extraversion: [rating]
    Agreeableness: [rating]
    Conscientiousness: [rating]
    Neuroticism: [rating]
    Openness: [rating]
    Big Five Traits explanation: [explanation]

    Personality Disorders or Styles (ratings of 0-4):
    Depressed: [rating]
    Paranoid: [rating]
    Schizoid-Schizotypal: [rating]
    Antisocial-Psychopathic: [rating]
    Borderline-Dysregulated: [rating]
    Narcissistic: [rating]
    Anxious-Avoidant: [rating]
    Dependent-Victimized: [rating]
    Hysteric-Histrionic: [rating]
    Obsessional: [rating]
    Personality Disorders or Styles explanation: [explanation]

    Attachment Styles (ratings of 0-10):
    Secured: [rating]
    Anxious-Preoccupied: [rating]
    Dismissive-Avoidant: [rating]
    Fearful-Avoidant: [rating]
    Avoidance: [rating]
    Positive view toward the Self: [rating]
    Positive view toward Others: [rating]
    Attachment Styles explanation: [explanation]
    """
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ python-dotenv==1.0.1
2
+ pandas==2.1.4
3
+ langchain==0.2.6
4
+ langchain-openai==0.1.14
5
+ langchain-core==0.2.11
6
+ langchain-ibm==0.1.8
7
+ langchain-community==0.2.6
8
+ ibm-watson-machine-learning==1.0.359
9
+ ipykernel
10
+ notebook
11
+ urllib3
12
+ requests==2.32.0
13
+ PyPDF2
14
+ python-docx
15
+ reportlab
16
+ openai
17
+ faiss-cpu
18
+ cryptography
19
+ pymysql
20
+ scikit-learn
settings.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ import os
5
+ import random
6
+ import string
7
+ import tempfile
8
+ import re
9
+ import io
10
+ import PyPDF2
11
+ import docx
12
+ from reportlab.pdfgen import canvas
13
+ from reportlab.lib.pagesizes import letter
14
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
15
+ from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
16
+ from reportlab.lib.enums import TA_JUSTIFY
17
+ from ai_config import n_of_questions, load_model, openai_api_key, convert_text_to_speech
18
+ from knowledge_retrieval import setup_knowledge_retrieval, generate_report
19
+
20
# Initialize settings
# NOTE: this rebinds the imported function name `n_of_questions` to its int
# result (25 per ai_config); after this line the callable is shadowed.
n_of_questions = n_of_questions()
current_datetime = datetime.now()
human_readable_datetime = current_datetime.strftime("%B %d, %Y at %H:%M")
current_date = current_datetime.strftime("%Y-%m-%d")

# Initialize the model and retrieval chain.
# On any failure the app degrades to a basic mode (scripted follow-up
# questions, no knowledge-base-driven interview or report).
try:
    llm = load_model(openai_api_key)
    interview_retrieval_chain, report_retrieval_chain, combined_retriever = setup_knowledge_retrieval(llm)
    knowledge_base_connected = True
    print("Successfully connected to the knowledge base.")
except Exception as e:
    print(f"Error initializing the model or retrieval chain: {str(e)}")
    knowledge_base_connected = False
    print("Falling back to basic mode without knowledge base.")

# Mutable interview session state, shared via `global` by respond()/reset_interview().
question_count = 0
interview_history = []
last_audio_path = None  # Variable to store the path of the last audio file
initial_audio_path = None  # Variable to store the path of the initial audio file
language = None  # captured from the patient's first answer; None until then
42
+
43
def generate_random_string(length=5):
    """Return a random alphanumeric string of the given length."""
    alphabet = string.ascii_letters + string.digits
    return "".join(random.choice(alphabet) for _ in range(length))
45
def respond(message, history, voice, selected_interviewer):
    """Handle one interview turn.

    Records the patient's message, asks the retrieval chain for the next
    question (or, after n_of_questions turns, generates the final report),
    synthesizes question audio, and updates the chat history in place.

    Args:
        message: The patient's latest utterance (str, or Gradio history-style list).
        history: Gradio chat history (list of [user, bot] pairs); created if missing.
        voice: TTS voice name passed through to convert_text_to_speech.
        selected_interviewer: Interviewer persona used to (re)build the chains.

    Returns:
        (history, audio_path) where audio_path is a str path to the question
        audio, or None when no audio was produced (report turn or error).

    Side effects: mutates the module-level session globals and deletes
    superseded .mp3 files from disk.
    """
    global question_count, interview_history, combined_retriever, last_audio_path, initial_audio_path, language, interview_retrieval_chain, report_retrieval_chain

    if not isinstance(history, list):
        history = []
    if not history or not history[-1]:
        history.append(["", ""])

    # Extract the actual message text (Gradio may pass nested history lists).
    if isinstance(message, list):
        message = message[-1][0] if message and isinstance(message[-1], list) else message[-1]

    question_count += 1
    interview_history.append(f"Q{question_count}: {message}")
    history_str = "\n".join(interview_history)
    print("Starting interview", question_count)

    try:
        speech_file_path = None  # stays None on report turns and errors
        if knowledge_base_connected:
            if question_count == 1:
                # Capture the language from the first response
                language = message.strip().lower()
                # Reinitialize the interview chain with the new language
                interview_retrieval_chain, report_retrieval_chain, combined_retriever = setup_knowledge_retrieval(
                    llm, language, selected_interviewer)

            if question_count < n_of_questions:
                result = interview_retrieval_chain.invoke({
                    "input": f"Based on the patient's statement: '{message}', what should be the next question?",
                    "history": history_str,
                    "question_number": question_count + 1,
                    "language": language
                })
                question = result.get("answer", f"Can you tell me more about that? (in {language})")

                # Audio is generated only for interview questions; the final
                # report is intentionally never sent to TTS (fixes a bug where
                # the whole report was converted to speech).
                if question:
                    random_suffix = generate_random_string()
                    speech_file_path = Path(__file__).parent / f"question_{question_count}_{random_suffix}.mp3"
                    convert_text_to_speech(question, speech_file_path, voice)
                    print(f"Question {question_count} saved as audio at {speech_file_path}")

                    # Remove the previous turn's audio file if it exists.
                    if last_audio_path and os.path.exists(last_audio_path):
                        os.remove(last_audio_path)
                    last_audio_path = speech_file_path
            else:
                # Final turn: produce the clinical report instead of a question.
                question = generate_report(report_retrieval_chain, interview_history, language)

        else:
            # Fallback mode without knowledge base
            question = f"Can you elaborate on that? (in {language})"
            if question_count < n_of_questions:
                speech_file_path = Path(__file__).parent / f"question_{question_count}.mp3"
                convert_text_to_speech(question, speech_file_path, voice)
                print(f"Question {question_count} saved as audio at {speech_file_path}")

                if last_audio_path and os.path.exists(last_audio_path):
                    os.remove(last_audio_path)
                last_audio_path = speech_file_path

        history[-1][1] = f"{question}"

        # Remove the initial question audio file after the first user response
        if initial_audio_path and os.path.exists(initial_audio_path):
            os.remove(initial_audio_path)
            initial_audio_path = None

        # NOTE: the old "clean up older files" block was removed: it referenced
        # `random_suffix` (a NameError in fallback mode) and built the previous
        # file name with the *current* turn's suffix, so it never matched a real
        # file. last_audio_path bookkeeping above already removes stale audio.

        return history, str(speech_file_path) if speech_file_path else None

    except Exception as e:
        print(f"Error in retrieval chain: {str(e)}")
        print(traceback.format_exc())
        return history, None
130
+
131
+
132
+
133
+
134
def reset_interview():
    """Reset the interview session state so a fresh interview can begin."""
    global question_count, interview_history, last_audio_path, initial_audio_path
    question_count = 0
    interview_history = []
    # Capture then clear the stale audio reference before touching disk.
    stale_audio = last_audio_path
    last_audio_path = None
    initial_audio_path = None
    if stale_audio and os.path.exists(stale_audio):
        os.remove(stale_audio)
143
+
144
+
145
def read_file(file):
    """Extract plain text from an uploaded file or a filesystem path.

    Args:
        file: None, a str path to a UTF-8 text file, or a file-like upload
            object exposing `.name` and `.content` (txt/pdf/docx).

    Returns:
        The extracted text, or one of the sentinel strings
        "No file uploaded" / "Unsupported file format" / "Unable to read file".
    """
    if file is None:
        return "No file uploaded"

    if isinstance(file, str):
        with open(file, 'r', encoding='utf-8') as f:
            return f.read()

    if hasattr(file, 'name'):  # Check if it's a file-like object
        if file.name.endswith('.txt'):
            content = file.content
            # Uploads may deliver raw bytes; normalize so every branch returns str.
            return content.decode('utf-8') if isinstance(content, bytes) else content
        elif file.name.endswith('.pdf'):
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(file.content))
            # extract_text() returns None for image-only pages; joining None crashes.
            return "\n".join((page.extract_text() or "") for page in pdf_reader.pages)
        elif file.name.endswith('.docx'):
            doc = docx.Document(io.BytesIO(file.content))
            return "\n".join(paragraph.text for paragraph in doc.paragraphs)
        else:
            return "Unsupported file format"

    return "Unable to read file"
166
+
167
def generate_report_from_file(file, language):
    """Generate a clinical report from an uploaded document.

    Args:
        file: Upload accepted by read_file (None, path str, or file-like).
        language: Preferred report language; defaults to "english" when falsy.

    Returns:
        A (report_text, pdf_path) tuple; pdf_path is None on any failure.
    """
    try:
        file_content = read_file(file)
        # read_file signals failure with sentinel strings. Return a 2-tuple on
        # this path too, so callers can always unpack (content, pdf_path)
        # (previously this returned a bare string and broke unpacking).
        if file_content in ("No file uploaded", "Unsupported file format", "Unable to read file"):
            return file_content, None

        # Cap the content so it fits in the model's context window.
        file_content = file_content[:100000]

        report_language = language.strip().lower() if language else "english"
        print('preferred language:', report_language)
        print(f"Generating report in language: {report_language}")  # For debugging

        # Reinitialize the report chain with the new language
        _, report_retrieval_chain, _ = setup_knowledge_retrieval(llm, report_language)

        result = report_retrieval_chain.invoke({
            "input": "Please provide a clinical report based on the following content:",
            "history": file_content,
            "language": report_language
        })
        report_content = result.get("answer", "Unable to generate report due to insufficient information.")
        pdf_path = create_pdf(report_content)
        return report_content, pdf_path
    except Exception as e:
        return f"An error occurred while processing the file: {str(e)}", None
192
+
193
+
194
def generate_interview_report(interview_history, language):
    """Build a clinical report from the interview transcript.

    Returns a (report_text, pdf_path) tuple; pdf_path is None on failure.
    """
    try:
        report_language = (language or "english").strip().lower()
        print('preferred report_language language:', report_language)
        _, report_retrieval_chain, _ = setup_knowledge_retrieval(llm, report_language)

        payload = {
            "input": "Please provide a clinical report based on the following interview:",
            "history": "\n".join(interview_history),
            "language": report_language
        }
        result = report_retrieval_chain.invoke(payload)

        report_content = result.get("answer", "Unable to generate report due to insufficient information.")
        return report_content, create_pdf(report_content)
    except Exception as e:
        return f"An error occurred while generating the report: {str(e)}", None
210
+
211
def create_pdf(content):
    """Render report text into a PDF file and return its path.

    Lines may contain **bold** spans (markdown-style); those are rendered with
    a bold style, everything else justified normal text.

    Args:
        content: The report text, newline-separated.

    Returns:
        Path (str) of the generated PDF temp file. Caller owns deletion.
    """
    random_string = generate_random_string()

    # Use the random suffix in the filename (it was previously generated but
    # never used — the f-string had no placeholder), and close the handle so
    # reportlab can reopen the path on all platforms (Windows locks open files).
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=f'_{random_string}_report.pdf')
    temp_file.close()

    doc = SimpleDocTemplate(temp_file.name, pagesize=letter)
    styles = getSampleStyleSheet()

    # Custom style for bold text
    bold_style = ParagraphStyle('Bold', parent=styles['Normal'], fontName='Helvetica-Bold', fontSize=10)

    # Custom style for normal text with justification
    normal_style = ParagraphStyle('Normal', parent=styles['Normal'], alignment=TA_JUSTIFY)

    flowables = []

    for line in content.split('\n'):
        # Split on **...** spans so bold segments get their own Paragraph.
        parts = re.split(r'(\*\*.*?\*\*)', line)

        for part in parts:
            if part.startswith('**') and part.endswith('**'):
                flowables.append(Paragraph(part.strip('**'), bold_style))
            else:
                flowables.append(Paragraph(part, normal_style))

        flowables.append(Spacer(1, 12))  # Space between paragraphs

    doc.build(flowables)
    return temp_file.name