DrishtiSharma's picture
Create app.py
cff2190 verified
raw history blame
No virus
5.09 kB
import os
import dotenv
import gradio as gr
import lancedb
import logging
from langchain.embeddings.cohere import CohereEmbeddings
from langchain.llms import Cohere
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.vectorstores import LanceDB
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Assume these loaders are implemented based on your specific requirements
from custom_document_loaders import TextLoader, PyPDFLoader, DocxLoader, ImageLoader
import argostranslate.package
import argostranslate.translate
import shutil
# Configuration and logging.
# Load environment variables from a local ".env" file before anything reads them
# (presumably this is where the Cohere API credentials live -- confirm).
dotenv.load_dotenv(".env")
# Directory that holds the LanceDB database; initialize_database() deletes and recreates it.
DB_PATH = "/tmp/lancedb"
# Model name handed to CohereEmbeddings when the embedding client is built.
COHERE_MODEL_NAME = "multilingual-22-12"
# Maps the language names shown in the UI dropdowns to the language codes used for translation.
LANGUAGE_ISO_CODES = {"English": "en", "Hindi": "hi", "Turkish": "tr", "French": "fr"}
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Initialize argostranslate. This call runs at import time.
# NOTE(review): translate_text() only looks at *installed* languages; this call appears to
# refresh the package index without installing any language packages -- confirm that the
# required packages are installed elsewhere.
argostranslate.package.update_package_index()
def initialize_documents_and_embeddings(input_file_path):
    """Load a document, split it into chunks, and build the embedding client.

    The loader is chosen from the (case-insensitive) file extension of
    ``input_file_path``.

    Args:
        input_file_path: Path of the file to ingest.

    Returns:
        A ``(texts, embeddings)`` tuple: the chunks produced by the text
        splitter and a ``CohereEmbeddings`` instance configured with
        ``COHERE_MODEL_NAME``.

    Raises:
        ValueError: If the file extension has no associated loader.
    """
    logger.info(f"Processing file: {input_file_path}")

    _, suffix = os.path.splitext(input_file_path)
    suffix = suffix.lower()

    # One entry per accepted extension; several extensions share a loader.
    loaders_by_suffix = {
        ".txt": TextLoader,
        ".pdf": PyPDFLoader,
        ".doc": DocxLoader,
        ".docx": DocxLoader,
        ".jpg": ImageLoader,
        ".jpeg": ImageLoader,
        ".png": ImageLoader,
    }
    loader_cls = loaders_by_suffix.get(suffix)
    if loader_cls is None:
        raise ValueError("Unsupported file type. Supported files are .txt, .pdf, .docx, and image files.")

    raw_documents = loader_cls(input_file_path).load()

    # Small overlapping chunks: 300 characters with a 50-character overlap.
    splitter = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = splitter.split_documents(raw_documents)

    return chunks, CohereEmbeddings(model=COHERE_MODEL_NAME)
def initialize_database(texts, embeddings):
    """Create a fresh LanceDB table and index the given document chunks in it.

    Any existing database directory at ``DB_PATH`` is removed first, so every
    call starts from an empty store.

    Args:
        texts: Document chunks to index (as returned by the text splitter).
        embeddings: Embedding client; must provide ``embed_query`` and is also
            passed to ``LanceDB.from_documents``.

    Returns:
        The LangChain ``LanceDB`` vector store built from ``texts``.
    """
    if os.path.exists(DB_PATH):
        shutil.rmtree(DB_PATH)  # Ensure a fresh start
    db = lancedb.connect(DB_PATH)

    # LanceDB cannot create a table from a name alone: it needs either data or
    # a schema. Seed the table with a single embedded row so the column layout
    # (vector / text / id) is inferred from real values.
    # NOTE(review): the seed row stays in the table alongside the real chunks.
    seed_text = "Hello World"
    table = db.create_table(
        "multiling-rag",
        data=[
            {
                "vector": embeddings.embed_query(seed_text),
                "text": seed_text,
                "id": "1",
            }
        ],
        mode="overwrite",
    )
    return LanceDB.from_documents(texts, embeddings, connection=table)
def translate_text(text, from_code, to_code):
    """Translate ``text`` between two languages using argostranslate.

    Args:
        text: The text to translate.
        from_code: Language code of ``text`` (e.g. ``"en"``).
        to_code: Language code to translate into.

    Returns:
        The translated text. ``text`` is returned unchanged when both codes
        are identical. The string ``"Translation error"`` is returned when
        either language, or a translation between them, is not installed.
    """
    # Nothing to translate; also avoids looking up a same-language "pair".
    if from_code == to_code:
        return text

    installed_languages = argostranslate.translate.get_installed_languages()
    from_lang = next((lang for lang in installed_languages if lang.code == from_code), None)
    to_lang = next((lang for lang in installed_languages if lang.code == to_code), None)
    if not from_lang or not to_lang:
        logger.error("Translation languages not installed.")
        return "Translation error"

    # Both languages can be installed without a package linking them, in which
    # case get_translation() yields None rather than a translator.
    translation = from_lang.get_translation(to_lang)
    if translation is None:
        logger.error("No translation installed from %s to %s.", from_code, to_code)
        return "Translation error"
    return translation.translate(text)
def answer_question(question, input_language, output_language, db):
    """Produce an answer to ``question`` in the requested output language.

    The retrieval step is currently a placeholder: a fixed English response is
    generated and, if needed, translated into the output language.

    Args:
        question: The user's question text.
        input_language: Key of ``LANGUAGE_ISO_CODES`` naming the question's language.
        output_language: Key of ``LANGUAGE_ISO_CODES`` naming the answer's language.
        db: Vector store handle; not used by the placeholder logic.

    Returns:
        The (possibly translated) response, or a generic error message if
        anything goes wrong along the way.
    """
    try:
        source_code = LANGUAGE_ISO_CODES[input_language]
        target_code = LANGUAGE_ISO_CODES[output_language]

        # The question is still normalised to English even though the
        # placeholder response below does not consume it yet.
        if input_language != "English":
            question_in_english = translate_text(question, source_code, "en")
        else:
            question_in_english = question

        # Simplified retrieval and response logic for demonstration
        response = "This is a simulated response based on the question."

        if output_language == "English":
            return response
        return translate_text(response, "en", target_code)
    except Exception as e:
        logger.error(f"Error in answer_question: {str(e)}")
        return "An error occurred while processing your question."
def document_analysis_and_feedback(document_path, feedback):
    """Return the result of analysing an uploaded document.

    Placeholder: neither argument is inspected and a fixed notice is returned.

    Args:
        document_path: The uploaded document.
        feedback: Optional feedback text supplied by the user.

    Returns:
        A fixed message stating that the feature is not fully implemented.
    """
    # Placeholder for document analysis logic
    return "Document analysis and feedback processing is not fully implemented."
def setup_gradio_interface(db):
    """Build the Gradio UI for the chatbot.

    The interface has two tabs: one for asking a question with a chosen
    input/output language, and one for uploading a document with optional
    feedback.

    Args:
        db: Vector store handle forwarded to ``answer_question``.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    language_names = list(LANGUAGE_ISO_CODES)

    with gr.Blocks() as demo:
        gr.Markdown("# Multilingual Health and Wellness Chatbot")

        with gr.Tab("Ask a Question"):
            with gr.Row():
                # Default to English so a question submitted without touching
                # the dropdowns does not arrive with a None language.
                input_language = gr.Dropdown(language_names, value="English", label="Input Language")
                output_language = gr.Dropdown(language_names, value="English", label="Output Language")
            question = gr.Textbox(label="Your question")
            answer = gr.Textbox(label="Answer")
            question.submit(
                lambda q, i, o: answer_question(q, i, o, db),
                inputs=[question, input_language, output_language],
                outputs=answer,
            )

        with gr.Tab("Upload Document"):
            with gr.Row():
                document = gr.File(label="Upload your health document")
                feedback_box = gr.Textbox(label="Feedback (optional)")
            upload_response = gr.Textbox(label="Analysis Result")
            # gr.File has no "submit" event; run the analysis when a file is
            # uploaded, and again when the user submits feedback text.
            document.upload(
                document_analysis_and_feedback,
                inputs=[document, feedback_box],
                outputs=upload_response,
            )
            feedback_box.submit(
                document_analysis_and_feedback,
                inputs=[document, feedback_box],
                outputs=upload_response,
            )

    return demo
def main():
    """Ingest the sample document, build the vector store, and launch the UI."""
    source_path = "sample-text.txt"  # Placeholder file path
    chunks, embedder = initialize_documents_and_embeddings(source_path)
    vector_store = initialize_database(chunks, embedder)
    setup_gradio_interface(vector_store).launch()


# Run the app only when executed as a script, not when imported.
if __name__ == "__main__":
    main()