DrishtiSharma committed on
Commit
cff2190
1 Parent(s): d3d9576

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +115 -0
app.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import dotenv
3
+ import gradio as gr
4
+ import lancedb
5
+ import logging
6
+ from langchain.embeddings.cohere import CohereEmbeddings
7
+ from langchain.llms import Cohere
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.chains import RetrievalQA
10
+ from langchain.vectorstores import LanceDB
11
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
12
+ # Assume these loaders are implemented based on your specific requirements
13
+ from custom_document_loaders import TextLoader, PyPDFLoader, DocxLoader, ImageLoader
14
+ import argostranslate.package
15
+ import argostranslate.translate
16
+ import shutil
17
+
18
# Configuration and Logging
dotenv.load_dotenv(".env")
DB_PATH = "/tmp/lancedb"
COHERE_MODEL_NAME = "multilingual-22-12"
LANGUAGE_ISO_CODES = {"English": "en", "Hindi": "hi", "Turkish": "tr", "French": "fr"}

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _install_translation_packages(language_codes, pivot_code="en"):
    """Install the Argos Translate packages needed to reach ``pivot_code``.

    For every code in ``language_codes`` other than ``pivot_code`` both
    directions (code -> pivot and pivot -> code) are requested. Pairs that
    are already installed are skipped. Installation is best-effort: a pair
    that is missing from the package index, or whose download/installation
    fails, is logged and skipped so that start-up continues.

    Args:
        language_codes: Iterable of ISO language codes the app offers.
        pivot_code: Code of the language all translations go through.
    """
    wanted = set()
    for code in language_codes:
        if code != pivot_code:
            wanted.add((code, pivot_code))
            wanted.add((pivot_code, code))

    installed = {
        (pkg.from_code, pkg.to_code)
        for pkg in argostranslate.package.get_installed_packages()
    }
    available = {
        (pkg.from_code, pkg.to_code): pkg
        for pkg in argostranslate.package.get_available_packages()
    }

    for pair in sorted(wanted - installed):
        package = available.get(pair)
        if package is None:
            logger.warning("No Argos Translate package available for %s -> %s", *pair)
            continue
        try:
            logger.info("Installing Argos Translate package %s -> %s", *pair)
            argostranslate.package.install_from_path(package.download())
        except Exception:
            # Start-up boundary: keep the app running without this pair.
            logger.exception("Could not install Argos Translate package %s -> %s", *pair)


# Initialize argostranslate
argostranslate.package.update_package_index()
# Refreshing the index alone installs nothing; without the language packages
# get_installed_languages() stays empty and translate_text() can only return
# its "Translation error" sentinel.
_install_translation_packages(LANGUAGE_ISO_CODES.values())
29
+
30
def initialize_documents_and_embeddings(input_file_path):
    """Load a document, split it into chunks, and create the embedding model.

    The loader class is chosen from the lower-cased file extension. The
    loaded documents are split with a ``RecursiveCharacterTextSplitter``
    (chunk size 300, overlap 50).

    Args:
        input_file_path: Path of the file to ingest.

    Returns:
        A ``(texts, embeddings)`` tuple: the chunks returned by the splitter
        and a ``CohereEmbeddings`` instance built with ``COHERE_MODEL_NAME``.

    Raises:
        ValueError: If the file extension is not one of the supported ones.
    """
    logger.info(f"Processing file: {input_file_path}")
    _, suffix = os.path.splitext(input_file_path)
    suffix = suffix.lower()

    # Extension -> loader class; replaces a chain of membership tests.
    loader_by_suffix = {
        ".txt": TextLoader,
        ".pdf": PyPDFLoader,
        ".doc": DocxLoader,
        ".docx": DocxLoader,
        ".jpg": ImageLoader,
        ".jpeg": ImageLoader,
        ".png": ImageLoader,
    }
    loader_cls = loader_by_suffix.get(suffix)
    if loader_cls is None:
        raise ValueError("Unsupported file type. Supported files are .txt, .pdf, .docx, and image files.")

    raw_documents = loader_cls(input_file_path).load()
    chunker = RecursiveCharacterTextSplitter(chunk_size=300, chunk_overlap=50)
    chunks = chunker.split_documents(raw_documents)
    return chunks, CohereEmbeddings(model=COHERE_MODEL_NAME)
51
+
52
def initialize_database(texts, embeddings):
    """Create a fresh LanceDB table and index the document chunks in it.

    Any existing database directory at ``DB_PATH`` is removed first so each
    run starts from an empty store.

    Args:
        texts: Document chunks to index.
        embeddings: Embedding model; used for the seed row and passed on to
            ``LanceDB.from_documents``.

    Returns:
        The vector store returned by ``LanceDB.from_documents``.
    """
    if os.path.exists(DB_PATH):
        shutil.rmtree(DB_PATH)  # Ensure a fresh start
    db = lancedb.connect(DB_PATH)
    # create_table() needs either data or a schema. Seeding it with a single
    # embedded row lets LanceDB infer the vector dimension and the columns.
    # NOTE: the seed row stays in the table and costs one embedding call.
    table = db.create_table(
        "multiling-rag",
        data=[
            {
                "vector": embeddings.embed_query("Hello World"),
                "text": "Hello World",
                "id": "1",
            }
        ],
        mode="overwrite",
    )
    return LanceDB.from_documents(texts, embeddings, connection=table)
58
+
59
def translate_text(text, from_code, to_code):
    """Translate ``text`` between two installed Argos Translate languages.

    Args:
        text: Text to translate.
        from_code: Language code of ``text``.
        to_code: Language code to translate into.

    Returns:
        The translated text, or the literal string ``"Translation error"``
        when either language, or a translation between them, is not
        installed.
    """
    installed_languages = argostranslate.translate.get_installed_languages()
    from_lang = next((lang for lang in installed_languages if lang.code == from_code), None)
    to_lang = next((lang for lang in installed_languages if lang.code == to_code), None)
    if from_lang is None or to_lang is None:
        logger.error("Translation languages not installed.")
        return "Translation error"

    translation = from_lang.get_translation(to_lang)
    if translation is None:
        # Both languages exist but no package links them; without this guard
        # the .translate() call below would raise AttributeError.
        logger.error("No translation installed from %s to %s.", from_code, to_code)
        return "Translation error"
    return translation.translate(text)
68
+
69
def answer_question(question, input_language, output_language, db):
    """Produce an answer to ``question`` in the requested output language.

    The question is translated to English unless the input language is
    already English. The answer itself is a fixed placeholder string, which
    is translated into the output language unless that is English. ``db``
    is accepted but not used by the current placeholder logic.

    Args:
        question: The user's question.
        input_language: Key of ``LANGUAGE_ISO_CODES`` for the question.
        output_language: Key of ``LANGUAGE_ISO_CODES`` for the answer.
        db: Vector store handle (currently unused).

    Returns:
        The answer text, or a generic error message if anything raises.
    """
    try:
        source_code = LANGUAGE_ISO_CODES[input_language]
        target_code = LANGUAGE_ISO_CODES[output_language]

        if input_language == "English":
            question_in_english = question
        else:
            question_in_english = translate_text(question, source_code, "en")

        # Simplified retrieval and response logic for demonstration
        response = "This is a simulated response based on the question."

        if output_language == "English":
            return response
        return translate_text(response, "en", target_code)
    except Exception as e:
        logger.error(f"Error in answer_question: {str(e)}")
        return "An error occurred while processing your question."
81
+
82
def document_analysis_and_feedback(document_path, feedback):
    """Return a fixed notice that document analysis is not implemented.

    Both arguments are currently ignored.

    Args:
        document_path: The uploaded document.
        feedback: Optional feedback text.

    Returns:
        A constant placeholder message.
    """
    # Placeholder for document analysis logic
    return "Document analysis and feedback processing is not fully implemented."
86
+
87
def setup_gradio_interface(db):
    """Build the Gradio Blocks UI with a question tab and an upload tab.

    Args:
        db: Vector store handle passed through to ``answer_question``.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    language_names = list(LANGUAGE_ISO_CODES)

    with gr.Blocks() as demo:
        gr.Markdown("# Multilingual Health and Wellness Chatbot")

        with gr.Tab("Ask a Question"):
            with gr.Row():
                input_language = gr.Dropdown(language_names, label="Input Language")
                output_language = gr.Dropdown(language_names, label="Output Language")
            question = gr.Textbox(label="Your question")
            answer = gr.Textbox(label="Answer")
            # The lambda closes over db so Gradio only has to supply the
            # three UI values.
            question.submit(
                lambda q, i, o: answer_question(q, i, o, db),
                inputs=[question, input_language, output_language],
                outputs=answer,
            )

        with gr.Tab("Upload Document"):
            with gr.Row():
                document = gr.File(label="Upload your health document")
                feedback_box = gr.Textbox(label="Feedback (optional)")
            upload_response = gr.Textbox(label="Analysis Result")
            # gr.File exposes no `submit` event; `upload` fires once the
            # user has uploaded a file.
            document.upload(
                document_analysis_and_feedback,
                inputs=[document, feedback_box],
                outputs=upload_response,
            )

    return demo
106
+
107
def main():
    """Ingest the sample document, build the vector store, and launch the UI."""
    sample_path = "sample-text.txt"  # Placeholder file path
    chunks, embedder = initialize_documents_and_embeddings(sample_path)
    store = initialize_database(chunks, embedder)
    setup_gradio_interface(store).launch()


if __name__ == "__main__":
    main()