tmt3103 committed
Commit eda66e0 · 1 Parent(s): f6ddfbd

update Vietnamese for chatbot

Files changed (2)
  1. app.py +27 -8
  2. src/prompt.py +18 -8
app.py CHANGED
@@ -5,6 +5,9 @@ from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnableLambda, RunnablePassthrough
+from operator import itemgetter
 from dotenv import load_dotenv
 from src.prompt import *
 import os
@@ -30,13 +33,13 @@ docsearch = PineconeVectorStore.from_existing_index(
     embedding=embeddings
 )
 
-retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":5})
+retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k":3})
 
 llm = ChatGoogleGenerativeAI(
     model="gemini-2.0-flash-lite",
     google_api_key=GEMINI_API_KEY,
     temperature=0.4,
-    max_output_tokens=2048
+    max_output_tokens=4069
 )
 prompt = ChatPromptTemplate.from_messages(
     [
@@ -44,9 +47,25 @@ prompt = ChatPromptTemplate.from_messages(
         ("human", "{input}"),
     ]
 )
+# legacy chain
+# question_answer_chain = create_stuff_documents_chain(llm, prompt)
+# rag_chain = create_retrieval_chain(retriever, question_answer_chain)
 
-question_answer_chain = create_stuff_documents_chain(llm, prompt)
-rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+translate_vi_to_en_chain = translate_vi_to_en_prompt | llm | StrOutputParser()
+translate_en_to_vi_chain = translate_en_to_vi_prompt | llm | StrOutputParser()
+
+rag_chain = (
+    RunnableLambda(lambda x: translate_vi_to_en_chain.invoke({"text": x["text"]}))
+    | RunnableLambda(lambda x: {"input": x})
+    | {
+        "context": lambda x: retriever.invoke(x["input"]),
+        "input": itemgetter("input"),
+    }
+    | prompt
+    | llm
+    | StrOutputParser()
+    | RunnableLambda(lambda x: translate_en_to_vi_chain.invoke({"text": x}))
+)
 
 
 @app.route("/")
@@ -58,9 +77,9 @@ def chat():
     msg = request.form["msg"]
     input = msg
     print(input)
-    response = rag_chain.invoke({"input": msg})
-    print("Response : ", response["answer"])
-    return str(response["answer"])
+    response = rag_chain.invoke({"text": msg})
+    print("Response : ", response)
+    return response
 
 if __name__ == '__main__':
-    app.run(host="0.0.0.0", port= 7860, debug= True)
+    app.run(host="0.0.0.0", port= 8080, debug= True)
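
For readers tracing the new pipeline, here is a minimal, self-contained sketch of the translate → retrieve → answer → translate-back flow that the rewritten rag_chain follows. The fake_* functions are illustrative stand-ins for the Gemini translation chains, the Pinecone retriever, and the answer prompt; they are not part of the commit and only show the piping pattern.

```python
# Sketch of the translate -> retrieve -> answer -> translate-back flow used by rag_chain.
# The fake_* functions below are stand-ins so this runs offline; only the LCEL shape matters.
from operator import itemgetter

from langchain_core.runnables import RunnableLambda


def fake_vi_to_en(inputs: dict) -> str:
    # Stand-in for translate_vi_to_en_chain.invoke({"text": ...})
    return f"[en] {inputs['text']}"


def fake_retrieve(query: str) -> list[str]:
    # Stand-in for retriever.invoke(query); returns pseudo-documents
    return [f"doc about: {query}"]


def fake_answer(inputs: dict) -> str:
    # Stand-in for prompt | llm | StrOutputParser()
    return f"answer using {inputs['context']} for '{inputs['input']}'"


def fake_en_to_vi(text: str) -> str:
    # Stand-in for translate_en_to_vi_chain.invoke({"text": ...})
    return f"[vi] {text}"


sketch_chain = (
    RunnableLambda(fake_vi_to_en)                        # Vietnamese question -> English
    | RunnableLambda(lambda q: {"input": q})             # wrap for the RAG step
    | {
        "context": lambda x: fake_retrieve(x["input"]),  # similarity search
        "input": itemgetter("input"),
      }
    | RunnableLambda(fake_answer)                        # English answer from context
    | RunnableLambda(fake_en_to_vi)                      # English answer -> Vietnamese
)

print(sketch_chain.invoke({"text": "Tiểu đường là gì?"}))
```

The dict in the middle of the pipe is coerced by LCEL into a RunnableParallel, which is how the retrieved context and the translated question arrive together at the answering step.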
src/prompt.py CHANGED
@@ -1,12 +1,22 @@
+from langchain_core.prompts import ChatPromptTemplate
+
 system_prompt = ('''
 You are a medical question-answering assistant.
-Use ONLY the retrieved context below to answer the question.
-If the context does not provide enough information, say "I don't know."
-When answering:
-- Be clear, concise, and medically accurate.
-- Limit your response to a maximum of three sentences.
-- If relevant, summarize key points in a list for readability.
-- Always remind the user to consult a qualified healthcare professional for personalized medical advice.
+Use the retrieved context as your primary source of truth.
+- If the context does not provide enough information, say "I don't know" or supplement with medically sound general knowledge.
+- Provide clear, medically accurate explanations. Responses may be short or long depending on the complexity of the question.
+- Vary your wording so repeated questions do not produce identical answers.
+- When the question involves diagnosis, treatment, or personal medical decisions, remind the user to consult a qualified healthcare professional.
 '''
 "{context}"
-)
+)
+
+translate_vi_to_en_prompt = ChatPromptTemplate.from_messages([
+    ("system", "You are a helpful assistant that translates Vietnamese text to English. Only provide the translated text, do not include any additional information."),
+    ("user", "Translate the following Vietnamese text to English: {text}"),
+])
+
+translate_en_to_vi_prompt = ChatPromptTemplate.from_messages([
+    ("system", "You are a helpful assistant that translates English to Vietnamese. Only provide the translated text, do not include any additional information."),
+    ("user", "Translate the following English text to Vietnamese: {text}"),
+])
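
To see what the new translation prompts send to the model, the short snippet below formats translate_vi_to_en_prompt locally without calling Gemini; the sample Vietnamese question is illustrative only.

```python
# Formats the Vietnamese-to-English translation prompt locally (no Gemini call),
# showing the exact messages that translate_vi_to_en_chain passes to the LLM.
from langchain_core.prompts import ChatPromptTemplate

translate_vi_to_en_prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant that translates Vietnamese text to English. "
               "Only provide the translated text, do not include any additional information."),
    ("user", "Translate the following Vietnamese text to English: {text}"),
])

# Example Vietnamese input ("What is diabetes?"); purely illustrative.
messages = translate_vi_to_en_prompt.format_messages(text="Tiểu đường là gì?")
for m in messages:
    print(f"{m.type}: {m.content}")
# system: You are a helpful assistant that translates Vietnamese text to English. Only provide the translated text, do not include any additional information.
# human: Translate the following Vietnamese text to English: Tiểu đường là gì?
```

In app.py this template is simply piped into the Gemini model and a StrOutputParser, so each translation chain returns plain text.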