pa3lo committed on
Commit
748f651
1 Parent(s): 98a488d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from flask import Flask, redirect, render_template, request, url_for
2
+ from langchain.embeddings import HuggingFaceEmbeddings
3
+ from langchain.chains import RetrievalQA
4
+ from langchain_community.vectorstores import Pinecone
5
+ from langchain.prompts import PromptTemplate
6
+ from langchain_community.llms import CTransformers
7
+ from flask_limiter import Limiter
8
+ from flask_limiter.util import get_remote_address
9
+ from langchain_community.llms import LlamaCpp
10
+ import time
11
+
12
+
13
# Flask application with per-client-IP rate limiting.
app = Flask(__name__)

# Throttle requests by remote address to protect the local LLM backend;
# these defaults apply to every route unless a route overrides them.
limiter = Limiter(
    key_func=get_remote_address,
    app=app,
    default_limits=["200 per day", "20 per hour"],
)
21
+
22
+
23
# Sentence-transformer model used to embed queries for vector retrieval.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
25
+
26
# Prompt for the retrieval-QA chain; {context} and {question} are filled in
# by LangChain's "stuff" chain at query time.
# Fix: corrected the typo "avoide" -> "avoid" in the instruction text.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer say that you don't know it, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the correct answer in human readable text and avoid printing programming code!
Make it short with no more text than needed and do not repeat your answers or the question!
"""
36
+
37
+
38
# Local quantized Phi-2 model served through llama.cpp.
# Make sure the model path is correct for your system!
llm = LlamaCpp(
    model_path="model/phi-2.Q2_K.gguf",
    temperature=0.1,   # near-deterministic answers
    max_tokens=128,    # keep replies short, matching the prompt's brevity rule
    # Fix: LlamaCpp's parameter is `repeat_penalty`; `repetition_penalty`
    # is the CTransformers/HF naming and is not a LlamaCpp field, so the
    # original setting was silently not applied.
    repeat_penalty=1,
    top_p=1,
    verbose=True,  # Verbose is required to pass to the callback manager
)
47
+
48
+
49
+
50
# Prompt object consumed by the QA chain; variables match prompt_template.
PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

# Connect to the pre-built Pinecone index and expose it as a top-k retriever.
docsearch = Pinecone.from_existing_index("medicalbot", embeddings)
retriever = docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 2},  # fetch the 2 most similar chunks per query
)
53
+
54
# Retrieval-augmented QA chain: "stuff" packs the retrieved chunks straight
# into the prompt's {context} slot before calling the LLM.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs={"prompt": PROMPT},
    return_source_documents=True,
)
61
+
62
+
63
# Chat history rendered on the home page.
# NOTE(review): module-level list shared by all visitors — not per-session
# and not thread-safe; confirm this is acceptable for a single-user demo.
messages = []
65
+
66
+
67
@app.route("/", methods=["GET"])
@limiter.limit("10/minute")
def home():
    """Render the chat page with the accumulated message history."""
    page_context = {"messages": messages}
    return render_template("home.html", **page_context)
71
+
72
@app.route("/post_message", methods=["POST"])
def post_message():
    """Handle a chat submission: run the QA chain and record both sides.

    Reads the ``message`` form field, appends the user message and the bot's
    timed answer to the shared history, then redirects back to the home page.
    Any failure is reported to the user as a generic chat message.
    """
    start_time = time.time()
    try:
        msg = request.form['message']
        messages.append({"sender": "user", "text": msg})
        # Cap the history at 10 entries.
        # Fix: trim instead of clear() — clearing also wiped the user message
        # appended just above, so the bot reply appeared without its question.
        if len(messages) > 10:
            del messages[:-1]  # keep only the message just posted

        bot_response = qa({"query": msg})
        response_time = time.time() - start_time
        response_with_time = f"{bot_response['result']} (Response time: {response_time:.2f} seconds)"
        messages.append({"sender": "bot", "text": response_with_time})
    except Exception as e:
        # Broad catch is deliberate: any backend failure (missing form field,
        # Pinecone/LLM error) degrades to a friendly chat message instead of a 500.
        print(f"Error processing the message: {e}")
        messages.append({"sender": "bot", "text": "Sorry, I couldn't process your request."})

    return redirect(url_for('home'))
90
+
91
+
92
if __name__ == "__main__":
    # Bind on all interfaces; port 7860 is presumably the hosting platform's
    # expected port (Hugging Face Spaces default) — confirm before changing.
    app.run(host='0.0.0.0', port=7860)
94
+