# Flask web UI for a retrieval-augmented medical chatbot: MiniLM sentence
# embeddings, Pinecone vector search, and a local quantised Phi-2 model
# served through llama.cpp.

import time

from flask import Flask, redirect, render_template, request, url_for
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address

from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_community.vectorstores import Pinecone


app = Flask(__name__)
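
# Rate-limit every route by client IP: 200 requests per day and 20 per hour by
# default; the chat page below adds a stricter 10-per-minute limit.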
limiter = Limiter(
    app=app,
    key_func=get_remote_address,
    default_limits=["200 per day", "20 per hour"]
)
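

# Query embeddings; these should be produced by the same sentence-transformer
# that was used to build the "medicalbot" Pinecone index, otherwise the
# similarity search will return unrelated chunks.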
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, say that you don't know it; don't try to make up an answer.

Context: {context}
Question: {question}

Only return the correct answer in human-readable text and avoid printing programming code!
Make it short, with no more text than needed, and do not repeat your answers or the question!
"""
llm = LlamaCpp(
    model_path="model/phi-2.Q2_K.gguf",
    temperature=0.1,
    max_tokens=128,
    repeat_penalty=1.0,  # LlamaCpp's parameter name; 1.0 applies no repetition penalty
    top_p=1,
    verbose=True,
)


PROMPT = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
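
# Connect to an existing Pinecone index; this assumes the Pinecone client is
# already configured (e.g. an API key in the environment) and that the
# "medicalbot" index has been populated.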
docsearch = Pinecone.from_existing_index("medicalbot", embeddings)
retriever = docsearch.as_retriever(search_type="similarity", search_kwargs={"k": 2})

qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",  # retrieved chunks are inserted verbatim into {context}
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT}
)
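

# In-memory chat history shared by every client of this process; it is
# cleared once it grows past 10 entries.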
messages = []


@app.route("/", methods=["GET"])
@limiter.limit("10/minute")
def home():
    return render_template("home.html", messages=messages)
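

# Handle a chat submission: store the user message, run the RetrievalQA chain,
# and append the model's answer together with the measured response time.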
@app.route("/post_message", methods=["POST"])
def post_message():
    start_time = time.time()
    try:
        msg = request.form['message']
        messages.append({"sender": "user", "text": msg})
        if len(messages) > 10:
            messages.clear()

        bot_response = qa({"query": msg})
        response_time = time.time() - start_time
        response_with_time = f"{bot_response['result']} (Response time: {response_time:.2f} seconds)"
        messages.append({"sender": "bot", "text": response_with_time})
    except Exception as e:
        print(f"Error processing the message: {e}")
        messages.append({"sender": "bot", "text": "Sorry, I couldn't process your request."})

    return redirect(url_for('home'))
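

# Listen on all interfaces on port 7860 (the port convention used by
# Hugging Face Spaces) so the UI is reachable from outside the host or container.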
if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)