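# Medical RAG chatbot: a local BioMistral-7B GGUF model (run via llama.cpp)
# answers questions over a Qdrant vector store of PubMedBERT embeddings,
# with a Gradio chat front end.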
from langchain.prompts import PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain_community.llms import LlamaCpp
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Qdrant
from qdrant_client import QdrantClient
import gradio as gr

# sys.path.insert(0, <envs\myenv\lib\site-packages>)
| local_llm = "BioMistral-7B.Q4_K_M.gguf" | |
| llm = LlamaCpp(model_path= | |
| local_llm,temperature=0.3,max_tokens=2048,top_p=1,n_ctx= 2048) | |
| prompt_template = """Use the following pieces of information to answer the user's question. | |
| If you don't know the answer, just say that you don't know, don't try to make up an answer. | |
| Chat History: {chat_history} | |
| Question: {question} | |
| Only return the helpful answer. Answer must be detailed and well explained. | |
| Helpful answer: | |
| """ | |
# PubMedBERT sentence embeddings; must match the model used to build the collection.
embeddings = SentenceTransformerEmbeddings(model_name="NeuML/pubmedbert-base-embeddings")
| # url = "http://127.0.0.1:6333" | |
| qdrant_api_key = 'ic_WPSW7zUEOYzJIbHAYKVUxTf7xVXxFfJgTN6UsnvcuXGwkRPGx3g' | |
| qdrant_url = 'https://ea51a65a-6fad-48ce-b571-846d3b496882.us-east4-0.gcp.cloud.qdrant.io' | |
| client = QdrantClient(url=qdrant_url, | |
| port=6333, | |
| api_key=qdrant_api_key, prefer_grpc=False) | |
# Wrap the existing "vector_db" collection as a LangChain vector store.
db = Qdrant(client=client, embeddings=embeddings, collection_name="vector_db")
# Retrieve only the single closest chunk per query.
retriever = db.as_retriever(search_kwargs={"k": 1})
chat_history = []

# Custom prompt built from the template above. It is currently not passed to
# the chain, which therefore falls back to LangChain's default prompts.
prompt = PromptTemplate(template=prompt_template,
                        input_variables=["chat_history", "question"])

# Create the custom chain
if llm is not None and db is not None:
    chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever)
else:
    print("LLM or Vector Database not initialized")
def predict(message, history):
    # Ask the chain, passing the accumulated (question, answer) history.
    response = chain({"question": message, "chat_history": chat_history})
    answer = response["answer"]
    chat_history.append((message, answer))

    # Re-shape the conversation into [question, answer] pairs; a fresh list is
    # built for each turn so earlier pairs are not mutated. This structure is
    # not used further; Gradio renders the conversation itself.
    history_langchain_format = []
    for input_question, bot_answer in history:
        history_langchain_format.append([input_question, bot_answer])
    history_langchain_format.append([message, answer])

    return answer
# Launch the Gradio chat UI.
gr.ChatInterface(predict).launch(debug=True)
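# To run (assumed, unpinned dependencies; the GGUF model file must sit beside
# this script, and the script name is assumed here):
#   pip install langchain langchain-community llama-cpp-python \
#       sentence-transformers qdrant-client gradio
#   python app.py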