# medicalchatbot/app.py
import logging
import sys

# Send log output to stdout so it is visible in the hosting console.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import HuggingFaceLLM
from langchain.document_loaders import PyPDFLoader
# Mount Google Drive to access data (Colab-only; you may need to authenticate).
import pandas as pd
from datasets import load_dataset, concatenate_datasets
# Hugging Face datasets that make up the chatbot's knowledge base.
dataset_names = [
    "medalpaca/medical_meadow_mediqa",
    "medalpaca/medical_meadow_medical_flashcards",
    "medalpaca/medical_meadow_wikidoc_patient_information",
    "medalpaca/medical_meadow_wikidoc",
    "medalpaca/medical_meadow_pubmed_causal",
    "medalpaca/medical_meadow_medqa",
    "medalpaca/medical_meadow_health_advice",
    "medalpaca/medical_meadow_cord19",
]

# Use a distinct name so the `datasets` module imported above is not shadowed.
loaded_datasets = [load_dataset(name, split="train") for name in dataset_names]
combined_dataset = concatenate_datasets(loaded_datasets)
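# Quick sanity check of the combined corpus (an illustrative sketch; `len()`
# and `column_names` are standard on Hugging Face Dataset objects).
logging.info(
    "Combined dataset: %d rows, columns: %s",
    len(combined_dataset),
    combined_dataset.column_names,
)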
# from google.colab import drive
# drive.mount('/content/drive')

# Reading the data from the saved path in Google Drive (Colab-only).
# documents = SimpleDirectoryReader("/content/drive/MyDrive/Data").load_data()
from langchain.text_splitter import CharacterTextSplitter
from langchain import OpenAI
# Define a system prompt for the Q&A assistant.
from llama_index.prompts.prompts import SimpleInputPrompt

system_prompt = (
    "You are a medical AI chatbot. Your goal is to answer questions as "
    "accurately as possible based on the instructions and context provided. "
    "Use only information from the previous context information. Do not "
    "invent information or give false answers."
)
# This will wrap the default prompts that are internal to llama-index
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")
# Log in to Hugging Face (needed for gated models; run once in a terminal).
# !huggingface-cli login
# Configure the HuggingFaceLLM (language model).
import torch

llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # With do_sample=False decoding is greedy, so temperature has no effect.
    generate_kwargs={"temperature": 0.5, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name="NousResearch/Llama-2-7b-chat-hf",
    model_name="NousResearch/Llama-2-7b-chat-hf",
    device_map="auto",
    # float16 plus 8-bit loading reduces CUDA memory usage
    # (load_in_8bit requires the bitsandbytes package).
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)
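# Optional one-off generation check before wiring up the index. A minimal
# sketch, assuming this llama_index version's LLM interface, where
# `complete()` runs a raw prompt and returns a response with a `.text` field:
# print(llm.complete("What is hypertension?").text)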
# Configure embeddings using a Hugging Face sentence-transformers model.
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding

embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
)
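# Optional embedding sanity check (a sketch): all-mpnet-base-v2 produces
# 768-dimensional vectors, so one embedding call confirms the model loaded.
# `get_text_embedding` is the llama_index embedding interface assumed here.
sample_vector = embed_model.get_text_embedding("test sentence")
logging.info("Embedding dimension: %d", len(sample_vector))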
# Configure the service context (chunking, LLM, and embeddings).
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model,
)
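# VectorStoreIndex.from_documents expects llama_index Document objects, not a
# raw Hugging Face dataset, so the rows are converted first. A minimal sketch,
# assuming the medical_meadow column names ("input"/"output"); adjust the
# field names if a dataset differs.
from llama_index import Document

documents = [
    Document(text=f"{row.get('input', '')}\n{row.get('output', '')}")
    for row in combined_dataset
]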
# Create a vector store index from the converted documents.
index = VectorStoreIndex.from_documents(documents, service_context=service_context)
# Create a query engine for the index and run a quick smoke test.
query_engine = index.as_query_engine()
response = query_engine.query("What are the common symptoms of diabetes?")
print(response)
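# Optionally inspect which chunks the engine retrieved (a sketch; this
# llama_index version's Response object is assumed to expose `source_nodes`):
for source in response.source_nodes:
    logging.info("score=%s text=%.80s", source.score, source.node.get_text())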
# To interact with the chatbot through a web UI.
import gradio as gr

# Gradio textboxes expect a string, so the Response object is converted
# explicitly before being returned.
def chatbot_interface(query):
    response = query_engine.query(query)
    return str(response)
# Create a Gradio interface.
iface = gr.Interface(
    fn=chatbot_interface,
    inputs=gr.Textbox(placeholder="Enter your question here..."),
    outputs=gr.Textbox(),
    live=False,
    title="Medical Chatbot Demo",
    description="Ask questions, and the chatbot will answer based on the provided medical context.",
)
# Launch the Gradio interface
iface.launch(debug=True)