from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from transformers import pipeline
import torch
import os
# llama 3 implementation starting here
# Checkpoint name of the Llama 3 instruct model. The Hugging Face access token
# is read from the HF_API_TOKEN environment variable (None when it is unset).
model_name_llama3 = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer_llama3 = AutoTokenizer.from_pretrained(
    model_name_llama3,
    token=os.getenv("HF_API_TOKEN"),
)
model_llama3 = AutoModelForCausalLM.from_pretrained(
    model_name_llama3,
    token=os.getenv("HF_API_TOKEN"),
)

# Reuse the end-of-sequence token for padding when the tokenizer has no pad token.
if tokenizer_llama3.pad_token is None:
    tokenizer_llama3.pad_token = tokenizer_llama3.eos_token

# Text-generation pipeline built from the model and tokenizer loaded above.
chatbot_llama3 = pipeline(
    "text-generation",
    model=model_llama3,
    tokenizer=tokenizer_llama3,
)
# prompt_llama3 = "I have fever for the past two days. do not ask any further questions only provide diagnosis with ICD code and treatment"
# responses_llama3 = chatbot(prompt_llama3, max_length=500, truncation=True)
# response = generator(prompt, max_length=100, num_return_sequences=1)
# print(responses[0]['generated_text'])
# llama 3 implementation ends here
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import StreamingResponse
from fastapi.responses import FileResponse, HTMLResponse
import os
import io
from fastapi.middleware.cors import CORSMiddleware
import google.generativeai as genai
import torch
from auto_gptq import AutoGPTQForCausalLM
from langchain import HuggingFacePipeline, PromptTemplate
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, TextStreamer, pipeline, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
"""Patient Response : hi doctor \n\n"What brings you here today? Please provide me with some background information about your health so far." \nPatient Response : im arafath i have fever"""
# Use the first CUDA device when torch reports one, otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
import googletrans
from googletrans import Translator
# Single googletrans client shared by every translation call.
translator = Translator()

# `lan` is googletrans.LANGUAGES; `keys` and `vals` are index-aligned lists of
# its keys and values, so the position of a value can be used to find its key.
lan = googletrans.LANGUAGES
keys = [code for code in lan]
vals = [lan[code] for code in keys]

# Both API keys are mandatory: a missing environment variable raises KeyError
# while the module is being imported.
GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
g_key_2 = os.environ['g_key_2']
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
from transformers import pipeline
import torch
import os
# Identifier handed to AutoTokenizer below, and the base file name referenced by
# the (currently disabled) quantized-model loader.
# NOTE(review): "llama2" is presumably a local directory - confirm.
model_name_or_path = "llama2"
model_basename = "model"

# Instructor embedding model, placed on the device selected in DEVICE.
embeddings = HuggingFaceInstructEmbeddings(
    model_kwargs={"device": DEVICE},
    model_name="hkunlp/instructor-large",
)

# Load the FAISS index saved under "faiss_index" with those embeddings.
new_db = FAISS.load_local("faiss_index", embeddings)

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    use_fast=True,
)
# model = AutoGPTQForCausalLM.from_quantized(
# model_name_or_path,
# revision="gptq-4bit-128g-actorder_True",
# model_basename=model_basename,
# use_safetensors=True,
# trust_remote_code=True,
# device=DEVICE,
# inject_fused_attention=False,
# quantize_config=None,
# )
# Default system prompt; it applies whenever no custom system prompt is supplied.
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful and honest assistant. give answer for any questions.
""".strip()


def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """Wrap *prompt* and *system_prompt* in the Llama 2 chat instruction format.

    The system prompt is enclosed in ``<<SYS>>`` / ``<</SYS>>`` markers inside a
    single ``[INST] ... [/INST]`` turn, with a blank line between the system
    section and the user prompt.

    Args:
        prompt: The user message (or a template containing placeholders).
        system_prompt: Instructions for the model; defaults to
            ``DEFAULT_SYSTEM_PROMPT``.

    Returns:
        The formatted prompt string, without leading or trailing whitespace
        around the ``[INST]`` / ``[/INST]`` markers.
    """
    # The result always starts with "[INST]" and ends with "[/INST]", so no
    # extra stripping is required.
    return f"[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{prompt} [/INST]"
# Setting up the RAG pipeline: a streamer bound to the tokenizer, configured to
# skip both the prompt and special tokens.
streamer = TextStreamer(
    tokenizer,
    skip_special_tokens=True,
    skip_prompt=True,
)
# text_pipeline = pipeline(
# "text-generation",
# model=model,
# tokenizer=tokenizer,
# max_new_tokens=4096,
# temperature=2,
# top_p=0.95,
# repetition_penalty=1.15,
# streamer=streamer,
# )
# global llm,llm2
# llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 2})
# llm2 = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 2})
# System prompt for queries that are unrelated to the indexed PDF data: it asks
# the model to answer from its own knowledge instead of the retrieved context.
SYSTEM_PROMPT = (
    "give answer from external data's. "
    "don't use the provided context"
)
# template = generate_prompt(
# """
# {context}
# Question: {question}
# """,
# system_prompt=SYSTEM_PROMPT,
# )
# prompt = PromptTemplate(template=template, input_variables=["context", "question"])
# global qa_chain,qa_chain_a
# qa_chain = RetrievalQA.from_chain_type(
# llm=llm,
# chain_type="stuff",
# retriever=new_db.as_retriever(search_kwargs={"k": 2}),
# return_source_documents=True,
# chain_type_kwargs={"prompt": prompt},
# )
# qa_chain_a = RetrievalQA.from_chain_type(
# llm=llm2,
# chain_type="stuff",
# retriever=new_db.as_retriever(search_kwargs={"k": 2}),
# return_source_documents=True,
# chain_type_kwargs={"prompt": prompt},
# )
# Instruction block that lists the fields of the patient report and asks the
# model to rewrite the details appended after it into that layout.
report_prompt_template = (
    "\n"
    "this is report format\n"
    "Patient Name: [Insert name here]\n"
    "Age: [Insert age here]\n"
    "sex: [Insert here]\n"
    "Chief Complaint: [insert here]\n"
    "History of Present Illness:[insert here]\n"
    "Past Medical History: [insert here]\n"
    "Medication List: [insert here]\n"
    "Social History: [insert here]\n"
    "Family History: [insert here]\n"
    "Review of Systems: [insert here]\n"
    "ICD Code: [insert here]\n"
    "convert this bellow details into above format don't add any other details ."
    "don't use the provided pdfs data's.\n\n"
)
# 4. Fixed history-taking questions, keyed by the conversation step id (8-12)
#    at which each one is asked.
final_question = {
    8: "Do you have a history of medical conditions, such as allergies, chronic illnesses, or previous surgeries? If so, please provide details.",
    9: "What medications are you currently taking, including supplements and vitamins?",
    10: "Can you please Describe Family medical history (particularly close relatives): Does anyone in your immediate family suffer from similar symptoms or health issues?",
    11: "Can you please Describe Social history: Marital status, occupation, living arrangements, education level, and support system.",
    12: "Could you describe your symptoms, and have you noticed any changes or discomfort related to your respiratory, cardiovascular, gastrointestinal, or other body systems?",
}
# 1. Opening prompt that puts the Llama model in the role of a family physician.
#    NOTE(review): the name `sys` matches the standard-library module name; it
#    is kept because other code in this file reads it.
sys = "You are a general family physician.\n\n"

# 5. Closing instruction appended after the conversation: ask for a treatment
#    and a diagnosis with ICD code, addressed directly to the patient, and tell
#    the model not to answer unrelated questions from the provided data.
end_sys_prompts = (
    "\n\ngive correct treatment and most related diagnosis with ICD code don't ask any questions. "
    "if question is not related to provided data don't give answer from this provided data's. "
    "give answr dirrcctly to patient not like a third persion"
)
# def refresh_model():
# global llm,llm2
# llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 2})
# llm2 = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 2})
# global qa_chain,qa_chain_a
# qa_chain = RetrievalQA.from_chain_type(
# llm=llm,
# chain_type="stuff",
# retriever=new_db.as_retriever(search_kwargs={"k": 2}),
# return_source_documents=True,
# chain_type_kwargs={"prompt": prompt},
# )
# qa_chain_a = RetrievalQA.from_chain_type(
# llm=llm2,
# chain_type="stuff",
# retriever=new_db.as_retriever(search_kwargs={"k": 2}),
# return_source_documents=True,
# chain_type_kwargs={"prompt": prompt},
# )
# print("model refreshed")
# FastAPI application with CORS middleware that accepts every origin, method
# and header, with credentials allowed.
# NOTE(review): wildcard origins together with credentials is very permissive -
# confirm this is intended for the deployment.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
def translate(text,language):
    """Translate *text* into *language* with the module-level googletrans client.

    Args:
        text: The text to translate.
        language: Target language. An exact entry of ``vals`` (a language name
            from ``googletrans.LANGUAGES``) is used as-is; otherwise the value
            is compared case-insensitively, ignoring surrounding whitespace,
            against both the language names and the language codes in ``keys``.

    Returns:
        The translated text as a ``str``.

    Raises:
        ValueError: If *language* matches no known language name or code.
    """
    if language in vals:
        # Fast path: exact language name.
        dest = keys[vals.index(language)]
    else:
        wanted = str(language).strip().casefold()
        dest = next(
            (
                code
                for code, name in zip(keys, vals)
                if wanted in (str(name).casefold(), str(code).casefold())
            ),
            None,
        )
        if dest is None:
            raise ValueError(f"unsupported translation language: {language!r}")
    return str(translator.translate(text, dest=dest).text)
# Upper bound on Gemini calls per request, so a persistent failure (bad key,
# blocked response, outage) cannot keep the request retrying forever.
_MAX_GEMINI_ATTEMPTS = 3

# System prompt for the very first question (step 1), which also asks for the
# patient's name.
_FIRST_QUESTION_PROMPT = (
    " give always next small single question for laste patient response like a doctor.\n"
    "if patient not enter there name then always ask patient name within in the question like a real doctor . always give single question\n"
)

# 3. System prompt that makes Gemini ask the next single yes/no question based
#    on the conversation so far.
_YES_NO_QUESTION_PROMPT = (
    ' youre family physician ask next single suitable "yes", "no" question to get more details for following conversation provided and give responses more natural way like real human\n'
    "don't give any diagnosis and treatmets . always only ask next single small question\n"
)

# System prompt for the free-form question/answer mode.
_OPEN_QA_PROMPT = 'give next small single response for laste patient response like a doctor. '

# NOTE(review): the line-break marker had been lost from this file's string
# literals; "<br>" is assumed because the responses replace "\n" for display -
# confirm against the client that renders them.
_LINE_BREAK = "<br>"
_CLOSING_LINE = _LINE_BREAK + "Do you have any other question or concerns to ask? It was a pleasure meeting you"

# Name placeholders Gemini sometimes emits in the first question.
_NAME_PLACEHOLDERS = ("[Patient Name]", "[Patient's name]", "[patient name]")


def _bilingual(text, language):
    """Build the response payload: the English text plus its translation."""
    return {"english": text, "translated": translate(text, language)}


def _ask_gemini(system_prompt, chain):
    """Return Gemini's text reply to *system_prompt* followed by *chain*.

    The call is retried up to ``_MAX_GEMINI_ATTEMPTS`` times; reading
    ``response.text`` is part of the attempt, so a reply without text is
    retried as well.

    Raises:
        HTTPException: 502 when every attempt failed.
    """
    last_error = None
    for _ in range(_MAX_GEMINI_ATTEMPTS):
        try:
            genai.configure(api_key=g_key_2)
            gemini_model = genai.GenerativeModel('gemini-pro')
            gemini_response = gemini_model.generate_content(system_prompt + " \n\n" + chain)
            reply = gemini_response.text
        except Exception as e:
            last_error = e
            print("An error occurred:", e)
        else:
            print(reply)
            return reply
    raise HTTPException(
        status_code=502,
        detail="The language model did not return a response.",
    ) from last_error


def _llama3_diagnosis(chain):
    """Run the Llama 3 pipeline on the conversation and return its generated text."""
    responses_llama3 = chatbot_llama3(sys+chain+end_sys_prompts, max_length=500, truncation=True)
    generated = str(responses_llama3[0]['generated_text'])
    print(generated)
    return generated


def _open_qa(chain, language):
    """Answer the latest patient message in free-form question/answer mode."""
    print("\nopen QA mode running ========================================\n")
    return _bilingual(str(_ask_gemini(_OPEN_QA_PROMPT, chain)), language)


@app.post("/llm_response/")
async def llm_response(chain,id,mode,language):
    """Produce the next doctor-side message for a consultation.

    Args:
        chain: The conversation text so far.
        id: Step number of the conversation (anything ``int()`` accepts).
        mode: ``"dirrect_QA"`` selects the direct question/answer flow; any
            other value selects the guided interview.
        language: Target language passed to ``translate``.

    Returns:
        A dict with the keys ``"english"`` and ``"translated"``.

    Routing, checked in this order:
        * direct mode, step 7: Llama 3 diagnosis and treatment plus a closing line
        * direct mode, step > 7: free-form Gemini answer
        * step 13: Llama 3 diagnosis and treatment
        * step > 13: free-form Gemini answer
        * steps 8-12: the fixed question from ``final_question``
        * step 1: first question, which asks for the patient's name
        * any other step: next yes/no question from Gemini

    Raises:
        HTTPException: 422 when *id* is not an integer, 502 when Gemini keeps
            failing.
    """
    try:
        step = int(id)
    except (TypeError, ValueError):
        raise HTTPException(status_code=422, detail="id must be an integer") from None
    direct_mode = str(mode) == "dirrect_QA"

    if direct_mode and step == 7:
        diagnosis_and_treatment = _llama3_diagnosis(chain) + _CLOSING_LINE
        # Rephrase the model output so that it addresses the patient directly.
        diagnosis_and_treatment = diagnosis_and_treatment.replace("As a general family physician","")
        diagnosis_and_treatment = diagnosis_and_treatment.replace("the patient's","your")
        print("dirrect answer")
        return _bilingual(diagnosis_and_treatment.replace("\n", _LINE_BREAK), language)

    if direct_mode and step > 7:
        return _open_qa(chain, language)

    if step == 13:
        diagnosis_and_treatment = _llama3_diagnosis(chain)
        return _bilingual(diagnosis_and_treatment.replace("\n", _LINE_BREAK), language)

    if step > 13:
        return _open_qa(chain, language)

    if step >= 8:
        return _bilingual(final_question[step], language)

    if step == 1:
        print("first question")
        first_question = _ask_gemini(_FIRST_QUESTION_PROMPT, chain)
        # Turn any name placeholder into an explicit request for the name.
        for placeholder in _NAME_PLACEHOLDERS:
            first_question = first_question.replace(placeholder, "what's your name ?")
        return _bilingual(first_question, language)

    # 2. Every remaining step asks the next yes/no question.
    return _bilingual(_ask_gemini(_YES_NO_QUESTION_PROMPT, chain), language)