from langchain.prompts import PromptTemplate
from langchain_community.llms import HuggingFaceEndpoint
from langchain.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
import os
from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
# Docs:- https://python.langchain.com/docs/modules/model_io/prompts/message_prompts
#import chromadb

# LLM Generator
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ChatMessageHistory, ConversationSummaryBufferMemory, ConversationBufferMemory
from langchain_experimental.chat_models import Llama2Chat
# Docs:- https://python.langchain.com/docs/integrations/chat/llama2_chat

# Assumes the Hugging Face token is exported as the HF_ACCESS_TOKEN environment variable; adjust to your setup.
HUGGINGFACEHUB_API_TOKEN = os.environ.get("HF_ACCESS_TOKEN")
#os.environ["HUGGINGFACEHUB_API_TOKEN"] = HUGGINGFACEHUB_API_TOKEN

# TODO: Implement another function to pass an array of PDFs / CSVs / Excels
from rag_pipeline import instantiate_rag
retriever = instantiate_rag()

#persist_directory = "Data/chroma"
#chroma_client = chromadb.PersistentClient(persist_directory=persist_directory)
#embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
#vectors = Chroma(persist_directory=persist_directory, embedding_function=embedding_function)
#retriever = vectors.as_retriever()  #(k=6)

# Set the url to your Inference Endpoint below to use a dedicated endpoint instead.
#your_endpoint_url = "https://fayjubiy2xqn36z0.us-east-1.aws.endpoints.huggingface.cloud"

# Uses the HuggingFaceEndpoint integration with the free Serverless Inference API.
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

llm = HuggingFaceEndpoint(
    #endpoint_url=f"{your_endpoint_url}",
    repo_id=repo_id,
    #max_length=128,
    max_new_tokens=512,
    token=HUGGINGFACEHUB_API_TOKEN,
    temperature=0.1,
    repetition_penalty=1.1,
    #context_length=4096,  # Set to max for chat summary; Llama-2 has a max context length of 4096
    stream=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    #top_k=10,
    #top_p=0.95,
)

# Note: `model` wraps the LLM with Llama-2 chat formatting but is not used by the chain below.
model = Llama2Chat(llm=llm)

memory = ConversationBufferMemory(
    llm=llm,
    memory_key="chat_history",
    return_messages=True,
    output_key='answer',
    input_key='question',
)

# Prompt Context Reference : https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF , https://huggingface.co/TheBloke/Llama-2-13B-chat-GPTQ/discussions/5#64b81e9b15ebeb44419a2b9e
# Reference:- https://github.com/langchain-ai/langchain/issues/5462

system_message_template = """You're a Mental Health Specialist. Support those with Depressive Disorder.
Listen compassionately and respond helpfully. For casual talk, be friendly. For facts, use the context.
If unsure, say 'Out of my knowledge.' Always stay direct.
If you cannot find the answer in the pieces of context, just say that you don't know; don't try to make up an answer.
----------------
{context}"""

messages = [
    SystemMessagePromptTemplate.from_template(system_message_template),
    HumanMessagePromptTemplate.from_template("{question}"),
]
qa_prompt = ChatPromptTemplate.from_messages(messages)
qa_prompt.pretty_print()

condense_question = """Given the following conversation and a follow-up message, rephrase the follow-up message into a stand-alone question or instruction that represents the user's intent precisely. Add context if necessary to produce a complete and unambiguous question, based only on the Follow Up Input and the chat history; don't make up messages.
Maintain the same question intent as the follow-up input message.

Chat History:
{chat_history}

Follow Up Input: {question}
Standalone question:"""

condense_question_prompt = PromptTemplate.from_template(condense_question)
condense_question_prompt.pretty_print()

retrieval_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    return_source_documents=False,
    verbose=True,
    #condense_question_prompt=condense_question_prompt,
    #chain_type="stuff",
    combine_docs_chain_kwargs={'prompt': qa_prompt},
    # https://github.com/langchain-ai/langchain/issues/6879
)

human_inputs = ['Nothing logged yet']
ai_responses = ['Nothing logged yet']

history = ChatMessageHistory()


def llm_generation(question: str):
    # 'answer' is the dict key holding the latest response from the chain
    llm_answer = retrieval_chain.invoke({'question': question, 'chat_history': history.messages})['answer']
    history.add_user_message(question)
    history.add_ai_message(llm_answer)
    return llm_answer


# Decide whether to place the helpers below in streamlit.py
# or make a new post_process.py and import that into streamlit.
def extract_dialogues(text):
    '''
    Returns two lists, one of human dialogues and one of AI dialogues.
    '''
    human_dialogues = []
    ai_dialogues = []
    lines = text.split('\n')
    # Iterate through each line
    for line in lines:
        # Remove leading and trailing whitespace
        line = line.strip()
        # Check if the line starts with 'Human:' or 'AI:'
        if line.startswith('Human:'):
            # Extract the text after 'Human:'
            human_dialogues.append(line[len('Human:'):].strip())
        elif line.startswith('AI:'):
            # Extract the text after 'AI:'
            ai_dialogues.append(line[len('AI:'):].strip())
    return human_dialogues, ai_dialogues


def update_list():
    global human_inputs, ai_responses
    human_inputs, ai_responses = extract_dialogues(memory.buffer_as_str)
    return 'responses updated'


def is_depressed():
    '''
    Returns whether, according to the human inputs, the person is depressed or not.
    '''
    # Classify the concatenated user inputs
    all_user_inputs = ' '.join(human_inputs)
    from nlp_models import sentiment_class, pattern_classification, corelation_analysis
    # sentiment_class is expected to return nested scores; [0][1] is treated here as the positive-sentiment probability
    scores = sentiment_class(all_user_inputs)
    return 'Not so depressed' if scores[0][1] > 0.5 else 'is_depressed'
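

# Minimal usage sketch (illustrative only): shows how a front end such as the planned
# streamlit.py might drive llm_generation(), update_list() and is_depressed().
# The console loop and the 'quit' command are assumptions made for this example,
# not part of the pipeline above.
if __name__ == "__main__":
    print("Type 'quit' to end the session.")
    while True:
        user_message = input("You: ")
        if user_message.strip().lower() == 'quit':
            break
        answer = llm_generation(user_message)
        print(f"\nAI: {answer}\n")
    # Refresh the logged dialogue lists from memory, then run the classifier.
    update_list()
    print(is_depressed())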