File size: 7,033 Bytes
81fe4a4
8fd295a
8ade4f6
5e32c80
d482bc9
 
 
 
 
e6490b2
 
 
 
 
 
 
 
 
 
 
 
 
b377fa2
e0f0766
07cd78b
7dee805
 
ab07ea3
7dee805
 
c526e30
7dee805
80ca9d4
7dee805
6dfd255
ab07ea3
e0f0766
ab07ea3
e0f0766
2d22527
d404697
e0f0766
 
7dee805
5e7c8a0
 
 
 
 
 
 
 
 
 
 
 
b458a65
ab07ea3
5e7c8a0
 
 
 
 
2094894
5e7c8a0
 
 
 
 
e6490b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab2cc6f
e6490b2
d482e79
c526e30
6a40908
6ca90c0
bad3155
 
 
 
3232c75
 
e6490b2
390c047
 
 
aebdf05
cf8598c
2403a58
 
6338960
 
53d2cd1
14b2eae
 
 
6338960
2403a58
390c047
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05f37c5
 
 
80ca9d4
b000312
05f37c5
 
e0970aa
e788605
e0970aa
 
05f37c5
b000312
c24a3e7
 
497ebca
9408214
6338960
 
daf5ff1
390c047
 
 
c531271
c9c45da
f971e2d
390c047
 
 
 
 
eeff945
 
14b2eae
f0e8369
 
 
9a8ce69
 
f0e8369
 
 
 
 
 
 
390c047
 
4a29efa
7fd333e
f0e8369
b000312
 
1c4d1f8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
# HACK: force-pin the Gradio version at process start by shelling out to pip.
# NOTE(review): os.system ignores failures, so a broken install proceeds
# silently — presumably required by the hosting environment; confirm.
import os
os.system("pip uninstall -y gradio")
os.system("pip install gradio==4.12.0")

from langchain_community.chat_models import ChatPerplexity

#from langchain.llms import OpenAI
#from langchain.chat_models import ChatOpenAI

from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory
from langchain.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

import gradio as gr
import datetime
from huggingface_hub import Repository
from datasets import load_dataset
import random
import string
from pyairtable import Api

# Fail fast at startup: these bare lookups raise KeyError if a required
# API key is missing from the environment (values are read later by the
# respective client libraries).
os.environ["OPENAI_API_KEY"]
os.environ["PPLX_API_KEY"]
os.environ["HUB_TOKEN"]
#os.environ["AIR_TOKEN"]


# Pull Lesson docs from dataset repo for privacy
# Clones (or reuses) the private HF dataset repo into ./private, then pulls
# the latest revision so the lesson documents are available locally.
repo = Repository(
    local_dir="private",
    repo_type="dataset",
    clone_from="https://huggingface.co/datasets/akellyirl/private_MHL",
    token=os.environ["HUB_TOKEN"]
)
repo.git_pull()
    
def find_and_read_topics(base_path):
    """Scan ``base_path`` recursively for topic directories.

    A directory is considered a valid topic directory when it contains a
    ``topic.txt`` file. Returns a list of ``(directory, topic_text)``
    tuples, where ``topic_text`` is the stripped contents of that file.
    """
    found = []

    for current_dir, _subdirs, names in os.walk(base_path):
        if "topic.txt" not in names:
            continue
        topic_file = os.path.join(current_dir, "topic.txt")
        with open(topic_file, "r", encoding="utf-8") as handle:
            found.append((current_dir, handle.read().strip()))

    return found

# Lesson docs pulled from repo for privacy
base_directory = "./private/docs"
topics = find_and_read_topics(base_directory)
for directory, topic_content in topics:
    print(f"Directory: {directory}\nTopic Content: {topic_content}\n")

# Select Topic
select = 0 # <=========

# Unpack the chosen (directory, topic) pair.
dir, topic = topics[select]

# Collect every file ending in .pdf or .PDF under the selected directory.
files = []
for root, _subfolders, names in os.walk(dir):
    for name in names:
        if not name.endswith(('.pdf', '.PDF')):
            continue
        path = os.path.join(root, name)
        if os.path.isfile(path):
            files.append(path)
        else:
            print(f"{path} is not a valid path.")

print(f'{len(files)} files')

print(files)

# https://python.langchain.com/docs/use_cases/question_answering/how_to/chat_vector_db

# One PyPDFLoader per source PDF.
loaders = [PyPDFLoader(pdf_path) for pdf_path in files]

# Load every PDF and collect the resulting per-page documents.
data = []
for loader in loaders:
    data.extend(loader.load_and_split())

# SPLIT: chunk the documents for embedding.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)

# STORE: embed the chunks into a Chroma vector store.
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

# Chat Model
#model = 'gpt-3.5-turbo-0125'
#model = 'gpt-4'

#llm = ChatOpenAI(model=model, temperature=0)

# Perplexity-hosted chat model; temperature=0 for more deterministic answers.
# `model` is also interpolated into the UI banner below.
model = "pplx-70b-chat"
llm = ChatPerplexity(temperature=0, model= model)

# Default retriever over the Chroma store built above.
retriever = vectorstore.as_retriever()

def predict(message, history):
    """Answer one user turn with retrieval-augmented QA.

    message: the user's new question (from the Gradio textbox).
    history: list of (user, bot) message pairs from the Gradio chatbot.
    Returns ("", updated_history) so the input box is cleared and the
    chat display shows the new exchange.
    """

    system_template = r""" 
    - You are a health education chatbot for people with mental health difficulties or their family or friends.
    - You only discuss the documents provided and information related to the them.
    - Always list references for your answers, from the documents, including section and page number.
    - If you did not find the information in the documents provided, then say so and try to provide a reference.
    - Your goal is to improve the understanding of mental disorders, treatments, and enhance help-seeking efficacy.
    - You always show empathy.
    - Your answers should explain things clearly and avoid jargon. 
    - You are allowed to chat with the user in general conversation to support your goal.
    - If the user goes off topic, gently and politely let them know and go back on topic.
    - You must be safe to use. If you don't know the answer then say that. Do not make anything up.
    - Always try to keep the conversation going.

    ----
    {context}
    ----
    """

    user_template = "Question:```{question}```"

    qa_prompt = ChatPromptTemplate.from_messages(
        [
            SystemMessagePromptTemplate.from_template(system_template),
            HumanMessagePromptTemplate.from_template(user_template),
        ]
    )

    # The chain is rebuilt on every call; llm and retriever come from
    # module scope.
    qa = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        combine_docs_chain_kwargs={"prompt": qa_prompt},
    )

    # Normalise Gradio's history pairs into the (user, bot) tuples the
    # chain expects.
    chat_history = [(user_turn, bot_turn) for user_turn, bot_turn in history]

    ans = qa({"question": message, "chat_history": chat_history})['answer']

    history.append((message, ans))

    return "", history

def generate_session_id(length=10, uid=None):
    """Build a session identifier of the form ``<uid>_MHL_<random>``.

    length: number of random alphanumeric characters to append.
    uid: identifier prefix; when None (the default, kept for backward
         compatibility) the module-level ``user_id`` global is used —
         previously this was a hard, implicit dependency on a global
         defined later in the file.

    NOTE: ``random`` is fine for log/session labels but is NOT
    cryptographically secure; do not use these IDs for authentication.
    """
    if uid is None:
        uid = user_id  # module-level global, set before the UI is built
    characters = string.ascii_letters + string.digits
    suffix = ''.join(random.choice(characters) for _ in range(length))
    return uid + '_MHL_' + suffix

def initialize_id():
    """Factory used by the Session ID textbox to mint a fresh ID on load."""
    fresh_id = generate_session_id()
    return fresh_id


# Prefix used by generate_session_id().
# NOTE(review): never reassigned anywhere visible, so IDs start with
# "_MHL_" — confirm this is intended.
user_id = ""

with gr.Blocks(theme=gr.themes.Default()) as chat:

    # Generate a unique Session ID
    # value=initialize_id (a callable) mints a new ID per page load.
    session_id = gr.Textbox(label="Session ID", value=initialize_id, interactive=False, visible=False)
    
    gr.Markdown(f"""# I am a customised AI chatbot for {topic}.
                <i>Running {model}. NOTE: If I'm taking too long to respond, 
                please refresh the page and continue.""")
        
    chatbot = gr.Chatbot(height=300, show_copy_button = False, show_share_button = False)
    
    # Input row: message box plus explicit submit button.
    with gr.Row():
        msg = gr.Textbox(placeholder="Type  here  >> ", container=False, scale=10, min_width=250)
        submit = gr.Button(value="Submit", variant="primary", scale=1, min_width=20)
        
    # Action row: external flagging link and a clear button.
    with gr.Row():
        report = gr.Button(value="REPORT", variant="secondary",
                                  link="https://padlet.com/akellyirl/strathbot-flagging-2b4ko3rhk94wja6e")
        clear = gr.ClearButton([msg, chatbot])

    # Canned starter prompts shown below the chat.
    examples=(["What can we talk about?","Explain this very simply",
                  "Suggest a topic","Tell me more about that","Where can I go for help?",
                  "Provide more reading"])

    def on_select(ex):
        # Copy the focused example's text into the message box.
        return ex

    gr.Markdown("#### *Examples:*")
    ex = {}
    with gr.Group("Examples"):
        with gr.Row():
            for ind, exa in enumerate(examples):
                # Each example is an editable textbox; focusing it fills msg.
                ex[ind] = gr.Textbox(exa, container=False, interactive=True)
                ex[ind].focus(fn=on_select, inputs=ex[ind], outputs=msg)

    # Submit on Enter or Button click
    gr.on(triggers=[msg.submit, submit.click],
         fn= predict, inputs=[msg, chatbot], outputs=[msg, chatbot],
          concurrency_limit = 100,)

    # NOTE(review): computed once at build time and apparently unused
    # (distinct from the per-load session_id textbox) — confirm needed.
    sessionID = generate_session_id()

chat.launch()