import os
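# The two os.system calls below force-install a specific Gradio release at
# startup, presumably because the hosting image ships a different version.
# Pinning gradio==4.12.0 in requirements.txt would be the more usual approach.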
os.system("pip uninstall -y gradio")
os.system("pip install gradio==4.12.0")
from langchain_community.chat_models import ChatPerplexity
#from langchain.llms import OpenAI
#from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.memory import ConversationSummaryMemory, ConversationBufferMemory
from langchain.prompts import (
ChatPromptTemplate,
MessagesPlaceholder,
SystemMessagePromptTemplate,
HumanMessagePromptTemplate,
)
import gradio as gr
import datetime
from huggingface_hub import Repository
from datasets import load_dataset
import random
import string
from pyairtable import Api
# Accessing these keys without assigning them acts as a startup check:
# a missing variable raises KeyError immediately rather than failing later.
os.environ["OPENAI_API_KEY"]
os.environ["PPLX_API_KEY"]
os.environ["HUB_TOKEN"]
#os.environ["AIR_TOKEN"]  # Airtable logging disabled (pyairtable imported above but unused)
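# A stricter startup check could fail with a readable error instead of a bare
# KeyError (sketch, not part of the original flow):
# for key in ("OPENAI_API_KEY", "PPLX_API_KEY", "HUB_TOKEN"):
#     if key not in os.environ:
#         raise RuntimeError(f"Missing required environment variable: {key}")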
# Pull Lesson docs from dataset repo for privacy
repo = Repository(
    local_dir="private",
    repo_type="dataset",
    clone_from="https://huggingface.co/datasets/akellyirl/private_MHL",
    token=os.environ["HUB_TOKEN"],
)
repo.git_pull()
repo.git_pull()
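# NOTE: huggingface_hub.Repository is deprecated in newer releases of the
# library; an equivalent one-way pull could use snapshot_download (sketch):
# from huggingface_hub import snapshot_download
# snapshot_download(repo_id="akellyirl/private_MHL", repo_type="dataset",
#                   local_dir="private", token=os.environ["HUB_TOKEN"])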
# Scan the directories: a directory containing a 'topic.txt' file is treated
# as a valid topic directory.
def find_and_read_topics(base_path):
    topics_list = []
    for dirpath, dirnames, filenames in os.walk(base_path):
        if "topic.txt" in filenames:
            with open(os.path.join(dirpath, "topic.txt"), "r", encoding="utf-8") as file:
                topic = file.read().strip()
            topics_list.append((dirpath, topic))
    return topics_list
# Lesson docs pulled from repo for privacy
base_directory = "./private/docs"
topics = find_and_read_topics(base_directory)
for directory, topic_content in topics:
    print(f"Directory: {directory}\nTopic Content: {topic_content}\n")
# Select Topic
select = 0  # <========= index into the topics list
doc_dir = topics[select][0]
topic = topics[select][1]
# Scan the selected directory for PDF files
files = []
for foldername, subfolders, filenames in os.walk(doc_dir):
    for filename in filenames:
        if filename.endswith(('.pdf', '.PDF')):
            # Construct the full file path and append it to the files list
            path = os.path.join(foldername, filename)
            if os.path.isfile(path):
                files.append(path)
            else:
                print(f"{path} is not a valid path.")
print(f'{len(files)} files')
print(files)
# https://python.langchain.com/docs/use_cases/question_answering/how_to/chat_vector_db
# Create an instance of PyPDFLoader for each PDF file
loaders = [PyPDFLoader(file) for file in files]
# Load and split the PDFs into individual documents
data = []
for loader in loaders:
    data += loader.load_and_split()
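# NOTE: load_and_split() applies LangChain's default text splitter; because
# the documents are re-split with custom settings just below, loader.load()
# alone would likely suffice here.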
# SPLIT
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
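# NOTE: chunk_overlap=0 means adjacent chunks share no text. A small overlap
# (e.g. 50 characters) is a common adjustment to preserve context across
# chunk boundaries; treat the exact values here as tuning choices.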
# STORE
vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())
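# NOTE: this builds an in-memory Chroma index, so embeddings are recomputed
# (and billed) on every restart. Passing persist_directory="..." to
# Chroma.from_documents would let the index be reused across runs.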
# Chat Model
#model = 'gpt-3.5-turbo-0125'
#model = 'gpt-4'
#llm = ChatOpenAI(model=model, temperature=0)
model = "pplx-70b-chat"
llm = ChatPerplexity(temperature=0, model=model)
retriever = vectorstore.as_retriever()
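# By default as_retriever() performs a similarity search and returns the top
# 4 chunks; search_kwargs={"k": ...} adjusts how much context reaches the prompt.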
def predict(message, history):
    system_template = r"""
    - You are a health education chatbot for people with mental health difficulties or their family or friends.
    - You only discuss the documents provided and information related to them.
    - Always list references for your answers, from the documents, including section and page number.
    - If you did not find the information in the documents provided, then say so and try to provide a reference.
    - Your goal is to improve the understanding of mental disorders and treatments, and enhance help-seeking efficacy.
    - You always show empathy.
    - Your answers should explain things clearly and avoid jargon.
    - You are allowed to chat with the user in general conversation to support your goal.
    - If the user goes off topic, gently and politely let them know and go back on topic.
    - You must be safe to use. If you don't know the answer then say that. Do not make anything up.
    - Always try to keep the conversation going.
    ----
    {context}
    ----
    """
    user_template = "Question:```{question}```"
    qa_prompt = ChatPromptTemplate.from_messages([
        SystemMessagePromptTemplate.from_template(system_template),
        HumanMessagePromptTemplate.from_template(user_template),
    ])
    # The chain is rebuilt on each call; conversation state is threaded in
    # explicitly as chat_history rather than via a Memory object.
    qa = ConversationalRetrievalChain.from_llm(
        llm, retriever=retriever, combine_docs_chain_kwargs={"prompt": qa_prompt}
    )
    chat_history = [(h1, h2) for h1, h2 in history]
    ans = qa({"question": message, "chat_history": chat_history})['answer']
    history.append((message, ans))
    return "", history
def generate_session_id(length=10):
    # Generate a random alphanumeric session ID
    characters = string.ascii_letters + string.digits
    return user_id + '_MHL_' + ''.join(random.choice(characters) for _ in range(length))

def initialize_id():
    return generate_session_id()
user_id = ""
with gr.Blocks(theme=gr.themes.Default()) as chat:
    # Generate a unique Session ID (hidden from the user)
    session_id = gr.Textbox(label="Session ID", value=initialize_id, interactive=False, visible=False)
    gr.Markdown(f"""# I am a customised AI chatbot for {topic}.
    <i>Running {model}. NOTE: If I'm taking too long to respond,
    please refresh the page and continue.</i>""")
    chatbot = gr.Chatbot(height=300, show_copy_button=False, show_share_button=False)
    with gr.Row():
        msg = gr.Textbox(placeholder="Type here >> ", container=False, scale=10, min_width=250)
        submit = gr.Button(value="Submit", variant="primary", scale=1, min_width=20)
    with gr.Row():
        report = gr.Button(value="REPORT", variant="secondary",
                           link="https://padlet.com/akellyirl/strathbot-flagging-2b4ko3rhk94wja6e")
        clear = gr.ClearButton([msg, chatbot])
    examples = ["What can we talk about?", "Explain this very simply",
                "Suggest a topic", "Tell me more about that", "Where can I go for help?",
                "Provide more reading"]

    def on_select(ex):
        # Copy the clicked example into the message box
        return ex

    gr.Markdown("#### *Examples:*")
    ex = {}
    with gr.Group("Examples"):
        with gr.Row():
            for ind, exa in enumerate(examples):
                ex[ind] = gr.Textbox(exa, container=False, interactive=True)
                ex[ind].focus(fn=on_select, inputs=ex[ind], outputs=msg)

    # Submit on Enter or Button click
    gr.on(triggers=[msg.submit, submit.click],
          fn=predict, inputs=[msg, chatbot], outputs=[msg, chatbot],
          concurrency_limit=100)

sessionID = generate_session_id()  # generated but not otherwise used
chat.launch()