talk-to-arslan

Runtime error

App Files Files Community

arslan-ahmed committed on Sep 9, 2023

Commit

ed9ad5e

1 Parent(s): 918a154

added mode type

Browse files

Files changed (3) hide show

app.py +8 -11
ttyd_consts.py +11 -13
ttyd_functions.py +11 -8

app.py CHANGED Viewed

@@ -31,7 +31,7 @@ from ttyd_consts import *
 # select the mode from ttyd_consts.py
 mode = mode_general
-if mode.name!='general':
     # local vector store as opposed to gradio state vector store
     vsDict_hard = localData_vecStore(os.getenv("OPENAI_API_KEY"), inputDir=mode.inputDir, file_list=mode.file_list, url_list=mode.url_list)
@@ -74,13 +74,10 @@ def initializeChatbot(temp, k, modelName, stdlQs, api_key_st, vsDict_st, progres
 def setApiKey(api_key):
-    if api_key==os.getenv("TEMP_PWD") and os.getenv("OPENAI_API_KEY") is not None:
-        api_key=os.getenv("OPENAI_API_KEY")
     try:
-        api_key='Null' if api_key is None or api_key=='' else api_key
         openai.Model.list(api_key=api_key) # test the API key
         api_key_st = api_key
         return aKey_tb.update('API Key accepted', interactive=False, type='text'), aKey_btn.update(interactive=False), api_key_st
     except Exception as e:
         return aKey_tb.update(str(e), type='text'), *[x.update() for x in [aKey_btn, api_key_state]]
@@ -124,7 +121,7 @@ def uiData_vecStore(userFiles, userUrls, api_key_st, vsDict_st={}, progress=gr.P
 # just update the QA Chain, no updates to any UI
 def updateQaChain(temp, k, modelName, stdlQs, api_key_st, vsDict_st):
     # if we are not adding data from ui, then use vsDict_hard as vectorstore
-    if vsDict_st=={} and mode.name!='general': vsDict_st=vsDict_hard
     modelName = modelName.split('(')[0].strip() # so we can provide any info in brackets
     # check if the input model is chat model or legacy model
     try:
@@ -176,7 +173,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
     # Initialize state variables - stored in this browser session - these can only be used within the inputs or outputs of .click/.submit etc., not as a python var, because they are not stored in the backend, only as a frontend gradio component
     # but if you initialize it with a default value, that value will be stored in backend and accessible across all users. You can also change it with statear.value='newValue'
     qa_state = gr.State()
-    api_key_state = gr.State()
     chromaVS_state = gr.State({})
@@ -189,7 +186,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
                     aKey_tb = gr.Textbox(label="OpenAI API Key", type='password'\
                             , info='You can find OpenAI API key at https://platform.openai.com/account/api-keys'\
                             , placeholder='Enter your API key here and hit enter to begin chatting')
-                    aKey_btn = gr.Button("Submit API Key")
             with gr.Row(visible=mode.uiAddDataVis):
                 upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv'])
                 urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
@@ -230,7 +227,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
     aKey_tb.submit(**aKey_btn_args)
     # Data Ingest Button
-    data_ingest_btn.click(uiData_vecStore, [upload_fb, urls_tb, api_key_state, chromaVS_state], [chromaVS_state, status_tb, data_ingest_btn, upload_fb, urls_tb])
     # Adv Settings
     advSet_args = {'fn':updateQaChain, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state]}
@@ -241,7 +238,7 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
     # Initialize button
     initCb_args = {'fn':initializeChatbot, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, btn, initChatbot_btn, aKey_tb, tabs, chatbot]}
-    if mode.loadUi=='chatbot':
         demo.load(**initCb_args) # load Chatbot UI directly on startup
     initChatbot_btn.click(**initCb_args)
@@ -250,5 +247,5 @@ with gr.Blocks(theme=gr.themes.Default(primary_hue='orange', secondary_hue='gray
     btn.click(**chat_btn_args)
     msg.submit(**chat_btn_args)
-demo.queue()
 demo.launch(show_error=True)

 # select the mode from ttyd_consts.py
 mode = mode_general
+if mode.type!='userInputDocs':
     # local vector store as opposed to gradio state vector store
     vsDict_hard = localData_vecStore(os.getenv("OPENAI_API_KEY"), inputDir=mode.inputDir, file_list=mode.file_list, url_list=mode.url_list)
 def setApiKey(api_key):
     """Check the submitted API key and report the outcome through the UI.

     The raw value is first passed through transformApi(); the result is then
     tried against OpenAI by listing the available models.

     Returns a 3-tuple: (key textbox update, submit button update, third output).
     On success the third output is the resolved key; on failure it is an
     update object for api_key_state and the textbox shows the error text.
     """
     resolved_key = transformApi(api_key)
     try:
         openai.Model.list(api_key=resolved_key)  # test the API key
         textbox_update = aKey_tb.update('API Key accepted', interactive=False, type='text')
         button_update = aKey_btn.update(interactive=False)
         return textbox_update, button_update, resolved_key
     except Exception as e:
         # Show the error message in the textbox and leave the other outputs as they are
         textbox_update = aKey_tb.update(str(e), type='text')
         button_update = aKey_btn.update()
         return textbox_update, button_update, api_key_state.update()
 # just update the QA Chain, no updates to any UI
 def updateQaChain(temp, k, modelName, stdlQs, api_key_st, vsDict_st):
     # if we are not adding data from ui, then use vsDict_hard as vectorstore
+    if vsDict_st=={} and mode.type!='userInputDocs': vsDict_st=vsDict_hard
     modelName = modelName.split('(')[0].strip() # so we can provide any info in brackets
     # check if the input model is chat model or legacy model
     try:
     # Initialize state variables - stored in this browser session - these can only be used within the inputs or outputs of .click/.submit etc., not as a python var, because they are not stored in the backend, only as a frontend gradio component
     # but if you initialize it with a default value, that value will be stored in backend and accessible across all users. You can also change it with statear.value='newValue'
     qa_state = gr.State()
+    api_key_state = gr.State(os.getenv("OPENAI_API_KEY") if mode.type=='personalBot' else 'Null')
     chromaVS_state = gr.State({})
                     aKey_tb = gr.Textbox(label="OpenAI API Key", type='password'\
                             , info='You can find OpenAI API key at https://platform.openai.com/account/api-keys'\
                             , placeholder='Enter your API key here and hit enter to begin chatting')
+                    aKey_btn = gr.Button("Submit API Key")
             with gr.Row(visible=mode.uiAddDataVis):
                 upload_fb = gr.Files(scale=5, label="Upload (multiple) Files - pdf/txt/docx supported", file_types=['.doc', '.docx', 'text', '.pdf', '.csv'])
                 urls_tb = gr.Textbox(scale=5, label="Enter URLs starting with https (comma separated)"\
     aKey_tb.submit(**aKey_btn_args)
     # Data Ingest Button
+    data_ingest_event = data_ingest_btn.click(uiData_vecStore, [upload_fb, urls_tb, api_key_state, chromaVS_state], [chromaVS_state, status_tb, data_ingest_btn, upload_fb, urls_tb])
     # Adv Settings
     advSet_args = {'fn':updateQaChain, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state]}
     # Initialize button
     initCb_args = {'fn':initializeChatbot, 'inputs':[temp_sld, k_sld, model_dd, stdlQs_rb, api_key_state, chromaVS_state], 'outputs':[qa_state, btn, initChatbot_btn, aKey_tb, tabs, chatbot]}
+    if mode.type=='personalBot':
         demo.load(**initCb_args) # load Chatbot UI directly on startup
     initChatbot_btn.click(**initCb_args)
     btn.click(**chat_btn_args)
     msg.submit(**chat_btn_args)
+demo.queue(concurrency_count=10)
 demo.launch(show_error=True)

ttyd_consts.py CHANGED Viewed

@@ -2,8 +2,8 @@ exp_query = 'Generate top 5 questions that I can ask about this data. Questions
 waitText_initialize = 'Preparing the documents, please wait...'
-initialize_prompt = 'Write a short welcome message to the user. Describe the documents with a comprehensive overview including short summary.\
- If these documents are about a person, mention his name instead of using pronouns. After describing the overview, you should mention top 3 example questions that the user can ask about this data.\
  \n\nYour response should be short and precise. Format of your response should be Summary:\n{Description and Summary} \n\n Example Questions:\n{Example Questions}'
 nustian_exps = ['Tell me about NUSTIAN',
@@ -70,21 +70,19 @@ welcomeMsgArslan = """Summary: The document provides a comprehensive overview of
 class TtydMode():
-    def __init__(self, name='', title='', ui='initialize', type='', dir=None, files=[], urls=[], vis=False, welMsg='', def_k=4):
         self.name = name
-        self.title = title
-        self.loadUi = ui
-        self.type = type
         self.inputDir=dir
         self.file_list=files
         self.url_list=urls
-        self.uiAddDataVis = vis
-        self.welcomeMsg = welMsg
-        self.k = def_k
-mode_general = TtydMode(name='general', title=md_title_general, vis=True)
-mode_nustian = TtydMode(name='nustian', title=md_title_nustian, urls=['https://nustianusa.org', 'https://nustian.ca'])
-mode_arslan = TtydMode(name='arslan', ui='chatbot', title=md_title_arslan, dir='./documents/', welMsg=welcomeMsgArslan, def_k=8)

 waitText_initialize = 'Preparing the documents, please wait...'
+initialize_prompt = 'Write a short welcome message to the user. Describe the data with a comprehensive overview including short summary.\
+ If this data is about a person, mention his name instead of using pronouns. After describing the overview, you should mention top 3 example questions that the user can ask about this data.\
  \n\nYour response should be short and precise. Format of your response should be Summary:\n{Description and Summary} \n\n Example Questions:\n{Example Questions}'
 nustian_exps = ['Tell me about NUSTIAN',
 class TtydMode():
     """Settings record for one mode of the app.

     Constructor arguments are stored on the instance as follows:
       name   -> self.name
       title  -> self.title        (markdown title for the top display)
       type   -> self.type         ('userInputDocs', 'fixedDocs' or 'personalBot')
       dir    -> self.inputDir
       files  -> self.file_list    (a new empty list when omitted)
       urls   -> self.url_list     (a new empty list when omitted)
       vis    -> self.uiAddDataVis (load data from user; true for type 'userInputDocs')
       welMsg -> self.welcomeMsg   (welcome message constant; if not provided the LLM generates it)
       def_k  -> self.k            (default k docs to retrieve)

     The parameter names `type` and `dir` shadow builtins; they are kept
     because callers pass them by keyword.
     """

     def __init__(self, name='', title='', type='', dir=None, files=None, urls=None, vis=False, welMsg='', def_k=4):
         self.name = name
         self.title = title # markdown title for the top display
         self.type = type # userInputDocs, fixedDocs, personalBot
         self.inputDir = dir
         # None sentinels instead of `[]` defaults: a literal list default is created once
         # and would be shared (and mutated) across every mode built without files/urls.
         self.file_list = [] if files is None else files
         self.url_list = [] if urls is None else urls
         self.uiAddDataVis = vis # load data from user - this will be true for type = userInputDocs
         self.welcomeMsg = welMsg # welcome msg constant - if not provided LLM will generate it
         self.k = def_k # default k docs to retrieve
+mode_general = TtydMode(name='general', title=md_title_general, type='userInputDocs', vis=True)
+mode_nustian = TtydMode(name='nustian', title=md_title_nustian, type='fixedDocs', urls=['https://nustian.ca'])
+mode_arslan = TtydMode(name='arslan', title=md_title_arslan, type='personalBot', dir='./documents/', welMsg=welcomeMsgArslan, def_k=8)

ttyd_functions.py CHANGED Viewed

@@ -1,17 +1,11 @@
 import datetime
-import openai
 import uuid
-import gradio as gr
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores import Chroma
-from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
-from langchain.chains import ConversationalRetrievalChain
-from langchain.chains import RetrievalQA
 import os
-from langchain.chat_models import ChatOpenAI
-from langchain import OpenAI
 from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
 from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
@@ -31,6 +25,15 @@ mimetypes.init()
 media_files = tuple([x for x in mimetypes.types_map if mimetypes.types_map[x].split('/')[0] in ['image', 'video', 'audio']])
 filter_strings = ['/email-protection#']
 def get_hyperlinks(url):
     try:
         reqs = requests.get(url)
@@ -58,7 +61,7 @@ def get_domain_hyperlinks(local_domain, url):
         if re.search(HTTP_URL_PATTERN, link):
             # Parse the URL and check if the domain is the same
             url_obj = urlparse(link)
-            if url_obj.netloc == local_domain:
                 clean_link = link
         # If the link is not a URL, check if it is a relative link

 import datetime
 import uuid
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores import Chroma
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 import os
 from langchain.document_loaders import WebBaseLoader, TextLoader, Docx2txtLoader, PyMuPDFLoader
 from whatsapp_chat_custom import WhatsAppChatLoader # use this instead of from langchain.document_loaders import WhatsAppChatLoader
 media_files = tuple([x for x in mimetypes.types_map if mimetypes.types_map[x].split('/')[0] in ['image', 'video', 'audio']])
 filter_strings = ['/email-protection#']
def transformApi(api_key=''):
    """Normalise the API key typed by the user before it is used.

    - None or '' becomes the placeholder string 'Null'.
    - If the value equals the TEMP_PWD environment variable and an
      OPENAI_API_KEY environment variable is available, the server's
      OPENAI_API_KEY is returned in its place.
    - Anything else is returned unchanged.
    """
    # Handle empty input first: when TEMP_PWD is unset os.getenv() returns None,
    # and comparing that to a None/empty submission must never unlock the server key.
    if api_key is None or api_key == '':
        return 'Null'

    temp_pwd = os.getenv("TEMP_PWD")
    server_key = os.getenv("OPENAI_API_KEY")
    # Substitute only when both variables are actually configured; otherwise a
    # matching password would hand back None instead of a usable key.
    if temp_pwd and server_key and api_key == temp_pwd:
        return server_key
    return api_key
 def get_hyperlinks(url):
     try:
         reqs = requests.get(url)
         if re.search(HTTP_URL_PATTERN, link):
             # Parse the URL and check if the domain is the same
             url_obj = urlparse(link)
+            if url_obj.netloc.replace('www.','') == local_domain.replace('www.',''):
                 clean_link = link
         # If the link is not a URL, check if it is a relative link