add requirements.txt & Langchain_demo.ipynb
Commit 89df8be · 1 Parent(s): 25be1b2
Change Liao committed
Browse files
- Langchain_demo.ipynb +0 -0
- app.py +181 -41
- cache.sqlite3 +0 -0
- data/audios/tempfile.mp3 +0 -0
- data/ks_source/.gitattributes +0 -1
- data/videos/tempfile.mp4 +0 -0
- requirements.txt +0 -205
Langchain_demo.ipynb
ADDED
The diff for this file is too large to render. See raw diff.
app.py
CHANGED
@@ -15,23 +15,35 @@ from sqlitedict import SqliteDict
 
 import gradio as gr
 
+from langchain import PromptTemplate
+from langchain.agents import Tool
+from langchain.agents import load_tools
+from langchain.agents import initialize_agent
+
+from langchain.agents import AgentType
+
+from langchain.chains import LLMMathChain
+from langchain import SerpAPIWrapper
+from langchain.chains import ConversationalRetrievalChain
+
+from langchain.chains.summarize import load_summarize_chain
+
 from langchain.llms import AzureOpenAI
 from langchain.chat_models import AzureChatOpenAI
 
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.chains import ConversationalRetrievalChain
 
 from langchain.memory import ChatMessageHistory
-from langchain import
+from langchain.memory import ConversationBufferMemory
+
 from langchain.vectorstores import Chroma
 
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.
-from langchain.document_loaders import DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
 
+from langchain.document_loaders import DirectoryLoader
 from langchain.document_loaders import UnstructuredFileLoader
-
-from langchain.chains.summarize import load_summarize_chain
+
 
 import clickhouse_connect
 from pathlib import Path
@@ -41,12 +53,16 @@ from langchain.document_loaders import YoutubeLoader
 from azure_utils import AzureVoiceData
 from polly_utils import PollyVoiceData, NEURAL_ENGINE
 from contextlib import closing
+from langchain.agents import create_pandas_dataframe_agent
+import pandas as pd
 
 #os env
 os.environ["OPENAI_API_TYPE"] = "azure"
 os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
 os.environ["OPENAI_API_BASE"] = "https://civet-project-001.openai.azure.com/"
 os.environ["OPENAI_API_KEY"] = "0e3e5b666818488fa1b5cb4e4238ffa7"
+os.environ["SERPAPI_API_KEY"] = "a5b67b8805b4e12b0ae147c9c6b2a7dbf3ab84fca5f24e531b6963b1f7fc1ff7"
+
 global_deployment_id = "CivetGPT"
 global_model_name = "gpt-35-turbo"
 
@@ -56,6 +72,7 @@ chroma_api_impl = "HH_Azure_Openai"
 root_file_path = "./data/" #其實是data 存放的位置
 hr_source_path = "hr_source"
 ks_source_path = "ks_source"
+believe_source_path = 'be_source'
 
 sqlite_name = "cache.sqlite3"
 sqlite_key="stored_files"
@@ -63,6 +80,7 @@ persist_db = "persist_db"
 hr_collection_name = "hr_db"
 chroma_db_impl="localdb+langchain"
 tmp_collection="tmp_collection"
+davinci = "text-davinci-003"
 
 #global text setting
 inputText = "問題(按q 或Ctrl + c跳出): "
@@ -183,13 +201,13 @@ def get_prompt_summary_string():
 
 def get_prompt_template_string():
     today = datetime.date.today().strftime("%Y年%m月%d日")
-    template_string = f"
-    請根據歷史對話,針對這次的問題,
+    template_string = f"我是鴻海(等同Foxconn)的員工, 你是一個鴻海的人資專家. 今天是{today}".format(today=today)+"""
+    請根據歷史對話,針對這次的問題, 形成獨立問題. 請優先從提供的文件中尋找答案, 你被允許回答不知道, 但回答不知道時需要給中央人資的客服聯絡窗口資訊.
     不論什麼問題, 都以中文回答
 
     歷史對話: {chat_history}
     這次的問題: {question}
-
+    人資專家:
     """
     return template_string
 
@@ -238,23 +256,58 @@ def local_vector_search(question_str,chat_history, collection_name = hr_collecti
 
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True, ai_prefix = "AI超級助理")
 
-    llm =
-
-
-
+    llm = AzureOpenAI(
+        deployment_name = global_deployment_id,
+        model_name= global_model_name,
+        temperature = 0.0)
+
+    chat_llm = AzureChatOpenAI(
+        deployment_name = global_deployment_id,
+        model_name= global_model_name,
+        temperature = 0.2)
 
     prompt = PromptTemplate(
         template=get_prompt_template_string(),
        input_variables=["question","chat_history"]
     )
     prompt.format(question=question_str,chat_history=chat_history)
-
-    llm=
+    km_chain = ConversationalRetrievalChain.from_llm(
+        llm=chat_llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
         condense_question_prompt=prompt,
     )
-
+    km_tool = Tool(
+        name='Knowledge Base',
+        func=km_chain.run,
+        description='一個非常有用的工具, 當要查詢任何公司政策以及鴻海相關資料都使用這個工具'
+    )
+
+    math_math = LLMMathChain(llm=llm,verbose=True)
+    math_tool = Tool(
+        name='Calculator',
+        func=math_math.run,
+        description='Useful for when you need to answer questions about math.'
+    )
+
+    search = SerpAPIWrapper()
+    search_tool = Tool(
+        name="Search",
+        func=search.run,
+        description="當你需要回答一般問題時,非常有用; 不可以用來回答任何跟鴻海有關的問題.",
+    )
+    tools=[math_tool,km_tool, search_tool]
+    agent=initialize_agent(
+        agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
+        tools=tools,
+        llm=chat_llm,
+        verbose=True,
+        memory=memory,
+        max_iterations=30,
+    )
+    result=km_chain(question_str)
+
+    #result=agent.run(question_str)
     return result["answer"]
 
 def make_markdown_table(array):
@@ -636,18 +689,8 @@ def gradio_run():
             tmp_aud_file_url = "/file=" + tmp_aud_file.value['name']
             htm_audio = f'<audio><source src={tmp_aud_file_url} type="audio/mp3"></audio>'
            audio_html = gr.HTML(htm_audio, visible=False)
-            def respond(message, chat_history):
-                vector_search_message = local_vector_search(message, chat_history)
-                chat_history.append((message, vector_search_message))
-
-                html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
-                res, new_html_video, video_file_path = do_html_video_speak()
-
-                if res.status_code == 200:
-                    return '', chat_history, new_html_video, ''
-                else:
-                    return '', chat_history, htm_video, html_audio
             with gr.Column():
+                isAudio = gr.Checkbox(label="是否要有語音", info="要開啟語音嗎?查詢時間會增長")
                 gr.Markdown("""
                 ### AI 虛擬客服:
                 * 這是一個實驗性質的AI 客服
@@ -655,6 +698,7 @@ def gradio_run():
                 * 想要放誰的頭像都可以, 要放董事長也可以.
                 * 訂閱制(有效時間 6/13~7/13)
                 """)
+
             with gr.Row():
                 chatbot = gr.Chatbot(value=[], elem_id="chatbot").style(height=400)
             with gr.Row():
@@ -665,8 +709,26 @@ def gradio_run():
                 )
                 with gr.Column(scale=1):
                     clear = gr.Button("清除")
-
-
+
+            def respond(message, chat_history):
+                vector_search_message = local_vector_search(message, chat_history)
+                chat_history.append((message, vector_search_message))
+                print("vector_search:"+vector_search_message)
+                if isAudio.value is False:
+                    print("isAudio is False")
+                    return '', chat_history, htm_video, ''
+                else:
+                    print("isAudio is True")
+                    html_audio, audio_file_path = do_html_audio_speak(vector_search_message)
+                    res, new_html_video, video_file_path = do_html_video_speak()
+
+                    if res.status_code == 200:
+                        return '', chat_history, new_html_video, ''
+                    else:
+                        return '', chat_history, htm_video, html_audio
+
+            msg.submit(respond, [msg, chatbot], [msg, chatbot, video_html, audio_html], queue=True)
+            clear.click(lambda: None, None, chatbot, queue=False)
         #3rd youtube
         with gr.Tab("Youtube 影片摘要"):
             with gr.Row():
@@ -678,26 +740,25 @@ def gradio_run():
                 with gr.Column(scale=1):
                     youtube_summary_textbox=gr.Textbox(interactive=False, label="AI 解析", lines=20)
                     youtube_analysis_btn.click(youtube_summary,youtube_link,youtube_summary_textbox)
+        """
         with gr.Tab("高雄市政府票証"):
-
-            import pandas as pd
+
             mypath = root_file_path+ks_source_path
             onlyfiles = os.listdir(mypath)
             df = pd.concat((pd.read_csv(os.path.join(mypath, filename)) for filename in onlyfiles))
             with gr.Row():
-                gr.Markdown(
+                gr.Markdown('
                 ### 使用方式
                 這是一個使用高雄公車票證資料, 運用AI協助決策的工具.
                 如果有出現error, 請重新刷新頁面. 有error 就代表運算到最後token 數量超出azure openai 上限了, 這部份還在想辦法調整中.
-
+                ')
                 invField = gr.Textbox(visible=False)
                 gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
             with gr.Row():
                 with gr.Column():
-
-
-
-                    model_name=davinci,
+                    llm = AzureChatOpenAI(
+                        deployment_name=global_deployment_id,
+                        model_name=global_model_name,
                         max_tokens=2000,
                         temperature=0,
                     )
@@ -709,7 +770,6 @@ def gradio_run():
                         return_intermediate_steps=False,
                         verbose=True
                     )
-
                    def tmp_respond(prompt_str,message, chat_history):
                         try:
                             new_str=prompt_str.format(message=message, chat_history=chat_history)
@@ -734,14 +794,14 @@ def gradio_run():
                 with gr.Column(scale=1):
                     tmp_clear = gr.Button("清除對話")
                 with gr.Column():
-                    prompt_textbox=gr.Textbox(
+                    prompt_textbox=gr.Textbox('
                     你是一位專業的資料科學家,有下列定義:
                     1.每個票卡序號代表一名乘客
                     2.原始票價視為花費或是消費
                     3.轉乘次數: 一名乘客在同一天有任意兩筆紀錄,其中一筆出下車站的資料等於另一筆進上車站的資料,其出下車站代表的車站的轉乘次數就要增加1.
                     歷史訊息是 {chat_history}
                     請以中文回答我下面的問題:{message}
-
+                    ', lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數", interactive=True, max_lines=10)
                     console=gr.Textbox(lines=11, label="Console",max_lines=11)
                     tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot], queue=True)
                     tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
@@ -751,8 +811,88 @@ def gradio_run():
                     '0001站轉乘旅客所佔比例',
                     '高雄捷運的2022年7月份運輸量與2022年6月份相比, 增減如何?',
                     '請給我2022年6月至2022年7月之間, 轉乘數量最高排名前五名的車站?',
-                    '0001站 在2022年9月份轉乘數量是未知. 請依2022年7月份到2022年8月份的趨勢, 請以月份做為時間單位, 做出一個數學模型. 用此數學模型來預測 0001站 在2022年9月份的轉乘數量會多少, 增減如何?'
+                    '0001站 在2022年9月份轉乘數量是未知. 請依2022年7月份到2022年8月份的趨勢, 請以月份做為時間單位, 做出一個數學模型. 用此數學模型來預測 0001站 在2022年9月份的轉乘數量會多少, 增減如何?',
+                    '請以2022年6月~2022年8月0001車站轉乘資料為主,以月份以及轉乘量為變數,做一個迴歸模型,預測2022年9月份的轉乘量增加多少?'
                     ], label="訊息範例",inputs=tmp_msg)
+        """
+        with gr.Tab("相信人員統計助手"):
+            #model_symbol="text-davinci-003"
+            #model_symbol="Text-Curie"
+            mypath = root_file_path + believe_source_path
+            onlyfiles = os.listdir(mypath)
+            df = pd.concat((pd.read_csv(os.path.join(mypath, filename)) for filename in onlyfiles))
+
+            with gr.Row():
+                gr.Markdown("""
+                ### 使用方式
+                資料裡有 `相信` 的active user 資料,
+                右方己經有先算出平均每個問題花費多少, 隨意詢問算法AI 即可算出多少費用.
+                若要改費用, 請在右方prompt 更改數字
+                """)
+                invField = gr.Textbox(visible=False)
+                gr.Examples(onlyfiles, label="資料庫檔案", inputs=invField, examples_per_page=4)
+            with gr.Row():
+                with gr.Column():
+                    llm = AzureOpenAI(
+                        deployment_name=global_deployment_id,
+                        model_name=global_model_name,
+                        max_tokens=2000,
+                        temperature=0,
+                    )
+                    be_agent = create_pandas_dataframe_agent(
+                        llm,
+                        df,
+                        max_iterations=30,
+                        return_intermediate_steps=False,
+                        max_execution_time=60,
+                        handle_parsing_errors="Check your output and make sure it conforms!",
+                        verbose=True)
+                    def tmp_respond(prompt_str, message, chat_history):
+                        new_str = prompt_str.format(message=message, chat_history=chat_history)
+                        answer = be_agent.run(new_str)
+                        chat_history.append((message, answer))
+                        """
+                        try:
+                            new_str = prompt_str.format(message=message, chat_history=chat_history)
+                            answer = be_agent.run(new_str)
+                            chat_history.append((message, answer))
+                        except Exception as e:
+                            response = str(e)
+                            print(f"Got error!{response}")
+                            if not response.startswith("Could not parse LLM output: `"):
+                                raise e
+                            answer = response.removeprefix("Could not parse LLM output: `").removesuffix("`")
+                            print("answer:"+answer)
+                            chat_history.append((message, answer))
+                        """
+                        return '', chat_history
+
+                    tmp_chatbot = gr.Chatbot(value=[], elem_id="tmp_chatbot").style(height=500)
+                    with gr.Row():
+                        with gr.Column(scale=5):
+                            tmp_msg = gr.Textbox(show_label=False,placeholder="輸入你的問題",)
+                        with gr.Column(scale=1):
+                            tmp_clear = gr.Button("清除對話")
+                with gr.Column():
+                    prompt_textbox = gr.Textbox("""
+                    你是一位專業資料科學家,提供給你的是研究列表.
+                    有下列定義:
+
+                    1.Title是研究報告的標題
+
+                    請以中文回答我下面的問題:{message}
+                    """, lines=10, label="Prompt:有{chat_history}及{message}, 請至少保留{message}變數",interactive=True, max_lines=10)
+                    console = gr.Textbox(lines=11, label="Console", max_lines=11)
+                    tmp_msg.submit(tmp_respond, [prompt_textbox, tmp_msg, tmp_chatbot], [tmp_msg, tmp_chatbot],queue=True)
+                    tmp_clear.click(lambda: None, None, tmp_chatbot, queue=False)
+                    with gr.Row():
+                        gr.Examples([
+                            '你有什麼欄位?',
+                            '資料裡有屬於台灣(TW)的員工有多少位?',
+                            '全台灣的員工, 每人每天問五個問題, 1個月花費多少錢?',
+                            '如果龍華廠區的員工每人每天問3個問題,台灣員工每人每天問7個問題, 請問這樣一個月多少錢?'
+                        ], label="訊息範例", inputs=tmp_msg)
+
     demo.queue(concurrency_count=10)
     lunch_style(demo,console)
 
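The heart of the app.py change is the agent wiring added to local_vector_search(): the HR retrieval chain is wrapped as a Tool next to an LLMMathChain calculator and a SerpAPI web search, and handed to initialize_agent, while the value actually returned still comes from the plain km_chain call (the agent.run call is left commented out). What follows is a minimal, self-contained sketch of that pattern, assuming the langchain==0.0.200 / openai==0.27.8 pins from the deleted requirements.txt; the endpoint, keys, deployment name, Chroma persist directory, and prompt wording are placeholders, not the repository's real values.

import os

from langchain import PromptTemplate, SerpAPIWrapper
from langchain.agents import AgentType, Tool, initialize_agent
from langchain.chains import ConversationalRetrievalChain, LLMMathChain
from langchain.chat_models import AzureChatOpenAI
from langchain.llms import AzureOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.vectorstores import Chroma

# Placeholder Azure OpenAI / SerpAPI credentials -- substitute real values.
os.environ["OPENAI_API_TYPE"] = "azure"
os.environ["OPENAI_API_VERSION"] = "2023-03-15-preview"
os.environ["OPENAI_API_BASE"] = "https://<your-resource>.openai.azure.com/"
os.environ["OPENAI_API_KEY"] = "<azure-openai-key>"
os.environ["SERPAPI_API_KEY"] = "<serpapi-key>"

# Completion-style LLM for the math chain, chat LLM for retrieval and the agent.
llm = AzureOpenAI(deployment_name="<deployment>", model_name="gpt-35-turbo", temperature=0.0)
chat_llm = AzureChatOpenAI(deployment_name="<deployment>", model_name="gpt-35-turbo", temperature=0.2)

# A previously persisted Chroma collection acts as the knowledge base.
vectorstore = Chroma(
    collection_name="hr_db",
    embedding_function=OpenAIEmbeddings(chunk_size=1),
    persist_directory="./data/persist_db",
)

memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
condense_prompt = PromptTemplate(
    template="Chat history: {chat_history}\nFollow-up question: {question}\nStandalone question:",
    input_variables=["question", "chat_history"],
)
km_chain = ConversationalRetrievalChain.from_llm(
    llm=chat_llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
    condense_question_prompt=condense_prompt,
)

tools = [
    Tool(name="Knowledge Base", func=km_chain.run,
         description="Company policy and internal documents."),
    Tool(name="Calculator", func=LLMMathChain(llm=llm, verbose=True).run,
         description="Useful for math questions."),
    Tool(name="Search", func=SerpAPIWrapper().run,
         description="General questions that are not about the company."),
]
agent = initialize_agent(
    agent=AgentType.CHAT_ZERO_SHOT_REACT_DESCRIPTION,
    tools=tools,
    llm=chat_llm,
    memory=memory,
    verbose=True,
    max_iterations=30,
)

# The commit answers with the retrieval chain directly; the agent path stays disabled.
print(km_chain({"question": "How many days of annual leave do I get?"})["answer"])
# print(agent.run("How many days of annual leave do I get?"))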
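The new 相信人員統計助手 tab takes a different route: it concatenates every CSV under be_source into one DataFrame and lets create_pandas_dataframe_agent generate and run pandas code to answer questions about it. A condensed sketch under the same assumptions (the directory and deployment name are placeholders, the Azure environment variables from the sketch above are assumed to be set, and the question is one of the tab's built-in examples); the commit additionally passes max_execution_time and handle_parsing_errors to the agent.

import os

import pandas as pd
from langchain.agents import create_pandas_dataframe_agent
from langchain.llms import AzureOpenAI

# Placeholder directory of CSV files (app.py reads root_file_path + 'be_source').
csv_dir = "./data/be_source"
df = pd.concat(pd.read_csv(os.path.join(csv_dir, name)) for name in os.listdir(csv_dir))

llm = AzureOpenAI(deployment_name="<deployment>", model_name="gpt-35-turbo",
                  max_tokens=2000, temperature=0)

# The agent plans with the LLM and executes pandas expressions against df.
be_agent = create_pandas_dataframe_agent(llm, df, verbose=True, max_iterations=30)

print(be_agent.run('資料裡有屬於台灣(TW)的員工有多少位?'))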
cache.sqlite3
CHANGED
Binary files a/cache.sqlite3 and b/cache.sqlite3 differ
data/audios/tempfile.mp3
CHANGED
Binary files a/data/audios/tempfile.mp3 and b/data/audios/tempfile.mp3 differ
data/ks_source/.gitattributes
DELETED
@@ -1 +0,0 @@
-*.csv filter=lfs diff=lfs merge=lfs -text
data/videos/tempfile.mp4
CHANGED
Binary files a/data/videos/tempfile.mp4 and b/data/videos/tempfile.mp4 differ
requirements.txt
DELETED
@@ -1,205 +0,0 @@
-aiofiles==23.1.0
-aiohttp==3.8.4
-aiosignal==1.3.1
-altair==5.0.1
-anyio==3.7.0
-argilla==1.9.0
-argon2-cffi==21.3.0
-argon2-cffi-bindings==21.2.0
-arrow==1.2.3
-asttokens==2.2.1
-async-timeout==4.0.2
-attrs==23.1.0
-backcall==0.2.0
-backoff==2.2.1
-beautifulsoup4==4.12.2
-bleach==6.0.0
-boto3==1.26.152
-botocore==1.29.152
-bs4==0.0.1
-certifi==2023.5.7
-cffi==1.15.1
-chardet==5.1.0
-charset-normalizer==3.1.0
-chromadb==0.3.26
-click==8.1.3
-clickhouse-connect==0.6.2
-colorama==0.4.6
-coloredlogs==15.0.1
-comm==0.1.3
-commonmark==0.9.1
-contourpy==1.0.7
-cryptography==41.0.1
-cycler==0.11.0
-dataclasses-json==0.5.7
-debugpy==1.6.7
-decorator==5.1.1
-defusedxml==0.7.1
-Deprecated==1.2.14
-distlib==0.3.6
-duckdb==0.8.0
-et-xmlfile==1.1.0
-exceptiongroup==1.1.1
-executing==1.2.0
-fastapi==0.96.1
-fastjsonschema==2.17.1
-ffmpy==0.3.0
-filelock==3.12.0
-flatbuffers==23.5.26
-fonttools==4.39.4
-fqdn==1.5.1
-frozenlist==1.3.3
-fsspec==2023.6.0
-gradio==3.34.0
-gradio_client==0.2.6
-greenlet==2.0.2
-h11==0.14.0
-hnswlib==0.7.0
-httpcore==0.16.3
-httptools==0.5.0
-httpx==0.23.3
-huggingface-hub==0.15.1
-humanfriendly==10.0
-idna==3.4
-ipykernel==6.23.2
-ipython==8.14.0
-ipython-genutils==0.2.0
-isoduration==20.11.0
-jedi==0.18.2
-Jinja2==3.1.2
-jmespath==1.0.1
-joblib==1.2.0
-jsonpointer==2.3
-jsonschema==4.17.3
-jupyter-events==0.6.3
-jupyter_client==8.2.0
-jupyter_core==5.3.1
-jupyter_server==2.6.0
-jupyter_server_terminals==0.4.4
-jupyterlab-pygments==0.2.2
-kiwisolver==1.4.4
-langchain==0.0.200
-langchainplus-sdk==0.0.10
-linkify-it-py==2.0.2
-lxml==4.9.2
-lz4==4.3.2
-Markdown==3.4.3
-markdown-it-py==2.2.0
-MarkupSafe==2.1.3
-marshmallow==3.19.0
-marshmallow-enum==1.5.1
-matplotlib==3.7.1
-matplotlib-inline==0.1.6
-mdit-py-plugins==0.3.3
-mdurl==0.1.2
-mistune==2.0.5
-monotonic==1.6
-mpmath==1.3.0
-msg-parser==1.2.0
-multidict==6.0.4
-mypy-extensions==1.0.0
-nbclassic==1.0.0
-nbclient==0.8.0
-nbconvert==7.5.0
-nbformat==5.9.0
-nest-asyncio==1.5.6
-nltk==3.8.1
-notebook_shim==0.2.3
-numexpr==2.8.4
-numpy==1.23.5
-olefile==0.46
-onnxruntime==1.15.0
-openai==0.27.8
-openapi-schema-pydantic==1.2.4
-openpyxl==3.1.2
-orjson==3.9.1
-overrides==7.3.1
-packaging==23.1
-pandas==1.5.3
-pandocfilters==1.5.0
-parso==0.8.3
-pdf2image==1.16.3
-pdfminer.six==20221105
-pickleshare==0.7.5
-Pillow==9.5.0
-pip-search==0.0.12
-platformdirs==3.5.1
-posthog==3.0.1
-prometheus-client==0.17.0
-prompt-toolkit==3.0.38
-protobuf==4.23.2
-psutil==5.9.5
-pulsar-client==3.2.0
-pure-eval==0.2.2
-pycparser==2.21
-pydantic==1.10.9
-pydub==0.25.1
-Pygments==2.15.1
-pypandoc==1.11
-pyparsing==3.0.9
-pyreadline3==3.4.1
-pyrsistent==0.19.3
-python-dateutil==2.8.2
-python-docx==0.8.11
-python-dotenv==1.0.0
-python-json-logger==2.0.7
-python-magic==0.4.27
-python-multipart==0.0.6
-python-pptx==0.6.21
-pytube==15.0.0
-pytz==2023.3
-PyYAML==6.0
-pyzmq==25.1.0
-regex==2023.6.3
-requests==2.31.0
-rfc3339-validator==0.1.4
-rfc3986==1.5.0
-rfc3986-validator==0.1.1
-rich==13.0.1
-s3transfer==0.6.1
-scikit-learn==1.2.2
-scipy==1.10.1
-semantic-version==2.10.0
-Send2Trash==1.8.2
-six==1.16.0
-sklearn==0.0.post5
-sniffio==1.3.0
-soupsieve==2.4.1
-SQLAlchemy==2.0.16
-sqlitedict==2.1.0
-stack-data==0.6.2
-starlette==0.27.0
-sympy==1.12
-tabulate==0.9.0
-tenacity==8.2.2
-terminado==0.17.1
-threadpoolctl==3.1.0
-tiktoken==0.4.0
-tinycss2==1.2.1
-tokenizers==0.13.3
-toolz==0.12.0
-tornado==6.3.2
-tqdm==4.65.0
-traitlets==5.9.0
-typer==0.9.0
-typing-inspect==0.9.0
-typing_extensions==4.6.3
-tzdata==2023.3
-uc-micro-py==1.0.2
-unstructured==0.7.3
-uri-template==1.2.0
-urllib3
-uvicorn==0.22.0
-virtualenv==20.23.0
-watchfiles==0.19.0
-wcwidth==0.2.6
-webcolors==1.13
-webencodings==0.5.1
-websocket-client==1.5.3
-websockets==11.0.3
-wrapt==1.14.1
-xlrd==2.0.1
-XlsxWriter==3.1.2
-yarl==1.9.2
-youtube-transcript-api==0.6.0
-zstandard==0.21.0