Roger Condori committed
Commit f7ceb03 • 1 parent: 0a43cad

add new features app.py

Files changed (1)
  1. app.py +73 -31
app.py CHANGED
@@ -1,3 +1,15 @@
+import torch
+import os
+try:
+    from llama_cpp import Llama
+except:
+    if torch.cuda.is_available():
+        print("CUDA is available on this system.")
+        os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose')
+    else:
+        print("CUDA is not available on this system.")
+        os.system('pip install llama-cpp-python')
+
 import gradio as gr
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
@@ -21,30 +33,25 @@ from langchain.document_loaders import (
     PyPDFLoader,
 )
 import param
-import os
-import torch
 from conversadocs.bones import DocChat
+from conversadocs.llm_chess import ChessGame
+
+My_hf_token = os.getenv("My_hf_token")
 
 dc = DocChat()
+cg = ChessGame(dc)
 
 ##### GRADIO CONFIG ####
 
-if torch.cuda.is_available():
-    print("CUDA is available on this system.")
-    os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose')
-else:
-    print("CUDA is not available on this system.")
-    os.system('pip install llama-cpp-python')
-
 css="""
 #col-container {max-width: 1500px; margin-left: auto; margin-right: auto;}
 """
 
 title = """
 <div style="text-align: center;max-width: 1500px;">
-<h2>Chat with Documents 📚 - Falcon and Llama-2</h2>
-<p style="text-align: center;">Upload txt, pdf, doc, docx, enex, epub, html, md, odt, ptt and pttx.
-Wait for the Status to show Loaded documents, start typing your questions. This is a demo of <a href="https://github.com/R3gm/ConversaDocs">ConversaDocs</a>.<br /></p>
+<h2>Chat with Documents 📚 - Falcon, Llama-2 and OpenAI</h2>
+<p style="text-align: center;">Upload txt, pdf, doc, docx, enex, epub, html, md, odt, ptt and pttx.
+Wait for the Status to show Loaded documents, start typing your questions. Oficial Repository <a href="https://github.com/R3gm/ConversaDocs">ConversaDocs</a>.<br /></p>
 </div>
 """
 
@@ -55,11 +62,25 @@ description = """
 
 - Oficial Repository [![a](https://img.shields.io/badge/GitHub-Repository-black?style=flat-square&logo=github)](https://github.com/R3gm/ConversaDocs/)
 
-- This application works on both CPU and GPU. For fast inference with GGML models, use the GPU.
+- You can upload multiple documents at once to a single database.
+
+- Every time a new database is created, the previous one is deleted.
 
-- You can clone the 'space' but to make it work, you need to set My_hf_token in secrets with a valid huggingface [token](https://huggingface.co/settings/tokens)
+- For maximum privacy, you can click "Load LLAMA GGML Model" to use a Llama 2 model. By default, the model llama-2_7B-Chat is loaded.
+
+- This application works on both CPU and GPU. For fast inference with GGML models, use the GPU.
 
 - For more information about what GGML models are, you can visit this notebook [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/R3gm/InsightSolver-Colab/blob/main/LLM_Inference_with_llama_cpp_python__Llama_2_13b_chat.ipynb)
+
+## 📖 News
+
+🔥 2023/07/24: Document summarization was added.
+
+🔥 2023/07/29: Error with llama 70B was fixed.
+
+🔥 2023/08/07: ♟️ Chessboard was added for playing with a LLM.
+
+
 """
 
 theme='aliabid94/new-theme'
@@ -73,7 +94,6 @@ def upload_file(files, max_docs):
 
 def predict(message, chat_history, max_k, check_memory):
     print(message)
-    print(check_memory)
     bot_message = dc.convchain(message, max_k, check_memory)
     print(bot_message)
     return "", dc.get_chats()
@@ -94,9 +114,8 @@ def convert():
 def clear_api_key(api_key):
     return 'api_key...', dc.openai_model(api_key)
 
-
 # Max values in generation
-DOC_DB_LIMIT = 10
+DOC_DB_LIMIT = 5
 MAX_NEW_TOKENS = 2048
 
 # Limit in HF, no need to set it
@@ -124,20 +143,43 @@ with gr.Blocks(theme=theme, css=css) as demo:
     sou = gr.HTML("")
 
     clear_button.click(flag,[],[link_output]).then(dc.clr_history,[], [link_output]).then(lambda: None, None, chatbot, queue=False)
-    upload_button.upload(flag,[],[file_output]).then(upload_file, [upload_button, max_docs], file_output).then(dc.clr_history,[], [link_output]).then(lambda: None, None, chatbot, queue=False)
-
-    with gr.Tab("Change model"):
-        gr.HTML("<h3>Only models from the GGML library are accepted.</h3>")
+    upload_button.upload(flag,[],[file_output]).then(upload_file, [upload_button, max_docs], file_output).then(dc.clr_history,[], [link_output])
+
+    with gr.Tab("Experimental Summarization"):
+        default_model = gr.HTML("<hr>From DB<br>It may take approximately 5 minutes to complete 15 pages in GPU. Please use files with fewer pages if you want to use summarization.<br></h2>")
+        summarize_button = gr.Button("Start summarization")
+
+        summarize_verify = gr.HTML(" ")
+        summarize_button.click(dc.summarize, [], [summarize_verify])
+
+    with gr.Tab("♟️ Chess Game with a LLM"):
+        with gr.Column():
+            gr.HTML('<div style="display: flex; justify-content: center; align-items: center; height: 100vh;"><div>♟️ Click to start the Chessboard ♟️</div></div>')
+            start_chess = gr.Button("START GAME")
+            board_chess = gr.HTML()
+            info_chess = gr.HTML()
+            input_chess = gr.Textbox(label="Type a valid move", placeholder="")
+
+            start_chess.click(cg.start_game,[],[board_chess, info_chess])
+            input_chess.submit(cg.user_move,[input_chess],[board_chess, info_chess, input_chess])
+
+    with gr.Tab("Config llama-2 model"):
+        gr.HTML("<h3>Only models from the GGML library are accepted. To apply the new configurations, please reload the model.</h3>")
         repo_ = gr.Textbox(label="Repository" ,value="TheBloke/Llama-2-7B-Chat-GGML")
-        file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q2_K.bin")
-        max_tokens = gr.inputs.Slider(1, MAX_NEW_TOKENS, default=16, label="Max new tokens; Limited due to excessively long inference times, use Colab or local to avoid these restrictions.", step=1)
+        file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q5_1.bin")
+        max_tokens = gr.inputs.Slider(1, 2048, default=256, label="Max new tokens", step=1)
        temperature = gr.inputs.Slider(0.1, 1., default=0.2, label="Temperature", step=0.1)
        top_k = gr.inputs.Slider(0.01, 1., default=0.95, label="Top K", step=0.01)
        top_p = gr.inputs.Slider(0, 100, default=50, label="Top P", step=1)
        repeat_penalty = gr.inputs.Slider(0.1, 100., default=1.2, label="Repeat penalty", step=0.1)
-        change_model_button = gr.Button("Load GGML Model")
-
-        default_model = gr.HTML("<hr>Default Model</h2>")
+        change_model_button = gr.Button("Load Llama GGML Model")
+
+        model_verify_ggml = gr.HTML("Loaded model Llama-2")
+
+    with gr.Tab("API Models"):
+
+        default_model = gr.HTML("<hr>Falcon Model</h2>")
+        hf_key = gr.Textbox(label="HF TOKEN", value=My_hf_token, visible=False)
        falcon_button = gr.Button("Load FALCON 7B-Instruct")
 
        openai_gpt_model = gr.HTML("<hr>OpenAI Model gpt-3.5-turbo</h2>")
@@ -145,16 +187,16 @@ with gr.Blocks(theme=theme, css=css) as demo:
        openai_button = gr.Button("Load gpt-3.5-turbo")
 
        line_ = gr.HTML("<hr> </h2>")
-        model_verify = gr.HTML("Loaded model Falcon 7B-instruct")
+        model_verify = gr.HTML(" ")
 
-    with gr.Tab("About"):
+    with gr.Tab("Help"):
        description_md = gr.Markdown(description)
 
    msg.submit(predict,[msg, chatbot, max_docs, check_memory],[msg, chatbot]).then(convert,[],[sou])
 
-    change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify])
+    change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify_ggml])
 
-    falcon_button.click(dc.default_falcon_model, [], [model_verify])
+    falcon_button.click(dc.default_falcon_model, [hf_key], [model_verify])
    openai_button.click(clear_api_key, [api_key], [api_key, model_verify])
-
+
 demo.launch(enable_queue=True)
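
The main packaging change in this commit is the import guard added at the top of app.py: llama-cpp-python is now (re)installed only when `from llama_cpp import Llama` fails, rather than on every launch. Below is a minimal, self-contained sketch of that pattern. It reuses the same CMAKE_ARGS/FORCE_CMAKE flags as the commit but swaps `os.system` for `subprocess.check_call` purely for illustration; the helper name `ensure_llama_cpp` is hypothetical, not part of the app.

import os
import subprocess
import sys

def ensure_llama_cpp():
    """Install llama-cpp-python only when it is not already importable."""
    try:
        import llama_cpp  # noqa: F401  # already installed, nothing to do
        return
    except ImportError:
        pass

    import torch  # used only for the CUDA check, as in app.py

    env = os.environ.copy()
    cmd = [sys.executable, "-m", "pip", "install", "llama-cpp-python"]
    if torch.cuda.is_available():
        print("CUDA is available on this system.")
        # Ask pip to rebuild llama.cpp with cuBLAS so GGML models run on GPU.
        env["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        cmd += ["--force-reinstall", "--upgrade", "--no-cache-dir", "--verbose"]
    else:
        print("CUDA is not available on this system.")
    subprocess.check_call(cmd, env=env)

ensure_llama_cpp()

Guarding on the import keeps warm restarts of the Space fast, since the slow cuBLAS build only runs when the package is genuinely missing.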
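
The UI wiring in this diff relies on Gradio's event chaining: each `.then()` step starts only after the previous handler returns, which is how the upload flow can show a status message first and index documents second. A minimal sketch of the same pattern follows; the handlers and labels are illustrative stand-ins for `flag`/`upload_file`/`dc.clr_history`, not the app's real logic.

import gradio as gr

def flag():
    # Step 1: give immediate feedback before the slow step runs.
    return "Loading..."

def load_docs(files, max_docs):
    # Step 2: hypothetical stand-in for upload_file() + DocChat indexing.
    return f"Loaded {len(files)} document(s) with max_docs={int(max_docs)}"

with gr.Blocks() as demo:
    max_docs = gr.Slider(1, 5, value=3, step=1, label="max_docs")
    upload_button = gr.UploadButton("Upload documents", file_count="multiple")
    file_output = gr.HTML()

    # .upload() fires when files are chosen; .then() chains the next handler
    # so it starts only after flag() has updated file_output.
    upload_button.upload(flag, [], [file_output]).then(
        load_docs, [upload_button, max_docs], [file_output]
    )

demo.launch()

Note that dropping the final `.then(lambda: None, None, chatbot, queue=False)` from the upload chain, as this commit does, means the visible chat history is no longer cleared after an upload; only the model-side reset via `dc.clr_history` remains.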