Roger Condori committed
Commit f7ceb03 • 1 parent: 0a43cad

add new features app.py

Files changed (1)
  1. app.py +73 -31
app.py CHANGED
@@ -1,3 +1,15 @@
+import torch
+import os
+try:
+    from llama_cpp import Llama
+except:
+    if torch.cuda.is_available():
+        print("CUDA is available on this system.")
+        os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose')
+    else:
+        print("CUDA is not available on this system.")
+        os.system('pip install llama-cpp-python')
+
 import gradio as gr
 from langchain.embeddings.openai import OpenAIEmbeddings
 from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
@@ -21,30 +33,25 @@ from langchain.document_loaders import (
     PyPDFLoader,
 )
 import param
-import os
-import torch
 from conversadocs.bones import DocChat
+from conversadocs.llm_chess import ChessGame
+
+My_hf_token = os.getenv("My_hf_token")
 
 dc = DocChat()
+cg = ChessGame(dc)
 
 ##### GRADIO CONFIG ####
 
-if torch.cuda.is_available():
-    print("CUDA is available on this system.")
-    os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python --force-reinstall --upgrade --no-cache-dir --verbose')
-else:
-    print("CUDA is not available on this system.")
-    os.system('pip install llama-cpp-python')
-
 css="""
 #col-container {max-width: 1500px; margin-left: auto; margin-right: auto;}
 """
 
 title = """
 <div style="text-align: center;max-width: 1500px;">
-<h2>Chat with Documents 📚 - Falcon and Llama-2</h2>
-<p style="text-align: center;">Upload txt, pdf, doc, docx, enex, epub, html, md, odt, ptt and pttx.
-Wait for the Status to show Loaded documents, start typing your questions. This is a demo of <a href="https://github.com/R3gm/ConversaDocs">ConversaDocs</a>.<br /></p>
+<h2>Chat with Documents 📚 - Falcon, Llama-2 and OpenAI</h2>
+<p style="text-align: center;">Upload txt, pdf, doc, docx, enex, epub, html, md, odt, ptt and pttx.
+Wait for the Status to show Loaded documents, start typing your questions. Oficial Repository <a href="https://github.com/R3gm/ConversaDocs">ConversaDocs</a>.<br /></p>
 </div>
 """
 
@@ -55,11 +62,25 @@ description = """
 
 - Oficial Repository [![a](https://img.shields.io/badge/GitHub-Repository-black?style=flat-square&logo=github)](https://github.com/R3gm/ConversaDocs/)
 
-- This application works on both CPU and GPU. For fast inference with GGML models, use the GPU.
+- You can upload multiple documents at once to a single database.
+
+- Every time a new database is created, the previous one is deleted.
 
-- You can clone the 'space' but to make it work, you need to set My_hf_token in secrets with a valid huggingface [token](https://huggingface.co/settings/tokens)
+- For maximum privacy, you can click "Load LLAMA GGML Model" to use a Llama 2 model. By default, the model llama-2_7B-Chat is loaded.
+
+- This application works on both CPU and GPU. For fast inference with GGML models, use the GPU.
 
 - For more information about what GGML models are, you can visit this notebook [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/R3gm/InsightSolver-Colab/blob/main/LLM_Inference_with_llama_cpp_python__Llama_2_13b_chat.ipynb)
+
+## 📖 News
+
+🔥 2023/07/24: Document summarization was added.
+
+🔥 2023/07/29: Error with llama 70B was fixed.
+
+🔥 2023/08/07: ♟️ Chessboard was added for playing with a LLM.
+
+
 """
 
 theme='aliabid94/new-theme'
@@ -73,7 +94,6 @@ def upload_file(files, max_docs):
 
 def predict(message, chat_history, max_k, check_memory):
     print(message)
-    print(check_memory)
     bot_message = dc.convchain(message, max_k, check_memory)
     print(bot_message)
     return "", dc.get_chats()
@@ -94,9 +114,8 @@ def convert():
 def clear_api_key(api_key):
     return 'api_key...', dc.openai_model(api_key)
 
-
 # Max values in generation
-DOC_DB_LIMIT = 10
+DOC_DB_LIMIT = 5
 MAX_NEW_TOKENS = 2048
 
 # Limit in HF, no need to set it
@@ -124,20 +143,43 @@ with gr.Blocks(theme=theme, css=css) as demo:
     sou = gr.HTML("")
 
     clear_button.click(flag,[],[link_output]).then(dc.clr_history,[], [link_output]).then(lambda: None, None, chatbot, queue=False)
-    upload_button.upload(flag,[],[file_output]).then(upload_file, [upload_button, max_docs], file_output).then(dc.clr_history,[], [link_output]).then(lambda: None, None, chatbot, queue=False)
-
-    with gr.Tab("Change model"):
-        gr.HTML("<h3>Only models from the GGML library are accepted.</h3>")
+    upload_button.upload(flag,[],[file_output]).then(upload_file, [upload_button, max_docs], file_output).then(dc.clr_history,[], [link_output])
+
+    with gr.Tab("Experimental Summarization"):
+        default_model = gr.HTML("<hr>From DB<br>It may take approximately 5 minutes to complete 15 pages in GPU. Please use files with fewer pages if you want to use summarization.<br></h2>")
+        summarize_button = gr.Button("Start summarization")
+
+        summarize_verify = gr.HTML(" ")
+        summarize_button.click(dc.summarize, [], [summarize_verify])
+
+    with gr.Tab("♟️ Chess Game with a LLM"):
+        with gr.Column():
+            gr.HTML('<div style="display: flex; justify-content: center; align-items: center; height: 100vh;"><div>♟️ Click to start the Chessboard ♟️</div></div>')
+            start_chess = gr.Button("START GAME")
+            board_chess = gr.HTML()
+            info_chess = gr.HTML()
+            input_chess = gr.Textbox(label="Type a valid move", placeholder="")
+
+            start_chess.click(cg.start_game,[],[board_chess, info_chess])
+            input_chess.submit(cg.user_move,[input_chess],[board_chess, info_chess, input_chess])
+
+    with gr.Tab("Config llama-2 model"):
+        gr.HTML("<h3>Only models from the GGML library are accepted. To apply the new configurations, please reload the model.</h3>")
         repo_ = gr.Textbox(label="Repository" ,value="TheBloke/Llama-2-7B-Chat-GGML")
-        file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q2_K.bin")
-        max_tokens = gr.inputs.Slider(1, MAX_NEW_TOKENS, default=16, label="Max new tokens; Limited due to excessively long inference times, use Colab or local to avoid these restrictions.", step=1)
+        file_ = gr.Textbox(label="File name" ,value="llama-2-7b-chat.ggmlv3.q5_1.bin")
+        max_tokens = gr.inputs.Slider(1, 2048, default=256, label="Max new tokens", step=1)
        temperature = gr.inputs.Slider(0.1, 1., default=0.2, label="Temperature", step=0.1)
        top_k = gr.inputs.Slider(0.01, 1., default=0.95, label="Top K", step=0.01)
        top_p = gr.inputs.Slider(0, 100, default=50, label="Top P", step=1)
        repeat_penalty = gr.inputs.Slider(0.1, 100., default=1.2, label="Repeat penalty", step=0.1)
-        change_model_button = gr.Button("Load GGML Model")
-
-        default_model = gr.HTML("<hr>Default Model</h2>")
+        change_model_button = gr.Button("Load Llama GGML Model")
+
+        model_verify_ggml = gr.HTML("Loaded model Llama-2")
+
+    with gr.Tab("API Models"):
+
+        default_model = gr.HTML("<hr>Falcon Model</h2>")
+        hf_key = gr.Textbox(label="HF TOKEN", value=My_hf_token, visible=False)
        falcon_button = gr.Button("Load FALCON 7B-Instruct")
 
        openai_gpt_model = gr.HTML("<hr>OpenAI Model gpt-3.5-turbo</h2>")
@@ -145,16 +187,16 @@ with gr.Blocks(theme=theme, css=css) as demo:
        openai_button = gr.Button("Load gpt-3.5-turbo")
 
        line_ = gr.HTML("<hr> </h2>")
-        model_verify = gr.HTML("Loaded model Falcon 7B-instruct")
+        model_verify = gr.HTML(" ")
 
-    with gr.Tab("About"):
+    with gr.Tab("Help"):
        description_md = gr.Markdown(description)
 
    msg.submit(predict,[msg, chatbot, max_docs, check_memory],[msg, chatbot]).then(convert,[],[sou])
 
-    change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify])
+    change_model_button.click(dc.change_llm,[repo_, file_, max_tokens, temperature, top_p, top_k, repeat_penalty, max_docs],[model_verify_ggml])
 
-    falcon_button.click(dc.default_falcon_model, [], [model_verify])
+    falcon_button.click(dc.default_falcon_model, [hf_key], [model_verify])
    openai_button.click(clear_api_key, [api_key], [api_key, model_verify])
-
+
 demo.launch(enable_queue=True)
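
The main packaging change in this commit is the import guard added at the top of app.py: llama-cpp-python is now (re)installed only when `from llama_cpp import Llama` fails, rather than on every launch. Below is a minimal, self-contained sketch of that pattern. It reuses the same CMAKE_ARGS/FORCE_CMAKE flags as the commit but swaps `os.system` for `subprocess.check_call` purely for illustration; the helper name `ensure_llama_cpp` is hypothetical, not part of the app.

import os
import subprocess
import sys

def ensure_llama_cpp():
    """Install llama-cpp-python only when it is not already importable."""
    try:
        import llama_cpp  # noqa: F401  # already installed, nothing to do
        return
    except ImportError:
        pass

    import torch  # used only for the CUDA check, as in app.py

    env = os.environ.copy()
    cmd = [sys.executable, "-m", "pip", "install", "llama-cpp-python"]
    if torch.cuda.is_available():
        print("CUDA is available on this system.")
        # Ask pip to rebuild llama.cpp with cuBLAS so GGML models run on GPU.
        env["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
        env["FORCE_CMAKE"] = "1"
        cmd += ["--force-reinstall", "--upgrade", "--no-cache-dir", "--verbose"]
    else:
        print("CUDA is not available on this system.")
    subprocess.check_call(cmd, env=env)

ensure_llama_cpp()

Guarding on the import keeps warm restarts of the Space fast, since the slow cuBLAS build only runs when the package is genuinely missing.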
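
The UI wiring in this diff relies on Gradio's event chaining: each `.then()` step starts only after the previous handler returns, which is how the upload flow can show a status message first and index documents second. A minimal sketch of the same pattern follows; the handlers and labels are illustrative stand-ins for `flag`/`upload_file`/`dc.clr_history`, not the app's real logic.

import gradio as gr

def flag():
    # Step 1: give immediate feedback before the slow step runs.
    return "Loading..."

def load_docs(files, max_docs):
    # Step 2: hypothetical stand-in for upload_file() + DocChat indexing.
    return f"Loaded {len(files)} document(s) with max_docs={int(max_docs)}"

with gr.Blocks() as demo:
    max_docs = gr.Slider(1, 5, value=3, step=1, label="max_docs")
    upload_button = gr.UploadButton("Upload documents", file_count="multiple")
    file_output = gr.HTML()

    # .upload() fires when files are chosen; .then() chains the next handler
    # so it starts only after flag() has updated file_output.
    upload_button.upload(flag, [], [file_output]).then(
        load_docs, [upload_button, max_docs], [file_output]
    )

demo.launch()

Note that dropping the final `.then(lambda: None, None, chatbot, queue=False)` from the upload chain, as this commit does, means the visible chat history is no longer cleared after an upload; only the model-side reset via `dc.clr_history` remains.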