Omnibus committed on
Commit f127cf8
1 Parent(s): 781c65c

Create app.py

Files changed (1)
  1. app.py +265 -0
app.py ADDED

import gradio as gr
#import urllib.request
import requests
#import bs4
#import lxml
import os
#import subprocess
from huggingface_hub import InferenceClient, HfApi
import random
import json
import datetime
#from pypdf import PdfReader
import uuid
#from query import tasks
from agent import (
    PREFIX,
    SAVE_MEMORY,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
    LOG_RESPONSE,
)

client = InferenceClient(
    "mistralai/Mixtral-8x7B-Instruct-v0.1"
)
reponame = "Omnibus/tmp"
save_data = f'https://huggingface.co/datasets/{reponame}/raw/main/'
#token_self = os.environ['HF_TOKEN']
#api = HfApi(token=token_self)

VERBOSE = True
MAX_HISTORY = 100
MAX_DATA = 20000
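
# MAX_DATA caps the approximate size of each chunk handed to the model in a
# single call (measured by the rough separator count computed in summarize
# below, not by tokens); MAX_HISTORY is defined but unused in this file.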

def format_prompt(message, history):
    # build a Mixtral-instruct style conversation string
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt
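
# Illustrative example of the string format_prompt produces:
#
#   format_prompt("Summarize this", [("Hi", "Hello!")])
#   -> '<s>[INST] Hi [/INST] Hello!</s> [INST] Summarize this [/INST]'
#
# Note: format_prompt is defined here but never called below; run_gpt
# builds its prompt directly from PREFIX and a prompt template instead.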

def run_gpt(
    prompt_template,
    stop_tokens,
    max_tokens,
    seed,
    **prompt_kwargs,
):
    # stop_tokens is accepted for interface consistency but is not
    # forwarded to text_generation in this build
    print(seed)
    timestamp = datetime.datetime.now()

    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=max_tokens,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=seed,
    )

    content = PREFIX.format(
        timestamp=timestamp,
        purpose="Compile the provided data and complete the user's task"
    ) + prompt_template.format(**prompt_kwargs)
    if VERBOSE:
        print(LOG_PROMPT.format(content))

    #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
    #formatted_prompt = format_prompt(f'{content}', history)

    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
    resp = ""
    for response in stream:
        resp += response.token.text
        #yield resp

    if VERBOSE:
        print(LOG_RESPONSE.format(resp))
    return resp
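
# A minimal sketch of how run_gpt is invoked below. COMPRESS_DATA_PROMPT_SMALL
# comes from agent.py and is assumed to contain {direction}, {knowledge} and
# {history} placeholders, which the **prompt_kwargs expansion fills in:
#
#   resp = run_gpt(
#       COMPRESS_DATA_PROMPT_SMALL,
#       stop_tokens=["observation:", "task:"],
#       max_tokens=8192,
#       seed=random.randint(1, 1000000000),
#       direction="Summarize the key points",
#       knowledge="",
#       history="...one chunk of the input text...",
#   )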

def compress_data(c, instruct, history):
    seed = random.randint(1, 1000000000)

    print(c)
    # split the input into roughly MAX_DATA-sized chunks
    divr = int(c) / MAX_DATA
    divi = int(divr) + 1 if divr != int(divr) else int(divr)
    chunk = int(int(c) / divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')
    new_history = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        resp = run_gpt(
            COMPRESS_DATA_PROMPT_SMALL,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=8192,
            seed=seed,
            direction=instruct,
            knowledge="",
            history=hist,
        )
        out.append(resp)
        print(resp)
        e = e + chunk
        s = s + chunk
    return out
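
# Worked example of the chunking arithmetic above, with MAX_DATA = 20000:
#
#   c = 50000 -> divr = 2.5, divi = 3, chunk = 20000
#     slices: history[0:20000], history[20000:40000], history[40000:60000]
#
#   c = 10000 -> divr = 0.5, divi = 1, chunk = 20000
#     slices: history[0:20000] (a single pass over all of the input)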

def compress_data_og(c, instruct, history):
    seed = random.randint(1, 1000000000)

    print(c)
    divr = int(c) / MAX_DATA
    divi = int(divr) + 1 if divr != int(divr) else int(divr)
    chunk = int(int(c) / divr)
    print(f'chunk:: {chunk}')
    print(f'divr:: {divr}')
    print(f'divi:: {divi}')
    out = []
    s = 0
    e = chunk
    print(f'e:: {e}')
    new_history = ""
    for z in range(divi):
        print(f's:e :: {s}:{e}')

        hist = history[s:e]

        resp = run_gpt(
            COMPRESS_DATA_PROMPT,
            stop_tokens=["observation:", "task:", "action:", "thought:"],
            max_tokens=8192,
            seed=seed,
            direction=instruct,
            knowledge=new_history,
            history=hist,
        )

        # carry the running summary forward so each chunk is compressed
        # with everything summarized so far as context
        new_history = resp
        print(resp)
        out.append(resp)
        e = e + chunk
        s = s + chunk
    '''
    resp = run_gpt(
        COMPRESS_DATA_PROMPT,
        stop_tokens=["observation:", "task:", "action:", "thought:"],
        max_tokens=8192,
        seed=seed,
        direction=instruct,
        knowledge=new_history,
        history="All data has been received.",
    )'''
    print("final" + resp)
    #history = "observation: {}\n".format(resp)
    return resp
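
# The two compressors differ in how chunks relate to one another:
# compress_data summarizes every chunk independently (knowledge="") and
# returns the list of per-chunk summaries, while compress_data_og threads
# the running summary through as `knowledge`, so the response to the final
# chunk reflects the whole input and is returned as a single report.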

def summarize(inp, history, report_check, sum_mem_check, data=None,
              file=None, url=None, pdf_url=None, pdf_batch=None):
    # file/url/pdf_url/pdf_batch are wired to the input tabs below;
    # only pasted text (data) is processed in this build
    error_box = ""
    json_out = []
    rawp = ""  # fallback if no output type is selected
    if inp == "":
        inp = "Process this data"
    history = [(inp, "Working on it...")]
    yield "", history, error_box, json_out

    if data != "Error" and data != "" and data is not None:
        print(inp)
        out = str(data)
        rl = len(out)
        print(f'rl:: {rl}')
        # rough size estimate: count spaces, commas and newlines
        c = 1
        for i in str(out):
            if i == " " or i == "," or i == "\n":
                c += 1
        print(f'c:: {c}')
        if sum_mem_check == "Memory":
            #save_memory(inp, out)
            rawp = "Complete"
        if sum_mem_check == "Summary":
            json_out = compress_data(c, inp, out)

            out = str(json_out)
            if report_check:
                rl = len(out)
                print(f'rl:: {rl}')
                c = 1
                for i in str(out):
                    if i == " " or i == "," or i == "\n":
                        c += 1
                print(f'c2:: {c}')
                rawp = compress_data_og(c, inp, out)
            else:
                rawp = out
    else:
        rawp = "Provide a valid data source"
    history.clear()
    history.append((inp, rawp))
    yield "", history, error_box, json_out
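
# summarize is a generator: each yield maps positionally onto the outputs
# registered in button.click below, i.e.
#
#   yield "", history, error_box, json_out
#   ->    prompt, chatbot, e_box, json_out (the gr.JSON component)
#
# The first yield shows "Working on it..." immediately; the second replaces
# it with the finished result.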

#################################
def clear_fn():
    return "", [(None, None)]

with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize data of unlimited length</h3></center>""")
    chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot", show_copy_button=True)
    with gr.Row():
        with gr.Column(scale=3):
            prompt = gr.Textbox(label="Instructions (optional)")
        with gr.Column(scale=1):
            report_check = gr.Checkbox(label="Return Report", value=True)
            sum_mem_check = gr.Radio(label="Output", choices=["Summary", "Memory"], value="Summary")
    button = gr.Button()

    #models_dd = gr.Dropdown(choices=[m for m in return_list], interactive=True)
    with gr.Row():
        stop_button = gr.Button("Stop")
        clear_btn = gr.Button("Clear")
    with gr.Row():
        with gr.Tab("Text"):
            data = gr.Textbox(label="Input Data (paste text)", lines=6)
        with gr.Tab("File"):
            file = gr.Files(label="Input File(s) (.pdf .txt)")
        with gr.Tab("Raw HTML"):
            url = gr.Textbox(label="URL")
        with gr.Tab("PDF URL"):
            pdf_url = gr.Textbox(label="PDF URL")
        with gr.Tab("PDF Batch"):
            pdf_batch = gr.Textbox(label="PDF URL Batch (comma separated)")
    json_out = gr.JSON()
    e_box = gr.Textbox()
    #text = gr.JSON()
    #inp_query.change(search_models, inp_query, models_dd)
    clear_btn.click(clear_fn, None, [prompt, chatbot])
    go = button.click(summarize, [prompt, chatbot, report_check, sum_mem_check, data, file, url, pdf_url, pdf_batch], [prompt, chatbot, e_box, json_out])
    stop_button.click(None, None, None, cancels=[go])
app.queue(default_concurrency_limit=20).launch(show_api=False)
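
# To run this Space locally (a sketch, assuming agent.py from the same repo
# sits next to app.py and the usual Space dependencies are installed):
#
#   pip install gradio huggingface_hub requests
#   python app.py
#
# The InferenceClient above targets the public serverless endpoint for
# mistralai/Mixtral-8x7B-Instruct-v0.1; rate limits may apply without an
# HF token.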