Update app.py
app.py
CHANGED
@@ -1,21 +1,400 @@
 import gradio as gr
 import base64
 import random
-
-#
 
 
-
-
-
-
-
-
-
 """
 
 
-#def call_chart(mermaidCode):
 def mm(graph):
     code_out=""
     for ea in graph.split("\n"):
@@ -24,31 +403,129 @@ def mm(graph):
     #out_html=f'''<div><iframe src="https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={code_out}&rand={random.randint(1,1111111111)}" height="500" width="500"></iframe></div>'''
     out_html=f'''<div><iframe src="https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={code_out}" height="500" width="500"></iframe></div>'''
     return out_html
 
-""
-
-
-
-
-
-
-"""
 
 
-
-
-
-
-
-
-
-
-
 
-with gr.Blocks(css=css) as app:
-    inp_text=gr.Textbox(value=mermaid_code)
-    btn=gr.Button()
-    out_html=gr.HTML("""""")
-    btn.click(mm,inp_text,out_html)
-    app.load(mm,inp_text,out_html)
-app.launch()
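The removals above are the whole of the old pane; the new revision of app.py follows in full, with added lines marked +.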
 import gradio as gr
 import base64
 import random
+import gradio as gr
+#import urllib.request
+import requests
+import bs4
+import lxml
+import os
+#import subprocess
+from huggingface_hub import InferenceClient,HfApi
+import random
+import json
+import datetime
+from pypdf import PdfReader
+import uuid
+#from query import tasks
+from gradio_client import Client
+
+from agent import (
+    PREFIX,
+    GET_CHART,
+    COMPRESS_DATA_PROMPT,
+    COMPRESS_DATA_PROMPT_SMALL,
+    LOG_PROMPT,
+    LOG_RESPONSE,
+)
+api=HfApi()
+
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+
+
+def sort_fn(inp):
+
+    client_sort = Client("Omnibus/sort_document")
+    sen,nouns = client_sort.predict(
+        f"{inp}", # str in 'Paste Text' Textbox component
+        api_name="/sort_doc"
+    )
+    return nouns
+
+def find_all(url):
+    return_list=[]
+    print (url)
+    #if action_input in query.tasks:
+    print (f"trying URL:: {url}")
+    try:
+        if url != "" and url != None:
+            out = []
+            source = requests.get(url)
+            #source = urllib.request.urlopen(url).read()
+            soup = bs4.BeautifulSoup(source.content,'lxml')
 
+            rawp=(f'RAW TEXT RETURNED: {soup.text}')
+            cnt=0
+            cnt+=len(rawp)
+            out.append(rawp)
+            out.append("HTML fragments: ")
+            q=("a","p","span","content","article")
+            for p in soup.find_all("a"):
+                out.append([{"LINK TITLE":p.get('title'),"URL":p.get('href'),"STRING":p.string}])
+
+            print(rawp)
+            return True, rawp
+        else:
+            return False, "Enter Valid URL"
+    except Exception as e:
+        print (e)
+        return False, f'Error: {e}'
 
+    #else:
+    #    history = "observation: The search query I used did not return a valid response"
+
+    return "MAIN", None, history, task
+
+FIND_KEYWORDS="""Find keywords from the dictionary of provided keywords that are relevant to the user's query.
+Return the keyword:value pairs from the list in the form of a JSON file output.
+dictionary:
+{keywords}
+user query:
 """
 
+def find_keyword_fn(c,inp,data):
+
+    data=str(data)
+    seed=random.randint(1,1000000000)
+    divr=int(c)/20000
+    divi=int(divr)+1 if divr != int(divr) else int(divr)
+    chunk = int(int(c)/divr)
+    out = []
+    s=0
+    e=chunk
+    print(f'e:: {e}')
+    #task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
+    for z in range(divi):
+        print(f's:e :: {s}:{e}')
+
+        hist = data[s:e]
+        resp = run_gpt(
+            FIND_KEYWORDS,
+            stop_tokens=[],
+            max_tokens=8192,
+            seed=seed,
+            keywords=data,
+        ).strip("\n")
+        out.append(resp)
+        #new_history = resp
+        print (resp)
+        #out+=resp
+        e=e+chunk
+        s=s+chunk
+    return out
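
The windowing arithmetic here (repeated with MAX_DATA in compress_data below) slices a character count c into roughly 20,000-character windows: divr is the fractional window count, divi rounds it up to a number of passes, and chunk is the stride. Note, though, that each pass fills the prompt with the full data (keywords=data) rather than the hist slice it just cut. A worked example of the arithmetic, assuming c = 45,000:

```python
c = 45000
divr = c / 20000  # 2.25 fractional windows
divi = int(divr) + 1 if divr != int(divr) else int(divr)  # 3 passes
chunk = int(c / divr)  # 20000 characters per window
assert (divi, chunk) == (3, 20000)
# The loop then walks data[0:20000], data[20000:40000], data[40000:60000];
# Python slicing clips the last window to data[40000:45000].
```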
+
+
+def read_txt(txt_path):
+    text=""
+    with open(txt_path,"r") as f:
+        text = f.read()
+    f.close()
+    print (text)
+    return text
+
+def read_pdf(pdf_path):
+    text=""
+    reader = PdfReader(f'{pdf_path}')
+    number_of_pages = len(reader.pages)
+    for i in range(number_of_pages):
+        page = reader.pages[i]
+        text = f'{text}\n{page.extract_text()}'
+    print (text)
+    return text
+
+error_box=[]
+def read_pdf_online(url):
+    uid=uuid.uuid4()
+    print(f"reading {url}")
+    response = requests.get(url, stream=True)
+    print(response.status_code)
+    text=""
+    #################
+
+    #####################
+    try:
+        if response.status_code == 200:
+            with open("test.pdf", "wb") as f:
+                f.write(response.content)
+            #f.close()
+            #out = Path("./data.pdf")
+            #print (out)
+            reader = PdfReader("test.pdf")
+            number_of_pages = len(reader.pages)
+            print(number_of_pages)
+            for i in range(number_of_pages):
+                page = reader.pages[i]
+                text = f'{text}\n{page.extract_text()}'
+            print(f"PDF_TEXT:: {text}")
+            return text
+        else:
+            text = response.status_code
+            error_box.append(url)
+            print(text)
+            return text
+
+
+    except Exception as e:
+        print (e)
+        return e
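
read_pdf_online writes every download to the fixed path test.pdf, so two concurrent requests in the queued app would overwrite each other's file; the uid generated at the top is never used. One way to make the temp file unique, reusing that uid (a sketch, not part of this commit):

```python
# Hypothetical variant: key the temp file to the per-call uuid so
# parallel downloads don't clobber each other.
tmp_path = f"{uid}.pdf"
with open(tmp_path, "wb") as f:
    f.write(response.content)
reader = PdfReader(tmp_path)
```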
+
+
+VERBOSE = True
+MAX_HISTORY = 100
+MAX_DATA = 20000
+
+def format_prompt(message, history):
+    prompt = "<s>"
+    for user_prompt, bot_response in history:
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
+    prompt += f"[INST] {message} [/INST]"
+    return prompt
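
format_prompt builds the Mixtral-Instruct chat format, wrapping each prior user turn in [INST] ... [/INST] and closing each bot reply with </s>. In this revision it is only referenced from the commented-out lines inside the run_gpt functions. For example:

```python
history = [("What is 2+2?", "4")]
print(format_prompt("And 3+3?", history))
# -> <s>[INST] What is 2+2? [/INST] 4</s> [INST] And 3+3? [/INST]
```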
+
+def run_gpt_no_prefix(
+    prompt_template,
+    stop_tokens,
+    max_tokens,
+    seed,
+    **prompt_kwargs,
+):
+    print(seed)
+    try:
+        generate_kwargs = dict(
+            temperature=0.9,
+            max_new_tokens=max_tokens,
+            top_p=0.95,
+            repetition_penalty=1.0,
+            do_sample=True,
+            seed=seed,
+        )
+
+        content = prompt_template.format(**prompt_kwargs)
+        #if VERBOSE:
+        print(LOG_PROMPT.format(content))
+
+
+        #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
+        #formatted_prompt = format_prompt(f'{content}', history)
+
+        stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
+        resp = ""
+        for response in stream:
+            resp += response.token.text
+            #yield resp
+
+        #if VERBOSE:
+        print(LOG_RESPONSE.format(resp))
+        return resp
+    except Exception as e:
+        print(f'no_prefix_error:: {e}')
+        return "Error"
+def run_gpt(
+    prompt_template,
+    stop_tokens,
+    max_tokens,
+    seed,
+    **prompt_kwargs,
+):
+    print(seed)
+    timestamp=datetime.datetime.now()
+
+    generate_kwargs = dict(
+        temperature=0.9,
+        max_new_tokens=max_tokens,
+        top_p=0.95,
+        repetition_penalty=1.0,
+        do_sample=True,
+        seed=seed,
+    )
+
+    content = PREFIX.format(
+        timestamp=timestamp,
+        purpose="Compile the provided data and complete the user's task"
+    ) + prompt_template.format(**prompt_kwargs)
+    #if VERBOSE:
+    print(LOG_PROMPT.format(content))
+
+
+    #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history)
+    #formatted_prompt = format_prompt(f'{content}', history)
+
+    stream = client.text_generation(content, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    resp = ""
+    for response in stream:
+        resp += response.token.text
+        #yield resp
+
+    if VERBOSE:
+        print(LOG_RESPONSE.format(resp))
+    return resp
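
run_gpt prepends the PREFIX template (stamped with the current timestamp and a fixed purpose) to the given template, fills the remaining placeholders from **prompt_kwargs, and streams tokens from the Mixtral endpoint. stop_tokens is accepted but never forwarded to text_generation. Roughly how compress_data drives it (the template string below is illustrative; the real ones live in the local agent module):

```python
# Illustrative template; COMPRESS_DATA_PROMPT_SMALL is defined in agent.py.
TEMPLATE = "direction: {direction}\nknowledge: {knowledge}\nhistory: {history}"
resp = run_gpt(
    TEMPLATE,
    stop_tokens=[],                        # accepted but unused
    max_tokens=8192,
    seed=random.randint(1, 1000000000),
    direction="summarize",                 # fills {direction}
    knowledge="",                          # fills {knowledge}
    history="one 20,000-character chunk",  # fills {history}
)
```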
+
+
+def compress_data(c, instruct, history):
+    seed=random.randint(1,1000000000)
+
+    print (c)
+    #tot=len(purpose)
+    #print(tot)
+    divr=int(c)/MAX_DATA
+    divi=int(divr)+1 if divr != int(divr) else int(divr)
+    chunk = int(int(c)/divr)
+    print(f'chunk:: {chunk}')
+    print(f'divr:: {divr}')
+    print (f'divi:: {divi}')
+    out = []
+    #out=""
+    s=0
+    e=chunk
+    print(f'e:: {e}')
+    new_history=""
+    #task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
+    for z in range(divi):
+        print(f's:e :: {s}:{e}')
+
+        hist = history[s:e]
+
+        resp = run_gpt(
+            COMPRESS_DATA_PROMPT_SMALL,
+            stop_tokens=["observation:", "task:", "action:", "thought:"],
+            max_tokens=8192,
+            seed=seed,
+            direction=instruct,
+            knowledge="",
+            history=hist,
+        ).strip("\n")
+        out.append(resp)
+        #new_history = resp
+        print (resp)
+        #out+=resp
+        e=e+chunk
+        s=s+chunk
+    return out
+
+
+def compress_data_og(c, instruct, history):
+    seed=random.randint(1,1000000000)
+
+    print (c)
+    #tot=len(purpose)
+    #print(tot)
+    divr=int(c)/MAX_DATA
+    divi=int(divr)+1 if divr != int(divr) else int(divr)
+    chunk = int(int(c)/divr)
+    print(f'chunk:: {chunk}')
+    print(f'divr:: {divr}')
+    print (f'divi:: {divi}')
+    out = []
+    #out=""
+    s=0
+    e=chunk
+    print(f'e:: {e}')
+    new_history=""
+    #task = f'Compile this data to fulfill the task: {task}, and complete the purpose: {purpose}\n'
+    for z in range(divi):
+        print(f's:e :: {s}:{e}')
+
+        hist = history[s:e]
+
+        resp = run_gpt(
+            COMPRESS_DATA_PROMPT,
+            stop_tokens=["observation:", "task:", "action:", "thought:"],
+            max_tokens=8192,
+            seed=seed,
+            direction=instruct,
+            knowledge=new_history,
+            history=hist,
+        ).strip("\n")
+
+        new_history = resp
+        print (resp)
+        out+=resp
+        e=e+chunk
+        s=s+chunk
+    '''
+    resp = run_gpt(
+        COMPRESS_DATA_PROMPT,
+        stop_tokens=["observation:", "task:", "action:", "thought:"],
+        max_tokens=8192,
+        seed=seed,
+        direction=instruct,
+        knowledge=new_history,
+        history="All data has been received.",
+    )'''
+    print ("final" + resp)
+    #history = "observation: {}\n".format(resp)
+    return resp
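
The two compressors differ only in how they reduce: compress_data summarizes each window independently (knowledge="") and returns the list of per-window summaries, while compress_data_og threads each summary back in as knowledge for the next window and returns only the final rolling summary. The triple-quoted block was presumably a final consolidation pass that has been disabled.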
+
+def get_chart(inp):
+    seed=random.randint(1,1000000000)
+    try:
+        resp = run_gpt_no_prefix(
+            GET_CHART,
+            stop_tokens=[],
+            max_tokens=8192,
+            seed=seed,
+            inp=inp,
+        ).strip("\n")
+        print(resp)
+    except Exception as e:
+        print(f'Error:: {e}')
+        resp = e
+    return resp
+
+def format_json(inp):
+
+    print("FORMATTING:::")
+    print(type(inp))
+    print("###########")
+    print(inp)
+    print("###########")
+    print("###########")
+    new_str=""
+    matches=["```","#","//"]
+    for i,line in enumerate(inp):
+        line = line.strip()
+        print(line)
+        #if not any(x in line for x in matches):
+        new_str+=line.strip("\n").strip("```").strip("#").strip("//")
+    print("###########")
+    print("###########")
+    #inp = inp.strip("<\s>")
+    new_str=new_str.strip("</s>")
+    out_json=eval(new_str)
+    print(out_json)
+    print("###########")
+    print("###########")
+
+    return out_json
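
format_json walks the model output (a list of chunk summaries when it comes from compress_data), strips fences, comment markers, and the </s> tag, then eval()s the concatenation. eval on model output will execute arbitrary expressions; a safer sketch would try json.loads first and fall back to a literal parse (an alternative, not what this commit does):

```python
import ast
import json

def parse_model_json(raw: str):
    # Strip code fences and the end-of-sequence tag, then parse safely.
    cleaned = raw.strip().strip("`").strip("</s>")
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        return ast.literal_eval(cleaned)  # tolerates single-quoted dicts
```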
 
 def mm(graph):
     code_out=""
     for ea in graph.split("\n"):
@@ -24,31 +403,129 @@ def mm(graph):
     #out_html=f'''<div><iframe src="https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={code_out}&rand={random.randint(1,1111111111)}" height="500" width="500"></iframe></div>'''
     out_html=f'''<div><iframe src="https://omnibus-mermaid-script.static.hf.space/index.html?mermaid={code_out}" height="500" width="500"></iframe></div>'''
     return out_html
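
The body of mm's loop sits on unchanged lines between the two hunks, so the diff elides it. Given the base64 import and the mermaid= query parameter on the static viewer URL, it presumably base64-encodes the graph text; a guess at the elided lines, not confirmed by this diff:

```python
# Hypothetical reconstruction of the elided loop body: accumulate a
# base64-encoded form of the mermaid source for the iframe query string.
for ea in graph.split("\n"):
    code_out += base64.b64encode(ea.encode("utf-8")).decode("utf-8")
```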
+
+def summarize(inp,history,report_check,chart_check,data=None,files=None,directory=None,url=None,pdf_url=None,pdf_batch=None):
+    json_box=[]
+    chart_out=""
+    if inp == "":
+        inp = "Process this data"
+    history.clear()
+    history = [(inp,"Working on it...")]
+    yield "",history,chart_out,chart_out,json_box
 
+    if pdf_batch.startswith("http"):
+        c=0
+        data=""
+        for i in str(pdf_batch):
+            if i==",":
+                c+=1
+        print (f'c:: {c}')
 
+        try:
+            for i in range(c+1):
+                batch_url = pdf_batch.split(",",c)[i]
+                bb = read_pdf_online(batch_url)
+                data=f'{data}\nFile Name URL ({batch_url}):\n{bb}'
+        except Exception as e:
+            print(e)
+            #data=f'{data}\nError reading URL ({batch_url})'
+
+    if directory:
+        for ea in directory:
+            print(ea)
+
+    if pdf_url.startswith("http"):
+        print("PDF_URL")
+        out = read_pdf_online(pdf_url)
+        data=out
+    if url.startswith("http"):
+        val, out = find_all(url)
+        if not val:
+            data="Error"
+            rawp = str(out)
+        else:
+            data=out
+    if files:
+        for i, file in enumerate(files):
+            try:
+                print (file)
+                if file.endswith(".pdf"):
+                    zz=read_pdf(file)
+                    print (zz)
+                    data=f'{data}\nFile Name ({file}):\n{zz}'
+                elif file.endswith(".txt"):
+                    zz=read_txt(file)
+                    print (zz)
+                    data=f'{data}\nFile Name ({file}):\n{zz}'
+            except Exception as e:
+                data=f'{data}\nError opening File Name ({file})'
+                print (e)
+    if data != "Error" and data != "":
+        print(inp)
+        out = str(data)
+        rl = len(out)
+        print(f'rl:: {rl}')
+        c=1
+        for i in str(out):
+            if i == " " or i=="," or i=="\n":
+                c +=1
+        print (f'c:: {c}')
+        json_out = compress_data(c,inp,out)
+        out = str(json_out)
+        try:
+            json_out=format_json(json_out)
+        except Exception as e:
+            print (e)
+        chart_out = get_chart(str(json_out))
+        chart_html=mm(chart_out)
+        print(chart_out)
+    else:
+        rawp = "Provide a valid data source"
+    history.clear()
+    history.append((inp,rawp))
+    yield "", history,chart_html,chart_out,json_out
 
+#################################
+def clear_fn():
+    return "",[(None,None)]
+
+with gr.Blocks() as app:
+    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3>""")
+    chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot",show_copy_button=True)
+    with gr.Row():
+        with gr.Column(scale=3):
+            prompt=gr.Textbox(label = "Instructions (optional)")
+        with gr.Column(scale=1):
+            button=gr.Button()
+
+    #models_dd=gr.Dropdown(choices=[m for m in return_list],interactive=True)
+    with gr.Row():
+        stop_button=gr.Button("Stop")
+        clear_btn = gr.Button("Clear")
+    with gr.Row():
+        with gr.Tab("Text"):
+            data=gr.Textbox(label="Input Data (paste text)", lines=6)
+        with gr.Tab("File"):
+            file=gr.Files(label="Input File(s) (.pdf .txt)")
+        with gr.Tab("Folder"):
+            directory=gr.File(label="Folder", file_count='directory')
+        with gr.Tab("Raw HTML"):
+            url = gr.Textbox(label="URL")
+        with gr.Tab("PDF URL"):
+            pdf_url = gr.Textbox(label="PDF URL")
+        with gr.Tab("PDF Batch"):
+            pdf_batch = gr.Textbox(label="PDF URL Batch (comma separated)")
+    m_box=gr.HTML()
+    e_box=gr.Textbox()
+
+    json_out=gr.JSON()
+    #text=gr.JSON()
+    #inp_query.change(search_models,inp_query,models_dd)
+    clear_btn.click(clear_fn,None,[prompt,chatbot])
+
+    #go=button.click(summarize,[prompt,chatbot,report_check,chart_check,data,file,directory,url,pdf_url,pdf_batch],[prompt,chatbot,e_box,json_out])
+    go=button.click(summarize,[prompt,chatbot,report_check,chart_check,data,file,directory,url,pdf_url,pdf_batch],[prompt,chatbot,m_box,e_box,json_out])
+
+    stop_button.click(None,None,None,cancels=[go])
+app.queue(default_concurrency_limit=20).launch(show_api=False)
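
button.click passes report_check and chart_check as inputs, but no such components are created in this revision, so building the app would raise a NameError at wiring time; summarize accepts them but never reads them. Presumably they were checkboxes in a sibling revision, e.g. (an assumption):

```python
# Hypothetical components matching the click() inputs; labels and
# defaults are guesses.
report_check = gr.Checkbox(label="Return report", value=True)
chart_check = gr.Checkbox(label="Generate chart", value=True)
```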