Omnibus committed on
Commit
8485ec2
1 Parent(s): 1f9424f

Update app.py

Files changed (1)
app.py +67 -19
app.py CHANGED
@@ -1,20 +1,17 @@
import gradio as gr
- #import urllib.request
import requests
- #import bs4
- #import lxml
import os
- #import subprocess
from huggingface_hub import InferenceClient,HfApi
import random
import json
import datetime
- #from pypdf import PdfReader
import uuid
- #from query import tasks
+ import yt_dlp
+ import cv2
+ import whisper
+
from agent import (
    PREFIX,
-     SAVE_MEMORY,
    COMPRESS_DATA_PROMPT,
    COMPRESS_DATA_PROMPT_SMALL,
    LOG_PROMPT,
@@ -28,12 +25,59 @@ save_data=f'https://huggingface.co/datasets/{reponame}/raw/main/'
#token_self = os.environ['HF_TOKEN']
#api=HfApi(token=token_self)

-
+ sizes = list(whisper._MODELS.keys())
+ langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
+ current_size = "base"
+ loaded_model = whisper.load_model(current_size)

VERBOSE = True
MAX_HISTORY = 100
MAX_DATA = 20000

+ def dl(inp,img):
+     fps="Error"
+     out = None
+     out_file=[]
+     if img == None and inp !="":
+         try:
+             inp_out=inp.replace("https://","")
+             inp_out=inp_out.replace("/","_").replace(".","_").replace("=","_").replace("?","_")
+             if "twitter" in inp:
+                 os.system(f'yt-dlp "{inp}" --extractor-arg "twitter:api=syndication" --trim-filenames 160 -o "{uid}/{inp_out}.mp4" -S res,mp4 --recode mp4')
+             else:
+                 os.system(f'yt-dlp "{inp}" --trim-filenames 160 -o "{uid}/{inp_out}.mp4" -S res,mp4 --recode mp4')
+
+             out = f"{uid}/{inp_out}.mp4"
+             capture = cv2.VideoCapture(out)
+             fps = capture.get(cv2.CAP_PROP_FPS)
+             capture.release()
+         except Exception as e:
+             print(e)
+             out = None
+     elif img !=None and inp == "":
+         capture = cv2.VideoCapture(img)
+         fps = capture.get(cv2.CAP_PROP_FPS)
+         capture.release()
+         out = f"{img}"
+     return out
+
+ def csv(segments):
+     output = ""
+     for segment in segments:
+         output += f"{segment['start']},{segment['end']},{segment['text']}\n"
+     return output
+ def transcribe(path,lang,size):
+     if size != current_size:
+         loaded_model = whisper.load_model(size)
+         current_size = size
+     results = loaded_model.transcribe(path, language=lang)
+     subs = ".csv"
+     if subs == "None":
+         return results["text"]
+     elif subs == ".csv":
+         return csv(results["segments"])
+
+
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
@@ -231,7 +275,16 @@ def clear_fn():

with gr.Blocks() as app:
    gr.HTML("""<center><h1>Mixtral 8x7B TLDR Summarizer + Web</h1><h3>Summarize Data of unlimited length</h3>""")
+     with gr.Row():
+         with gr.Column():
+             with gr.Row():
+                 inp_url = gr.Textbox(label="Video URL")
+                 url_btn = gr.Button("Load Video")
+             vid = gr.Video()
+             trans_btn=gr.Button("Transcribe")
+             trans = gr.Textbox()
    chatbot = gr.Chatbot(label="Mixtral 8x7B Chatbot",show_copy_button=True)
+
    with gr.Row():
        with gr.Column(scale=3):
            prompt=gr.Textbox(label = "Instructions (optional)")
@@ -245,21 +298,16 @@ with gr.Blocks() as app:
            stop_button=gr.Button("Stop")
            clear_btn = gr.Button("Clear")
    with gr.Row():
-         with gr.Tab("Text"):
-             data=gr.Textbox(label="Input Data (paste text)", lines=6)
-         with gr.Tab("File"):
-             file=gr.Files(label="Input File(s) (.pdf .txt)")
-         with gr.Tab("Raw HTML"):
-             url = gr.Textbox(label="URL")
-         with gr.Tab("PDF URL"):
-             pdf_url = gr.Textbox(label="PDF URL")
-         with gr.Tab("PDF Batch"):
-             pdf_batch = gr.Textbox(label="PDF URL Batch (comma separated)")
+         sz = gr.Dropdown(label="Model Size", choices=sizes, value='base')
+         lang = gr.Dropdown(label="Language (Optional)", choices=langs, value="none")
    json_out=gr.JSON()
    e_box=gr.Textbox()
    #text=gr.JSON()
    #inp_query.change(search_models,inp_query,models_dd)
+
+     url_btn.click(dl,[inp_url,vid],vid)
+     trans_btn.click(transcribe,[vid,lang,sz],trans)
    clear_btn.click(clear_fn,None,[prompt,chatbot])
-     go=button.click(summarize,[prompt,chatbot,report_check,sum_mem_check,data,file,url,pdf_url,pdf_batch],[prompt,chatbot,e_box,json_out])
+     go=button.click(summarize,[prompt,chatbot,report_check,sum_mem_check],[prompt,chatbot,e_box,json_out])
    stop_button.click(None,None,None,cancels=[go])
    app.queue(default_concurrency_limit=20).launch(show_api=False)
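
Note on the new `dl` helper: it downloads by interpolating the user-supplied URL into a shell command via `os.system`. A minimal sketch of the same download step using `yt_dlp`'s Python API instead; the function name, output template and `best[ext=mp4]/best` format selector are illustrative assumptions, not the commit's exact `-S res,mp4 --recode mp4` behaviour:

```python
import yt_dlp

def download_mp4(url: str, out_path: str) -> str:
    # Sketch: download in-process with yt_dlp's Python API, so the URL is
    # never interpolated into a shell command string.
    ydl_opts = {
        "outtmpl": out_path,             # where to write the file
        "format": "best[ext=mp4]/best",  # prefer a single mp4 stream
        "noplaylist": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return out_path
```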
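Note on `transcribe`: assigning to `current_size` and `loaded_model` inside the function makes them local names, so the `size != current_size` check raises `UnboundLocalError` when the function is called, and whisper does not accept the dropdown's "none" placeholder as a language. A hedged sketch of one way to adjust it, reusing the module-level globals from the diff:

```python
def transcribe(path, lang, size):
    # Rebind the module-level model/size instead of shadowing them locally.
    global current_size, loaded_model
    if size != current_size:
        loaded_model = whisper.load_model(size)
        current_size = size
    # whisper expects a language name/code or None for auto-detection;
    # map the UI placeholder "none" to None.
    language = None if lang == "none" else lang
    results = loaded_model.transcribe(path, language=language)
    return csv(results["segments"])
```

`whisper.available_models()` is the public counterpart of the private `whisper._MODELS.keys()` used to build the size list.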
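Note on the event wiring: "Load Video" fills the `vid` component and "Transcribe" is a separate click. If transcription should start as soon as the download finishes, Gradio events can be chained with `.then()`; this is an optional variant, not part of the commit:

```python
# Optional chaining (not in this commit): run transcribe automatically
# after dl has written the downloaded file path into the Video component.
url_btn.click(dl, [inp_url, vid], vid).then(
    transcribe, [vid, lang, sz], trans
)
```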
 