Rimi98 committed on
Commit c2024d5
1 Parent(s): 3ce5c45

Update app.py

Files changed (1)
  1. app.py +52 -6
app.py CHANGED
@@ -2,6 +2,47 @@ import gradio as gr
  import onnxruntime
  from transformers import AutoTokenizer
  import torch
+ import os
+ from transformers import pipeline
+ ### --- Audio/Video to txt ---###
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ pipe = pipeline("automatic-speech-recognition",
+                 model="openai/whisper-base.en",
+                 chunk_length_s=30, device=device)
+
+ summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
+
+
+ def video_identity(video):
+     transcription = pipe(video)["text"]
+     return transcription
+
+ def summary(text):
+     text = text.split('.')
+     max_chunk = 500
+     current_chunk = 0
+     chunks = []
+
+
+     for t in text:
+         if len(chunks) == current_chunk + 1:
+             if len(chunks[current_chunk]) + len(t.split(' ')) <= max_chunk:
+                 chunks[current_chunk].extend(t.split(' '))
+             else:
+                 current_chunk += 1
+                 chunks.append(t.split(' '))
+         else:
+             chunks.append(t.split(' '))
+
+     for chunk in range(len(chunks)):
+         chunks[chunk] =' '.join(chunks[chunk])
+
+     summ = summarizer(chunks,max_length = 100)
+
+     return summ
+
+
+
+


  token = AutoTokenizer.from_pretrained('distilroberta-base')
@@ -12,16 +53,21 @@ output_name = inf_session.get_outputs()[0].name

  classes = ['Art', 'Astrology', 'Biology', 'Chemistry', 'Economics', 'History', 'Literature', 'Philosophy', 'Physics', 'Politics', 'Psychology', 'Sociology']

- def classify(vid,review):
-     input_ids = token(review)['input_ids'][:512]
+ def classify(vid):
+     full_text = video_identity(vid)
+     sum = summary(full_text)[0]['summary_text']
+
+
+     input_ids = token(sum)['input_ids'][:512]
      logits = inf_session.run([output_name],{input_name : [input_ids]})[0]
      logits = torch.FloatTensor(logits)
      probs = torch.sigmoid(logits)[0]
-     x = 2
-     return dict(zip(classes,map(float,probs)))
+     return full_text, sum, dict(zip(classes,map(float,probs)))

- label = gr.outputs.Label(num_top_classes=5)
- iface = gr.Interface(fn=classify,inputs=[gr.Video(),'text'],outputs = label)
+ # label = gr.outputs.Label(num_top_classes=5)
+ iface = gr.Interface(fn=classify,
+                      inputs=gr.inputs.Audio(source="upload", type="filepath"),
+                      outputs = ['text','text',gr.outputs.Label(num_top_classes=3)])
  iface.launch(inline=False)


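The commit chains three models ahead of the existing ONNX topic classifier: Whisper transcribes the uploaded audio, BART condenses the transcript, and the condensed text is tokenized and scored. Below is a rough end-to-end sketch of the same flow outside the Gradio UI; the audio path and the ONNX model file name are assumptions, since the diff does not show how inf_session is created.

# Sketch only: replicates the pipeline added in this commit outside the Gradio app.
# "lecture.wav" and "classifier-quantized.onnx" are placeholder names, not from the repo.
import numpy as np
import onnxruntime
import torch
from transformers import AutoTokenizer, pipeline

device = "cuda:0" if torch.cuda.is_available() else "cpu"
asr = pipeline("automatic-speech-recognition", model="openai/whisper-base.en",
               chunk_length_s=30, device=device)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=device)
tokenizer = AutoTokenizer.from_pretrained("distilroberta-base")
session = onnxruntime.InferenceSession("classifier-quantized.onnx")  # assumed file name

transcript = asr("lecture.wav")["text"]                                  # speech -> text
# The commit chunks the transcript with its summary() helper; truncation=True is a
# simplification here so one summarizer call stays within the model's input limit.
short_text = summarizer(transcript, max_length=100, truncation=True)[0]["summary_text"]
input_ids = tokenizer(short_text)["input_ids"][:512]                     # summary -> token ids

# The Space feeds a plain Python list; a NumPy int64 array is used here instead.
feed = {session.get_inputs()[0].name: np.array([input_ids], dtype=np.int64)}
logits = session.run([session.get_outputs()[0].name], feed)[0]
probs = torch.sigmoid(torch.FloatTensor(logits))[0]                      # multi-label probabilities
classes = ['Art', 'Astrology', 'Biology', 'Chemistry', 'Economics', 'History',
           'Literature', 'Philosophy', 'Physics', 'Politics', 'Psychology', 'Sociology']
print(dict(zip(classes, map(float, probs))))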