Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
import os
import subprocess
from glob import glob

import gradio as gr
import librosa
import numpy as np
import requests
import soundfile as sf
import torch
import torchaudio
from pytube import YouTube
from transformers import (
    Wav2Vec2CTCTokenizer,
    Wav2Vec2FeatureExtractor,
    Wav2Vec2Processor,
    Wav2Vec2ForCTC,
    TrainingArguments,
    Trainer,
    pipeline
)
|
21 |
+
# Speech-to-text components, loaded once at import time and shared by every
# request handler below.
# NOTE(review): the processor and the model weights are pulled from two
# different Hub repositories; presumably they share a vocabulary - confirm.
processor = Wav2Vec2Processor.from_pretrained("airesearch/wav2vec2-large-xlsr-53-th")
model = Wav2Vec2ForCTC.from_pretrained("BALAKA/wav2vec2-large-xlsr-53-thai")

# Top-level Gradio container; the UI layout is attached to it further down.
demo = gr.Blocks()
|
25 |
+
|
26 |
+
|
27 |
+
def check(sentence):
    """Return the flagged words that occur anywhere inside ``sentence``.

    Matching is plain substring containment, so a flagged word is reported
    even when it is embedded in a longer word.

    Args:
        sentence: Text to scan (the callers pass it with spaces removed).

    Returns:
        A list of the matching flagged words, each at most once, in the
        order they appear in the word list below. Empty when nothing matches.
    """
    negative = ["กระดอ", "กระทิง", "กระสัน", "กระหรี่", "กรีด", "กวนส้นตีน", "กะหรี่", "กินขี้ปี้เยี่ยว", "ขายตัว", "ขี้", "ขโมย", "ข่มขืน", "ควย", "ควาย", "คอขาด", "ฆ่า", "จังไร", "จัญไร", "ฉิบหาย", "ฉี่", "ชั่ว", "ชาติหมา", "ชิงหมาเกิด", "ชิบหาย", "ช้างเย็ด", "ดาก", "ตอแหล", "ตัดหัว", "ตัดหำ", "ตาย", "ตีกัน", "ทรมาน", "ทาส", "ทุเรศ", "นรก", "บีบคอ", "ปากหมา", "ปี้กัน", "พ่อง", "พ่อมึง", "ฟักยู", "ฟาย", "ยัดแม่", "ยิงกัน", "ระยำ", "ดอกทอง", "โสเภณี", "ล่อกัน", "ศพ", "สถุล",
                "สทุน", "สัด", "สันดาน", "สัส", "สาด", "ส้นตีน", "หน้าตัวเมืย", "ส้นตีน", "หมอย", "หรรม", "หัวแตก", "หำ", "หน้าหี", "น่าหี", "อนาจาร", "อัปปรี", "อีช้าง", "อีปลาวาฬ", "อีสัด", "อีหน้าหี", "อีหมา", "ห่า", "อับปรี", "เฆี่ยน", "เงี่ยน", "เจี๊ยว", "เชี่ย", "เด้า", "เผด็จการ", "เยี่ยว", "เย็ด", "เลือด", "เสือก", "เหล้า", "เหี้ย", "เอากัน", "แดก", "แตด", "แทง", "แม่ง", "แม่มึง", "แรด", "โคตร", "โง่", "โป๊", "โรคจิต", "ใจหมา", "ไอเข้", "ไอ้ขึ้หมา", "ไอ้บ้า", "ไอ้หมา", "เวร", "เวน"]
    # The list contains repeated entries; dict.fromkeys drops them while
    # keeping first-seen order so a word is never reported twice.
    negative = list(dict.fromkeys(negative))
    return [word for word in negative if word in sentence]
|
36 |
+
|
37 |
+
|
38 |
+
def resample(file_path, target_rate=16000):
    """Load an audio file and return its first channel at ``target_rate`` Hz.

    Args:
        file_path: Path of the audio file, passed to ``torchaudio.load``.
        target_rate: Sampling rate to convert to. Defaults to 16000, the
            rate the transcription code passes to the processor.

    Returns:
        A NumPy array holding the resampled first channel (index 0 of the
        loaded waveform); any further channels are discarded.
    """
    speech_array, sampling_rate = torchaudio.load(file_path)
    resampler = torchaudio.transforms.Resample(sampling_rate, target_rate)
    return resampler(speech_array)[0].numpy()
|
42 |
+
|
43 |
+
|
44 |
+
def tran_script(file_path):
    """Transcribe one audio file, or the numbered clips made by ``split_file``.

    Args:
        file_path: Either a ``str`` path to a single audio file, or the
            integer returned by ``split_file`` (number of clips + 1).

    Returns:
        For a ``str`` path: the decoded batch for that file (a list of
        strings from ``processor.batch_decode``).
        For an integer: a list with one decoded batch per clip, in clip
        order (``split_1.mp3``, ``split_2.mp3``, ...).
    """
    # isinstance, not ``type(x) == 'str'``: a type object never equals the
    # string 'str', so that comparison sent string paths down the integer
    # branch where ``range(file_path - 1)`` raised TypeError.
    if isinstance(file_path, str):
        return _transcribe(file_path)

    # split_file writes its clips relative to the working directory, so they
    # are read back from there rather than from a fixed '/content/' prefix.
    return [_transcribe(f'split_{i + 1}.mp3') for i in range(file_path - 1)]


def _transcribe(audio_path):
    """Run the wav2vec2 model on one audio file and return the decoded batch."""
    speech = resample(audio_path)
    inputs = processor(speech, sampling_rate=16_000,
                       return_tensors="pt", padding=True)
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)
|
66 |
+
|
67 |
+
|
68 |
+
def split_file(file_path, chunk_seconds=5):
    """Cut an audio file into consecutive clips written as ``split_<n>.mp3``.

    Clips are numbered from 1 and written relative to the current working
    directory. The last clip holds whatever samples remain and may be
    shorter than the others.

    Args:
        file_path: Path of the audio file, passed to ``librosa.load``.
        chunk_seconds: Length of each clip in seconds. Defaults to 5.

    Returns:
        The number of clips written plus one (the next unused clip number).
        ``tran_script`` relies on this off-by-one convention.

    Raises:
        ValueError: If ``chunk_seconds`` does not yield at least one sample
            per clip.
    """
    speech, sample_rate = librosa.load(file_path)
    chunk = int(chunk_seconds * sample_rate)
    if chunk <= 0:
        raise ValueError("chunk_seconds must be long enough to hold one sample")

    counter = 1
    for start in range(0, len(speech), chunk):
        # Slicing past the end is clamped, so the final clip is just shorter.
        block = speech[start:start + chunk]
        # NOTE(review): writing the .mp3 extension through soundfile
        # presumably needs an MP3-capable libsndfile build - confirm.
        sf.write("split_" + str(counter) + ".mp3", block, sample_rate)
        counter += 1
    return counter
|
87 |
+
|
88 |
+
|
89 |
+
def process(file_path):
    """Transcribe an audio file and report where flagged words occur.

    Files of at most 5 seconds are transcribed in one go. Longer files are
    split with ``split_file`` and each clip is transcribed and checked
    separately, so hits can be reported with a timestamp.

    Args:
        file_path: Path of the audio file to analyse.

    Returns:
        For short files: a single line ``'[0.00-0.05] found : [...]'`` with
        the list of flagged words (possibly empty).
        For longer files: one newline-terminated line per clip containing
        flagged words; an empty string when no clip has any.
    """
    if librosa.get_duration(filename=file_path) <= 5:
        transcript = ''.join(tran_script(file_path)).replace(' ', '')
        # check() returns a list; format it instead of concatenating it to a
        # str, which raised TypeError.
        return f'[0.00-0.05] found : {check(transcript)}'

    counter = split_file(file_path)
    clips = tran_script(counter)

    lines = []
    for index, item in enumerate(clips):
        # Each item is the decoded batch for one clip; spaces are removed
        # before matching.
        found = check(''.join(item).replace(' ', ''))
        if not found:
            continue
        # Clips are 5 seconds long (split_file's default), so the clip index
        # gives the start offset. divmod keeps minutes within 0-59 once the
        # offset passes one hour.
        minutes, seconds = divmod(index * 5, 60)
        hours, minutes = divmod(minutes, 60)
        lines.append(
            f'found at {hours:02d}h {minutes:02d}m '
            f'{seconds:02d}-{seconds + 5:02d}seconds found {found}'
        )
    return ''.join(line + '\n' for line in lines)
|
113 |
+
|
114 |
+
|
115 |
+
def youtube_loader(link):
    """Download the audio of a YouTube video and run ``process`` on it.

    Args:
        link: URL of the YouTube video.

    Returns:
        Whatever ``process`` returns for the downloaded audio.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    yt = YouTube(str(link))
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError(f'no audio-only stream available for {link}')

    out_file = stream.download(output_path='mp3')
    # Rename inside the download directory and hand that same path to
    # process(); previously the file was moved to one location and then read
    # from a different, hard-coded one.
    target = os.path.join(os.path.dirname(out_file), 'youtube.mp3')
    # os.replace overwrites the file left behind by an earlier request.
    os.replace(out_file, target)
    return process(target)
|
121 |
+
|
122 |
+
|
123 |
+
def twitch_loader(link):
    """Download the audio of a Twitch video with twitch-dl and process it.

    Args:
        link: Twitch video URL or video ID, as typed by the user.

    Returns:
        Whatever ``process`` returns for the downloaded audio.

    Raises:
        ValueError: If ``link`` is empty or looks like a command-line option.
        subprocess.CalledProcessError: If twitch-dl exits with an error.
    """
    video = str(link).strip()
    # The value comes straight from a text box: refuse anything that
    # twitch-dl could interpret as an option rather than a video.
    if not video or video.startswith('-'):
        raise ValueError('a Twitch video link or ID is required')

    output = 'twitch.wav'
    # Argument list, no shell: the link can no longer inject shell commands,
    # and check=True surfaces a failed download instead of ignoring it.
    subprocess.run(
        ['twitch-dl', 'download', '-q', 'audio_only', video, '--output', output],
        check=True,
    )
    # twitch-dl writes relative to the working directory, so read it there.
    return process(output)
|
126 |
+
|
127 |
+
|
128 |
+
# UI layout: one tab per input source, each with an input widget, a text
# output and a button that triggers the analysis.
with demo:
    gr.Markdown("Select your input type.")
    with gr.Tabs():
        with gr.TabItem("From your voice."):
            with gr.Row():
                # ``label`` was misspelled ``labe``, so the caption was never
                # applied to the component.
                voice = gr.Audio(source="microphone", type="filepath",
                                 optional=True, label="Start record your voice here.")
                voice_output = gr.Textbox()
            text_button1 = gr.Button("Flip")
        with gr.TabItem("From your file."):
            with gr.Row():
                file_input = gr.Audio(type="filepath", optional=True, label="Drop your audio file here.")
                file_output = gr.Textbox()
            text_button4 = gr.Button("Flip")
        with gr.TabItem("From youtube"):
            with gr.Row():
                youtube_input = gr.Textbox(
                    label="Insert your youtube link here.", placeholder='https://www.youtube.com/watch?v=dQw4w9WgXcQ')
                youtube_output = gr.Textbox()
            text_button2 = gr.Button("Flip")
        with gr.TabItem("From twitch"):
            with gr.Row():
                twitch_input = gr.Textbox(label="Insert your twitch link or ID here.",
                                          placeholder='https://www.twitch.tv/videos/1823056925 or 1823056925')
                twitch_output = gr.Textbox()
            text_button3 = gr.Button("Flip")

    # Microphone and file inputs both deliver a file path, so they share the
    # same handler; the link inputs go through their downloaders first.
    text_button1.click(process, inputs=voice, outputs=voice_output)
    text_button2.click(youtube_loader, inputs=youtube_input,
                       outputs=youtube_output)
    text_button3.click(twitch_loader, inputs=twitch_input,
                       outputs=twitch_output)
    text_button4.click(process, inputs=file_input,
                       outputs=file_output)

demo.launch(share=True, enable_queue=True)
|