BALAKA committed on
Commit
cd4c5a1
·
1 Parent(s): d0fd718

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +163 -0
app.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import librosa
4
+ import soundfile as sf
5
+ import requests
6
+ import torch
7
+ import torchaudio
8
+ import math
9
+ import os
10
+ from glob import glob
11
+ from pytube import YouTube
12
+ from transformers import (
13
+ Wav2Vec2CTCTokenizer,
14
+ Wav2Vec2FeatureExtractor,
15
+ Wav2Vec2Processor,
16
+ Wav2Vec2ForCTC,
17
+ TrainingArguments,
18
+ Trainer,
19
+ pipeline
20
+ )
21
# Pre-processing / decoding front end used by tran_script: turns raw audio
# into model inputs and turns predicted token ids back into text.
processor = Wav2Vec2Processor.from_pretrained(
    "airesearch/wav2vec2-large-xlsr-53-th"
)

# CTC speech-recognition model that produces the logits decoded above.
# NOTE(review): the processor and the model are loaded from two different
# repositories -- confirm that their vocabularies match.
model = Wav2Vec2ForCTC.from_pretrained(
    "BALAKA/wav2vec2-large-xlsr-53-thai"
)

# Root Gradio container; the UI is declared further down in a
# `with demo:` block and started with `demo.launch(...)`.
demo = gr.Blocks()
25
+
26
+
27
# Words that check() flags. Built once at import time instead of on every
# call; dict.fromkeys() drops the repeated entries while keeping the
# original order, which is also the order of the returned matches.
_NEGATIVE_WORDS = tuple(dict.fromkeys([
    "กระดอ", "กระทิง", "กระสัน", "กระหรี่", "กรีด", "กวนส้นตีน", "กะหรี่",
    "กินขี้ปี้เยี่ยว", "ขายตัว", "ขี้", "ขโมย", "ข่มขืน", "ควย", "ควาย",
    "คอขาด", "ฆ่า", "จังไร", "จัญไร", "ฉิบหาย", "ฉี่", "ชั่ว", "ชาติหมา",
    "ชิงหมาเกิด", "ชิบหาย", "ช้างเย็ด", "ดาก", "ตอแหล", "ตัดหัว", "ตัดหำ",
    "ตาย", "ตีกัน", "ทรมาน", "ทาส", "ทุเรศ", "นรก", "บีบคอ", "ปากหมา",
    "ปี้กัน", "พ่อง", "พ่อมึง", "ฟักยู", "ฟาย", "ยัดแม่", "ยิงกัน", "ระยำ",
    "ดอกทอง", "โสเภณี", "ล่อกัน", "ศพ", "สถุล",
    "สทุน", "สัด", "สันดาน", "สัส", "สาด", "ส้นตีน", "หน้าตัวเมืย", "ส้นตีน",
    "หมอย", "หรรม", "หัวแตก", "หำ", "หน้าหี", "น่าหี", "อนาจาร", "อัปปรี",
    "อีช้าง", "อีปลาวาฬ", "อีสัด", "อีหน้าหี", "อีหมา", "ห่า", "อับปรี",
    "เฆี่ยน", "เงี่ยน", "เจี๊ยว", "เชี่ย", "เด้า", "เผด็จการ", "เยี่ยว",
    "เย็ด", "เลือด", "เสือก", "เหล้า", "เหี้ย", "เอากัน", "แดก", "แตด",
    "แทง", "แม่ง", "แม่มึง", "แรด", "โคตร", "โง่", "โป๊", "โรคจิต", "ใจหมา",
    "ไอเข้", "ไอ้ขึ้หมา", "ไอ้บ้า", "ไอ้หมา", "เวร", "เวน",
]))


def check(sentence):
    """Return the flagged words that occur in *sentence*.

    Matching is plain substring containment, so a flagged word is reported
    even when it is embedded in a longer word.

    Args:
        sentence: Text to scan. ``None`` or an empty string yields no match.

    Returns:
        A list of the matching words, each at most once, in word-list order.
    """
    if not sentence:
        return []
    return [word for word in _NEGATIVE_WORDS if word in sentence]
36
+
37
+
38
def resample(file_path):
    """Load an audio file and return one channel of it at 16 kHz.

    Args:
        file_path: Path of the audio file, passed to ``torchaudio.load``.

    Returns:
        A NumPy array holding index 0 of the resampled tensor (the first
        channel, assuming torchaudio's default channels-first layout).
    """
    waveform, source_rate = torchaudio.load(file_path)
    to_16k = torchaudio.transforms.Resample(source_rate, 16000)
    first_channel = to_16k(waveform)[0]
    return first_channel.numpy()
42
+
43
+
44
def _transcribe_file(path):
    """Run the speech model on one audio file and return the decoded batch."""
    speech = resample(path)
    inputs = processor(speech, sampling_rate=16_000,
                       return_tensors="pt", padding=True)
    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return processor.batch_decode(predicted_ids)


def tran_script(file_path):
    """Transcribe a single audio file or a numbered series of chunk files.

    Args:
        file_path: Either the path of one audio file (``str``), or the
            integer returned by ``split_file`` -- one more than the number
            of ``split_<n>.mp3`` chunk files to transcribe.

    Returns:
        For a path: the value of ``processor.batch_decode`` for that file.
        For an integer: a list with one such value per chunk, in chunk order.
    """
    # The previous test `type(file_path) == 'str'` compared a type object
    # with a string literal and was therefore never true.
    if isinstance(file_path, str):
        return _transcribe_file(file_path)

    # Chunks are numbered from 1 and written by split_file relative to the
    # current working directory, so they are read back from there rather
    # than from a hard-coded absolute directory.
    return [_transcribe_file(f'split_{number}.mp3')
            for number in range(1, file_path)]
66
+
67
+
68
def split_file(file_path):
    """Cut an audio file into consecutive 5-second chunk files.

    The audio is loaded with ``librosa.load`` and written out with
    ``soundfile.write`` as ``split_1.mp3``, ``split_2.mp3``, ... relative to
    the current working directory; the final chunk holds whatever remains.

    Args:
        file_path: Path of the audio file to split.

    Returns:
        The number that the *next* chunk would have received, i.e. the
        number of chunks written plus one.
    """
    audio, rate = librosa.load(file_path)
    chunk_len = 5 * rate

    written = 0
    starts = range(0, len(audio), chunk_len)
    for written, start in enumerate(starts, start=1):
        # Slicing past the end simply yields the shorter final chunk.
        piece = audio[start:start + chunk_len]
        sf.write(f"split_{written}.mp3", piece, rate)
    return written + 1
87
+
88
+
89
def _format_hit(chunk_index, words):
    """Build the report line for the 5-second chunk at *chunk_index*."""
    start = chunk_index * 5
    # Minutes must wrap at 60 once a full hour has been reached.
    hours, remainder = divmod(start, 3600)
    minutes, seconds = divmod(remainder, 60)
    return (f'found at {hours:02d}h {minutes:02d}m '
            f'{seconds:02d}-{seconds + 5:02d}seconds found {words}')


def process(file_path):
    """Transcribe an audio file and report where flagged words occur.

    Clips of at most 5 seconds are transcribed in one go. Longer files are
    split into 5-second chunks and each chunk is checked separately.

    Args:
        file_path: Path of the audio file. A missing value (``None`` or an
            empty string) produces an empty report.

    Returns:
        For short clips a single line ``'[0.00-0.05] found : [...]'``.
        For longer files one newline-terminated line per chunk that contains
        flagged words, or an empty string when none do.
    """
    if not file_path:
        return ''

    # NOTE(review): newer librosa releases name this keyword `path`;
    # `filename` is kept here for the version this app was written against.
    if librosa.get_duration(filename=file_path) <= 5:
        transcript = ''.join(tran_script(file_path)).replace(' ', '')
        # Formatting (not `+`) is required: check() returns a list.
        return f'[0.00-0.05] found : {check(transcript)}'

    counter = split_file(file_path)
    lines = []
    for index, item in enumerate(tran_script(counter)):
        # Each item is the decoded batch for one chunk; spaces are dropped
        # before matching.
        found = check(''.join(item).replace(' ', ''))
        if found:
            lines.append(_format_hit(index, found))
    return ''.join(f'{line}\n' for line in lines)
113
+
114
+
115
def youtube_loader(link):
    """Download the audio of a YouTube video and run ``process`` on it.

    Args:
        link: URL of the YouTube video.

    Returns:
        The report string produced by ``process``.

    Raises:
        ValueError: If the video offers no audio-only stream.
    """
    yt = YouTube(str(link))
    audio_stream = yt.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f'No audio-only stream available for {link}')

    downloaded = audio_stream.download(output_path='mp3')
    # Rename inside the download directory and hand that exact path on,
    # instead of renaming into one place and reading from another.
    # NOTE(review): only the file name changes; the audio keeps whatever
    # container the stream was delivered in.
    target = os.path.join(os.path.dirname(downloaded), 'youtube.mp3')
    # os.replace overwrites a file left behind by an earlier request.
    os.replace(downloaded, target)
    return process(target)
121
+
122
+
123
def twitch_loader(link):
    """Download the audio of a Twitch video and run ``process`` on it.

    Args:
        link: Twitch video URL or numeric video ID entered by the user.

    Returns:
        The report string produced by ``process``.

    Raises:
        ValueError: If *link* is empty or looks like a command-line option.
        subprocess.CalledProcessError: If ``twitch-dl`` exits with an error.
    """
    import subprocess

    link = str(link).strip()
    if not link or link.startswith('-'):
        raise ValueError('Expected a Twitch video URL or ID.')

    output_name = 'twitch.wav'
    # The link is untrusted user input: pass it as one argv element rather
    # than interpolating it into a shell command line.
    subprocess.run(
        ['twitch-dl', 'download', '-q', 'audio_only', link,
         '--output', output_name],
        check=True,
    )
    # Read the file from where twitch-dl was told to write it.
    return process(output_name)
126
+
127
+
128
# UI layout: one tab per input source, each with an input component, an
# output textbox and a button that triggers the analysis.
with demo:
    gr.Markdown("Select your input type.")
    with gr.Tabs():
        with gr.TabItem("From your voice."):
            with gr.Row():
                # `label` was previously misspelled `labe`.
                voice = gr.Audio(source="microphone", type="filepath",
                                 optional=True,
                                 label="Start record your voice here.")
                voice_output = gr.Textbox()
            text_button1 = gr.Button("Flip")
        with gr.TabItem("From your file."):
            with gr.Row():
                # `label` was previously misspelled `labe`.
                file_input = gr.Audio(type="filepath", optional=True,
                                      label="Drop your audio file here.")
                file_output = gr.Textbox()
            text_button4 = gr.Button("Flip")
        with gr.TabItem("From youtube"):
            with gr.Row():
                youtube_input = gr.Textbox(
                    label="Insert your youtube link here.",
                    placeholder='https://www.youtube.com/watch?v=dQw4w9WgXcQ')
                youtube_output = gr.Textbox()
            text_button2 = gr.Button("Flip")
        with gr.TabItem("From twitch"):
            with gr.Row():
                twitch_input = gr.Textbox(
                    label="Insert your twitch link or ID here.",
                    placeholder='https://www.twitch.tv/videos/1823056925 or 1823056925')
                twitch_output = gr.Textbox()
            text_button3 = gr.Button("Flip")

    # Recorded and uploaded audio go straight to process(); links go
    # through their loader first.
    text_button1.click(process, inputs=voice, outputs=voice_output)
    text_button2.click(youtube_loader, inputs=youtube_input,
                       outputs=youtube_output)
    text_button3.click(twitch_loader, inputs=twitch_input,
                       outputs=twitch_output)
    text_button4.click(process, inputs=file_input,
                       outputs=file_output)

demo.launch(share=True, enable_queue=True)