Spaces:
Sleeping
Sleeping
yellowcandle
committed on
Commit
•
4b18df1
1
Parent(s):
344a72e
Tried to add youtube video upload
Browse files
app.py
CHANGED
@@ -1,9 +1,33 @@
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
-
|
|
|
|
|
4 |
import torch
|
5 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
|
6 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
@spaces.GPU(duration=60)
|
8 |
def transcribe_audio(audio, model_id):
|
9 |
if audio is None:
|
@@ -36,7 +60,6 @@ def transcribe_audio(audio, model_id):
|
|
36 |
result = pipe(audio)
|
37 |
return result["text"]
|
38 |
|
39 |
-
# @spaces.GPU(duration=180)
|
40 |
def proofread(text):
|
41 |
if text is None:
|
42 |
return "Please provide the transcribed text for proofreading."
|
@@ -50,15 +73,13 @@ def proofread(text):
|
|
50 |
tokenizer = AutoTokenizer.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
|
51 |
model.to(device)
|
52 |
|
53 |
-
# Perform proofreading using the model
|
54 |
input_text = prompt + text
|
55 |
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
56 |
-
output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
|
57 |
proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
58 |
|
59 |
return proofread_text
|
60 |
|
61 |
-
|
62 |
with gr.Blocks() as demo:
|
63 |
gr.Markdown("""
|
64 |
# Audio Transcription and Proofreading
|
@@ -70,7 +91,7 @@ with gr.Blocks() as demo:
|
|
70 |
with gr.Row():
|
71 |
with gr.Column():
|
72 |
audio = gr.Audio(sources="upload", type="filepath")
|
73 |
-
|
74 |
model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
|
75 |
|
76 |
transcribe_button = gr.Button("Transcribe")
|
|
|
1 |
import spaces
|
2 |
import gradio as gr
|
3 |
+
import os
|
4 |
+
import logging
|
5 |
+
from pytube import YouTube
|
6 |
import torch
|
7 |
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, AutoModelForCausalLM, AutoTokenizer
|
8 |
|
9 |
+
def get_text(url):
    """Download the audio track of a YouTube video and return its transcript.

    Args:
        url: Address of the video. An empty string or ``None`` means there is
            nothing to transcribe.

    Returns:
        The transcript text with surrounding whitespace stripped, or ``None``
        when no URL was given, the video exposes no audio-only stream, or the
        downloaded file is larger than the size limit.
    """
    # Treat None like the empty string so a cleared input never reaches pytube.
    if not url:
        return None

    max_bytes = 30000000  # upper bound on the size of the downloaded audio

    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        # first() yields None when the filter matches no stream.
        logging.error('No audio-only stream found for %s', url)
        return None

    out_file = stream.download(output_path=".")
    size = os.stat(out_file).st_size
    logging.info('Size of audio file in Bytes: %s', size)

    if size > max_bytes:
        logging.error('Videos for transcription on this space are limited to about 1.5 hours. Sorry about this limit but some joker thought they could stop this tool from working by transcribing many extremely long videos. Please visit https://steve.digital to contact me about this space.')
        return None

    # Only the file extension changes here; the audio data is not re-encoded.
    base, _ = os.path.splitext(out_file)
    new_file = base + '.mp3'
    os.rename(out_file, new_file)

    # NOTE(review): `model` is not defined in the visible part of this file --
    # confirm a module-level object exposing transcribe() exists, otherwise
    # this line raises NameError.
    result = model.transcribe(new_file)
    return result['text'].strip()
|
30 |
+
|
31 |
@spaces.GPU(duration=60)
|
32 |
def transcribe_audio(audio, model_id):
|
33 |
if audio is None:
|
|
|
60 |
result = pipe(audio)
|
61 |
return result["text"]
|
62 |
|
|
|
63 |
def proofread(text):
|
64 |
if text is None:
|
65 |
return "Please provide the transcribed text for proofreading."
|
|
|
73 |
tokenizer = AutoTokenizer.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
|
74 |
model.to(device)
|
75 |
|
|
|
76 |
input_text = prompt + text
|
77 |
input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
|
78 |
+
output = model.generate(input_ids, max_length=len(input_ids[0]) + 50, num_return_sequences=1, temperature=0.7)
|
79 |
proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
|
80 |
|
81 |
return proofread_text
|
82 |
|
|
|
83 |
with gr.Blocks() as demo:
|
84 |
gr.Markdown("""
|
85 |
# Audio Transcription and Proofreading
|
|
|
91 |
with gr.Row():
|
92 |
with gr.Column():
|
93 |
audio = gr.Audio(sources="upload", type="filepath")
|
94 |
+
input_text_url = gr.Textbox(label="Video URL")
|
95 |
model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
|
96 |
|
97 |
transcribe_button = gr.Button("Transcribe")
|