# Spaces: Sleeping
import os
import tempfile

import gradio as gr
import openai
from pytube import YouTube
from transformers import pipeline
# Validate the OpenAI credentials first so a misconfigured Space fails
# immediately instead of after the (slow) speech model has been loaded.
openai_api_key = os.getenv("OPENAI_API_KEY")
# An empty secret is just as unusable as an unset one, so reject both.
if not openai_api_key:
    raise ValueError(
        "OpenAI API key is not set. Make sure to set it as a secret variable in Hugging Face Spaces."
    )
openai.api_key = openai_api_key

# Speech-to-text pipeline used by the transcription callbacks below.
# No task is passed, so transformers resolves it from the model repository.
model = pipeline(model="SofiaK/dataset")
def youtube_to_text(youtube_url):
    """Transcribe the audio track of a YouTube video.

    Args:
        youtube_url: URL of the video whose audio should be transcribed.

    Returns:
        The ``"text"`` entry of the module-level ``model`` pipeline output.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    # ``first()`` yields None on an empty query, which lets us report a clear
    # error instead of the IndexError raised by indexing an empty result list.
    audio_stream = YouTube(youtube_url).streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio stream found for {youtube_url!r}.")

    # Download into a throwaway directory so audio files do not accumulate in
    # the working directory; it is removed once the transcription is done.
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = audio_stream.download(output_path=workdir)
        return model(audio_path)["text"]
# System prompt that precedes every transcription handed to the chat model.
_SYSTEM_PROMPT = "Here is a text in Russian that was transcripted from an audio or a video. The user will ask questions about this text such as to translate it to another language, to summarize it, or to get relevant information. By default respond in english, apart if the user tells you otherwise. Here is the text"

# NOTE(review): the widget nesting below is reconstructed from the call order;
# the original indentation was lost, so confirm the layout matches the intent.
with gr.Blocks() as demo:
    with gr.Row():
        gr.HTML(
            value="<h1 style='text-align: center;'>Speech-To-Text transcription for Russian Language</h1>"
        )
    with gr.Row():
        with gr.Column():
            radio = gr.Radio(
                choices=["Audio", "Youtube"],
                label="Choose your input type: an audio or a youtube link",
                value="Audio",
            )
            audio_input = gr.Audio(
                sources=["upload", "microphone"], type="filepath", visible=True
            )
            youtube_input = gr.Textbox(
                value="https://www.youtube.com/", label="Youtube Link", visible=False
            )
            btn = gr.Button("Transcript")
        with gr.Column():
            output = gr.Text(label="Model Output")
            chatbot = gr.Chatbot()
            msg = gr.Textbox(label="Ask a question!")

    # OpenAI message list for the current visitor. A module-level list would be
    # shared by every user of the Space; gr.State keeps one copy per session.
    history_state = gr.State([])

    def make_visible(val):
        """Show the input widget that matches the selected input type.

        Args:
            val: Current value of the radio selector ("Audio" or "Youtube").

        Returns:
            Visibility updates for the audio widget and the YouTube textbox,
            in that order.
        """
        audio_visible = val == "Audio"
        return gr.update(visible=audio_visible), gr.update(visible=not audio_visible)

    def respond(message, chat_history, history):
        """Send the user's question to the chat model and record the answer.

        Args:
            message: Text typed into the question box.
            chat_history: (question, answer) pairs currently shown in the chatbot.
            history: OpenAI-format message list for this session.

        Returns:
            An empty string (clears the question box), the updated chatbot
            pairs, and the updated session message list.
        """
        # Nothing to ask: leave everything untouched rather than call the API.
        if not message or not message.strip():
            return "", chat_history, history

        # Build new lists instead of mutating the state objects in place.
        messages = history + [{"role": "user", "content": message}]
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo", messages=messages
        )
        answer = response.choices[0].message.content
        messages.append({"role": "assistant", "content": answer})
        shown = (chat_history or []) + [(message, answer)]
        return "", shown, messages

    def transcript(audio_path, youtube_url, input_kind, history):
        """Transcribe the selected input and add it to the chat context.

        Args:
            audio_path: File path of the uploaded/recorded audio, or None.
            youtube_url: Content of the YouTube link textbox.
            input_kind: Selected input type ("Audio" or "Youtube").
            history: OpenAI-format message list for this session.

        Returns:
            The transcribed text and the session message list extended with a
            system message carrying that text.

        Raises:
            gr.Error: If "Audio" is selected but no audio was provided.
        """
        if input_kind == "Audio":
            if not audio_path:
                raise gr.Error("Please upload or record an audio clip first.")
            txt = model(audio_path)["text"]
        else:
            txt = youtube_to_text(youtube_url)

        # Separate the instructions from the transcript so the two do not run
        # together into a single word.
        system_turn = {"role": "system", "content": f"{_SYSTEM_PROMPT}: {txt}"}
        return txt, history + [system_turn]

    # Event wiring must happen inside the Blocks context.
    radio.change(make_visible, inputs=radio, outputs=[audio_input, youtube_input])
    msg.submit(
        respond,
        [msg, chatbot, history_state],
        [msg, chatbot, history_state],
    )
    btn.click(
        fn=transcript,
        inputs=[audio_input, youtube_input, radio, history_state],
        outputs=[output, history_state],
    )

demo.launch()