Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import pipeline | |
import numpy as np | |
import requests | |
import subprocess | |
import os | |
import urllib.parse | |
# Sejm term number; transcribe() hands it to the API helper, which
# interpolates it into the request URL.
term = 9

# Speech-recognition pipeline, built once at import time and reused for
# every transcription request.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="filipzawadka/whisper-small-pl-2",
)
# Alternative checkpoint kept for reference:
# transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
def offset_time(link, start_offset, clip_length):
    """Return *link* with its ``startTime``/``stopTime`` window moved.

    The ``startTime`` query parameter of *link* is read as an integer and
    increased by *start_offset*; ``stopTime`` is set to the new start plus
    *clip_length*.  The rebuilt query contains only these two parameters
    (any other query parameters of *link* are dropped), while the scheme,
    host, path, path parameters and fragment are kept as they were.

    Args:
        link: URL whose query string contains a ``startTime`` value.
        start_offset: Amount added to the original ``startTime``.
        clip_length: Distance between the new ``startTime`` and ``stopTime``.

    Returns:
        The rebuilt URL as a string.

    Raises:
        KeyError: If *link* has no (non-empty) ``startTime`` parameter.
        ValueError: If ``startTime`` is not an integer literal.
    """
    parsed_url = urllib.parse.urlparse(link)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    # UI widgets such as gr.Number deliver floats; truncate to int so the
    # URL carries "1500" rather than "1500.0".
    start_time = int(query_params['startTime'][0]) + int(start_offset)
    stop_time = start_time + int(clip_length)

    new_query = urllib.parse.urlencode(
        {'startTime': str(start_time), 'stopTime': str(stop_time)}
    )
    # Swap only the query component; every other URL part is preserved.
    return parsed_url._replace(query=new_query).geturl()
def get_sejm_videos(term, timeout=30):
    """Fetch the videos resource for a Sejm term from api.sejm.gov.pl.

    NOTE: a second ``get_sejm_videos`` defined further down in this file
    rebinds the name, so this version is not the one callers reach at
    runtime.

    Args:
        term: Term number interpolated into the URL (``.../term{term}/videos``).
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The parsed JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    # Without an explicit timeout, requests can wait forever on a stalled
    # connection.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def get_today_sejm_videos(term, timeout=30):
    """Fetch the ``videos/today`` resource for a Sejm term.

    Args:
        term: Term number interpolated into the URL
            (``.../term{term}/videos/today``).
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The parsed JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"
    # Without an explicit timeout, requests can wait forever on a stalled
    # connection.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None, timeout=30):
    """Fetch the videos resource for a Sejm term, optionally filtered.

    Each filter argument that is truthy is sent as a query parameter;
    falsy values (``None``, ``""``, ``0``) are omitted from the request.

    Args:
        term: Term number interpolated into the URL (``.../term{term}/videos``).
        since: Sent as the ``since`` query parameter.
        till: Sent as the ``till`` query parameter.
        title: Sent as the ``title`` query parameter.
        video_type: Sent as the ``type`` query parameter.
        comm: Sent as the ``comm`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The parsed JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``.
    """
    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    filters = {
        'since': since,
        'till': till,
        'title': title,
        'type': video_type,
        'comm': comm,
    }
    # Only forward the filters the caller actually supplied.
    params = {key: value for key, value in filters.items() if value}

    # Without an explicit timeout, requests can wait forever on a stalled
    # connection.
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def download_video(video_url, video_path, timeout=30):
    """Download *video_url* to the file *video_path*.

    The body is streamed to disk in chunks so the whole video never has
    to be held in memory, and the HTTP connection is released when the
    download finishes or fails.

    Args:
        video_url: URL to fetch with a GET request.
        video_path: Destination file; created or overwritten on success.
        timeout: Seconds to wait for the server (connect / between reads)
            before ``requests`` raises a timeout error.

    Returns:
        True when the server answered 200 and the body was written,
        False (after printing the status code) for any other status.
    """
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Error downloading video: {response.status_code}")
            return False
        with open(video_path, 'wb') as file:
            # 1 MiB chunks keep memory use flat regardless of clip size.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)
    return True
def extract_audio(video_path, audio_path):
    """Extract the audio of *video_path* into *audio_path* using ffmpeg.

    Prints "Audio extracted successfully." when ffmpeg exits with status 0
    and the output file exists, and "Error extracting audio." otherwise.

    Args:
        video_path: Input media file passed to ``ffmpeg -i``.
        audio_path: Output file; overwritten if it already exists (``-y``).

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # -q:a 0  : audio quality scale 0
    # -map a  : take only the audio stream(s) of the input
    # -y      : overwrite the output file without prompting
    command = ['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y']
    completed = subprocess.run(command)

    # Checking the exit status as well as the file guards against a stale
    # output file from an earlier run being mistaken for a fresh success.
    if completed.returncode == 0 and os.path.exists(audio_path):
        print("Audio extracted successfully.")
    else:
        print("Error extracting audio.")
def transcribe(num1, num2):
    """Transcribe a clip cut from the first video returned by the Sejm API.

    The first entry's ``videoLink`` is re-timed with ``offset_time`` (start
    shifted by *num1*, clip length *num2*), downloaded to ``./video.mp4``,
    converted to ``./audio.mp3`` and passed to the speech-recognition
    pipeline.

    Example inputs noted by the original author: 600000 and 10000
    (units presumably match the link's ``startTime`` values -- confirm).

    Args:
        num1: Offset added to the link's ``startTime``.
        num2: Length of the clip, i.e. ``stopTime - startTime``.

    Returns:
        The transcribed text, or a human-readable ``"Error: ..."`` message
        when a step of the pipeline fails.
    """
    # An empty numeric field in the UI arrives as None.
    if num1 is None or num2 is None:
        return "Error: both numbers are required."

    videos = get_sejm_videos(term)
    # The fetch helper reports HTTP failures as an "Error: <status>" string.
    if isinstance(videos, str):
        return videos
    if not videos:
        return "Error: no videos returned by the Sejm API."

    # Numeric UI inputs are floats; the URL needs plain integers.
    clip_url = offset_time(videos[0]['videoLink'], int(num1), int(num2))

    video_path = "./video.mp4"
    audio_path = "./audio.mp3"

    # Drop audio left over from a previous request so a failed run can
    # never return the transcript of an older clip.
    if os.path.exists(audio_path):
        os.remove(audio_path)

    if not download_video(clip_url, video_path):
        return "Error: could not download the requested clip."

    extract_audio(video_path, audio_path)
    if not os.path.exists(audio_path):
        return "Error: could not extract audio from the downloaded clip."

    print(audio_path)
    return transcriber(audio_path)["text"]
# Gradio UI: two numeric fields are passed positionally to transcribe()
# and its return value is displayed as text.
# (An earlier variant took an audio file instead:
#  inputs=gr.Audio(type="filepath").)
demo = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Number(label="Number 1"),
        gr.Number(label="Number 2"),
    ],
    outputs="text",
)

# Start the web server for the interface.
demo.launch()