Spaces:
Runtime error
Runtime error
File size: 3,615 Bytes
3cc08f2 3448a7c f583bdc 3cc08f2 55c2266 6d9b516 f583bdc aee5752 39e6918 aee5752 f583bdc 3448a7c c7a64c1 3448a7c cf7f5dc f583bdc cf7f5dc c7a64c1 3cc08f2 3448a7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import gradio as gr
from transformers import pipeline
import numpy as np
import requests
import subprocess
import os
import urllib.parse
# Sejm term number; transcribe() passes it to get_sejm_videos(), which
# interpolates it into the API path as "term{term}".
term = 9
# Speech-recognition pipeline, constructed once at import time and reused by
# every transcribe() call.
transcriber = pipeline("automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2")
# Alternative checkpoint kept for reference (disabled):
#transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
def offset_time(link, start_offset, clip_length):
    """Return ``link`` with its ``startTime``/``stopTime`` window moved.

    The ``startTime`` query parameter of ``link`` is read, ``start_offset`` is
    added to it, and ``stopTime`` is set to the new start plus ``clip_length``.

    Args:
        link: URL whose query string contains an integer ``startTime``.
        start_offset: Amount added to the original ``startTime``. Non-integer
            numbers are truncated with ``int()`` (a UI number field may hand
            over ``600000.0`` rather than ``600000``).
        clip_length: Distance between the new ``startTime`` and ``stopTime``;
            truncated with ``int()`` in the same way.

    Returns:
        The rebuilt URL. Scheme, host, path, params and fragment are kept.
        The query string is replaced by exactly ``startTime`` and
        ``stopTime``; any other query parameters on ``link`` are dropped.

    Raises:
        KeyError: If ``link`` has no ``startTime`` query parameter.
        ValueError: If the ``startTime`` value is not an integer literal.
    """
    parsed_url = urllib.parse.urlparse(link)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    # Coerce the offsets to int so the URL never carries values like "1500.0".
    start_time = int(query_params['startTime'][0]) + int(start_offset)
    stop_time = start_time + int(clip_length)

    new_query = urllib.parse.urlencode(
        {'startTime': start_time, 'stopTime': stop_time}
    )
    # ParseResult is a named tuple, so only the query component is swapped.
    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))
def get_sejm_videos(term):
    """Fetch the video list of one Sejm term from api.sejm.gov.pl.

    NOTE: this file defines ``get_sejm_videos`` a second time further down
    (with optional filter arguments). That later definition replaces this one
    when the module is loaded, so this version is never the one called.

    Args:
        term: Term number, interpolated into the URL path as ``term{term}``.

    Returns:
        The decoded JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``. Callers must check for that string
        before indexing the result.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the server does not respond within the timeout.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def get_today_sejm_videos(term):
    """Fetch the ``videos/today`` listing of one Sejm term from api.sejm.gov.pl.

    Args:
        term: Term number, interpolated into the URL path as ``term{term}``.

    Returns:
        The decoded JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``. Callers must check for that string
        before indexing the result.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the server does not respond within the timeout.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"
    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None):
    """Fetch the video list of one Sejm term, optionally filtered.

    Each optional argument is forwarded as a query parameter only when it is
    truthy; ``video_type`` is sent under the name ``type``.

    Args:
        term: Term number, interpolated into the URL path as ``term{term}``.
        since: Value for the ``since`` query parameter.
        till: Value for the ``till`` query parameter.
        title: Value for the ``title`` query parameter.
        video_type: Value for the ``type`` query parameter.
        comm: Value for the ``comm`` query parameter.

    Returns:
        The decoded JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``. Callers must check for that string
        before indexing the result.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the server does not respond within the timeout.
    """
    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    candidate_filters = {
        'since': since,
        'till': till,
        'title': title,
        'type': video_type,
        'comm': comm,
    }
    # Falsy values (None, "", 0) are treated as "filter not requested".
    params = {key: value for key, value in candidate_filters.items() if value}

    # Without a timeout requests waits indefinitely on an unresponsive server.
    response = requests.get(base_url, params=params, timeout=30)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def download_video(video_url, video_path):
    """Download ``video_url`` and write the response body to ``video_path``.

    The body is streamed to disk chunk by chunk, so the whole video is never
    held in memory at once.

    Args:
        video_url: URL to fetch with a GET request.
        video_path: Destination file; opened in binary write mode, so an
            existing file is overwritten.

    Returns:
        True when the server answered 200 and the body was written; False for
        any other status code (the code is printed).

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the server stalls past the timeout. If this happens mid-transfer,
            a partially written ``video_path`` is left behind.
    """
    # (connect, read) timeouts. The read timeout bounds the wait for the next
    # bytes on the socket, not the duration of the whole transfer.
    with requests.get(video_url, stream=True, timeout=(10, 60)) as response:
        if response.status_code != 200:
            print(f"Error downloading video: {response.status_code}")
            return False
        with open(video_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)
    return True
def extract_audio(video_path, audio_path):
    """Extract the audio streams of ``video_path`` into ``audio_path`` via ffmpeg.

    Args:
        video_path: Input media file handed to ffmpeg with ``-i``.
        audio_path: Output file; overwritten if it already exists (``-y``).

    Returns:
        True when ffmpeg exited with status 0 and ``audio_path`` exists
        afterwards, False otherwise. A status line is printed either way.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    command = [
        'ffmpeg',
        '-y',              # global option: overwrite the output without asking
        '-i', video_path,
        '-q:a', '0',       # audio quality level 0 for the output encoder
        '-map', 'a',       # select only the audio streams of the input
        audio_path,
    ]
    completed = subprocess.run(command)

    # The exit status is checked in addition to the file's existence: a file
    # left over from an earlier run would otherwise make a failed extraction
    # look like a success.
    succeeded = completed.returncode == 0 and os.path.exists(audio_path)
    if succeeded:
        print("Audio extracted successfully.")
    else:
        print("Error extracting audio.")
    return succeeded
# Example argument pair left by the original author: 600000, 10000
def transcribe(num1, num2):
    """Transcribe a clip cut from the first video returned by the Sejm API.

    The first entry of ``get_sejm_videos(term)`` is taken, its ``videoLink``
    is rewritten with ``offset_time``, the clip is downloaded to
    ``./video.mov``, its audio is extracted to ``./audio.mp3`` and that file
    is passed to the module-level ``transcriber``.

    NOTE: the working files use fixed paths, so overlapping calls would share
    them.

    Args:
        num1: Offset added to the link's ``startTime`` (see ``offset_time``).
        num2: Clip length, i.e. ``stopTime - startTime`` of the rewritten link.

    Returns:
        The ``"text"`` field of the transcriber's result.

    Raises:
        gr.Error: If an input is missing, the video list cannot be fetched or
            is empty, the download fails, or no audio file is produced.
    """
    if num1 is None or num2 is None:
        raise gr.Error("Both number fields must be filled in.")

    videos = get_sejm_videos(term)
    # get_sejm_videos reports HTTP failures as an "Error: <code>" string
    # rather than raising, so the result has to be checked before indexing.
    if not isinstance(videos, list):
        raise gr.Error(f"Could not fetch the video list: {videos}")
    if not videos:
        raise gr.Error("The video list is empty.")

    video_link = videos[0]['videoLink']
    print(video_link)

    video_path = "./video.mov"
    audio_path = "./audio.mp3"

    if not download_video(offset_time(video_link, num1, num2), video_path):
        raise gr.Error("Could not download the requested clip.")

    # Drop any audio left by a previous call so that a failed extraction
    # cannot end up transcribing the old clip.
    if os.path.exists(audio_path):
        os.remove(audio_path)
    extract_audio(video_path, audio_path)
    if not os.path.exists(audio_path):
        raise gr.Error("Could not extract audio from the downloaded clip.")

    return transcriber(audio_path)["text"]
# Two numeric fields are passed positionally to transcribe(num1, num2); the
# returned string is rendered as plain text.
_number_fields = [gr.Number(label=caption) for caption in ("Number 1", "Number 2")]
demo = gr.Interface(fn=transcribe, inputs=_number_fields, outputs="text")
demo.launch()
|