"""Gradio demo that transcribes a clip of the most recent Sejm video.

The script asks the public Sejm API for the video list of a parliamentary
term, rewrites the first video's link so that it covers a caller-chosen time
window, downloads that clip and runs it through a speech-recognition pipeline.
"""

import os
import subprocess
import urllib.parse

import gradio as gr
import numpy as np
import requests
from transformers import pipeline

# Sejm term whose videos are queried by transcribe().
term = 9

# Seconds `requests` waits for the server to connect / send data before
# raising, so a stalled connection cannot hang the app forever.
REQUEST_TIMEOUT = 30

# Speech-recognition pipeline used by transcribe(); loaded once at import.
transcriber = pipeline(
    "automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2"
)
# Alternative model previously tried here: "openai/whisper-small.en".


def offset_time(link, start_offset, clip_length):
    """Return *link* with its time window moved and resized.

    The new ``startTime`` is the link's current ``startTime`` plus
    *start_offset*; the new ``stopTime`` is the new ``startTime`` plus
    *clip_length*. Any other query parameters of *link* are dropped; scheme,
    host, path, params and fragment are kept.

    Args:
        link: URL whose query string contains a ``startTime`` parameter.
        start_offset: Amount added to the existing ``startTime``.
        clip_length: Distance between the new ``startTime`` and ``stopTime``.

    Returns:
        The rebuilt URL as a string.

    Raises:
        KeyError: If *link* has no ``startTime`` query parameter.
        ValueError: If ``startTime`` is not an integer.
    """
    parsed_url = urllib.parse.urlparse(link)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    # Coerce to int: a float offset would otherwise be rendered as
    # e.g. "600000.0" in the rebuilt query string.
    start_time = int(query_params['startTime'][0]) + int(start_offset)
    stop_time = start_time + int(clip_length)

    new_query = urllib.parse.urlencode(
        {'startTime': str(start_time), 'stopTime': str(stop_time)}
    )
    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))


def _get_json(url, params=None):
    """GET *url* and return its decoded JSON, or ``"Error: <status>"``."""
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}"


def get_today_sejm_videos(term):
    """Fetch today's videos for the given Sejm term.

    Args:
        term: Term number inserted into the API path.

    Returns:
        The decoded JSON response on HTTP 200, otherwise the string
        ``"Error: <status code>"``.
    """
    return _get_json(f"https://api.sejm.gov.pl/sejm/term{term}/videos/today")


def get_sejm_videos(term, since=None, till=None, title=None, video_type=None,
                    comm=None):
    """Fetch the video list for a Sejm term, optionally filtered.

    Each filter is only sent when it is truthy.

    Args:
        term: Term number inserted into the API path.
        since: Value for the ``since`` query parameter.
        till: Value for the ``till`` query parameter.
        title: Value for the ``title`` query parameter.
        video_type: Value for the ``type`` query parameter.
        comm: Value for the ``comm`` query parameter.

    Returns:
        The decoded JSON response on HTTP 200, otherwise the string
        ``"Error: <status code>"``.
    """
    filters = {
        'since': since,
        'till': till,
        'title': title,
        'type': video_type,
        'comm': comm,
    }
    params = {key: value for key, value in filters.items() if value}
    return _get_json(
        f"https://api.sejm.gov.pl/sejm/term{term}/videos", params=params
    )


def download_video(video_url, video_path):
    """Download *video_url* into the file *video_path*.

    The body is streamed to disk in chunks so the whole video never has to
    fit in memory.

    Args:
        video_url: URL to fetch.
        video_path: Destination file path; overwritten if it exists.

    Returns:
        True if the server answered HTTP 200 and the file was written,
        False (after printing the status code) otherwise.
    """
    with requests.get(
        video_url, stream=True, timeout=REQUEST_TIMEOUT
    ) as response:
        if response.status_code != 200:
            print(f"Error downloading video: {response.status_code}")
            return False
        with open(video_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)
    return True


def extract_audio(video_path, audio_path):
    """Extract the audio of *video_path* into *audio_path* using ffmpeg.

    Args:
        video_path: Input media file.
        audio_path: Output audio file; overwritten if it exists.

    Returns:
        True if ffmpeg exited successfully and the output file exists,
        False otherwise. A status line is printed in both cases.
    """
    # -y: overwrite without asking; -q:a 0: audio quality setting;
    # -map a: keep only the audio streams.
    command = [
        'ffmpeg', '-y', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path,
    ]
    result = subprocess.run(command)

    # Check the exit code too: the output file alone may be a leftover from
    # an earlier run.
    succeeded = result.returncode == 0 and os.path.exists(audio_path)
    if succeeded:
        print("Audio extracted successfully.")
    else:
        print("Error extracting audio.")
    return succeeded


def transcribe(num1, num2):
    """Transcribe a clip cut from the first video of the configured term.

    Example values noted by the original author: num1=600000, num2=10000.

    Args:
        num1: Offset added to the video link's ``startTime``.
        num2: Clip length (``stopTime`` minus the new ``startTime``).

    Returns:
        The text produced by the speech-recognition pipeline.

    Raises:
        gr.Error: If an input is missing, the video list cannot be fetched or
            is empty, or the clip cannot be downloaded.
    """
    if num1 is None or num2 is None:
        raise gr.Error("Both number inputs are required.")

    videos = get_sejm_videos(term)
    # get_sejm_videos() reports HTTP failures as a string, not a list.
    if not isinstance(videos, list):
        raise gr.Error(f"Could not fetch the video list ({videos}).")
    if not videos:
        raise gr.Error("The video list is empty.")

    video_link = videos[0]['videoLink']
    print(video_link)

    clip_url = offset_time(video_link, num1, num2)
    if not download_video(clip_url, "./video.mp4"):
        raise gr.Error("Could not download the requested clip.")

    # The downloaded file is handed to the pipeline directly; no separate
    # audio extraction step is performed.
    return transcriber("./video.mp4")["text"]


demo = gr.Interface(
    fn=transcribe,
    inputs=[gr.Number(label="Number 1"), gr.Number(label="Number 2")],
    outputs="text",
)

if __name__ == "__main__":
    demo.launch()