Spaces:

filipzawadka
/

polish_whisper

Runtime error

App Files Files Community

polish_whisper / app.py

filipzawadka

sejm

f583bdc 11 months ago

raw

history blame

3.61 kB

	import gradio as gr
	from transformers import pipeline
	import numpy as np
	import requests
	import subprocess
	import os
	import urllib.parse

	# Sejm term number; passed to get_sejm_videos() and interpolated into the
	# API path as ".../sejm/term{term}/videos".
	term = 9

	# Speech-recognition pipeline, constructed once at import time and reused by
	# transcribe() for every request.
	transcriber = pipeline("automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2")
	# Alternative model, currently disabled:
	#transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")

	def offset_time(link, start_offset, clip_length):
	    """Return *link* with its ``startTime``/``stopTime`` query values shifted.

	    The new start is the link's own ``startTime`` plus *start_offset*; the
	    new stop is that start plus *clip_length*. Any ``stopTime`` already on
	    the link is ignored. All other query parameters are carried over
	    unchanged, so links that need extra parameters keep working.

	    Args:
	        link: URL whose query string contains an integer ``startTime``.
	        start_offset: Amount added to the original ``startTime``. Truncated
	            to ``int`` so float input (e.g. from a numeric UI widget) does
	            not leak a ``.0`` suffix into the URL.
	        clip_length: Distance between the new start and stop, in the same
	            units as ``startTime``; also truncated to ``int``.

	    Returns:
	        The rebuilt URL as a string.

	    Raises:
	        KeyError: If *link* has no ``startTime`` query parameter.
	        ValueError: If ``startTime`` or an offset is not integer-like.
	    """
	    parsed_url = urllib.parse.urlparse(link)
	    # keep_blank_values so unrelated parameters such as "flag=" survive.
	    query_params = urllib.parse.parse_qs(parsed_url.query, keep_blank_values=True)

	    start_time = int(query_params['startTime'][0]) + int(start_offset)
	    stop_time = start_time + int(clip_length)

	    # Overwrite only the two time fields; everything else stays as parsed.
	    query_params['startTime'] = [str(start_time)]
	    query_params['stopTime'] = [str(stop_time)]
	    new_query = urllib.parse.urlencode(query_params, doseq=True)

	    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))

	def get_sejm_videos(term, timeout=30):
	    """Fetch the video list for a Sejm term from the public Sejm API.

	    NOTE: a second ``get_sejm_videos`` defined later in this file rebinds
	    the name, so at runtime this version is shadowed by that one.

	    Args:
	        term: Term number interpolated into the API path.
	        timeout: Seconds to wait on the server before giving up; without
	            a timeout ``requests`` can block indefinitely.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise the string
	        ``"Error: <status code>"``.

	    Raises:
	        requests.exceptions.RequestException: On connection failures or
	            when the timeout elapses.
	    """
	    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
	    response = requests.get(url, timeout=timeout)

	    if response.status_code == 200:
	        return response.json()
	    return f"Error: {response.status_code}"

	def get_today_sejm_videos(term, timeout=30):
	    """Fetch today's video list for a Sejm term from the public Sejm API.

	    Args:
	        term: Term number interpolated into the API path.
	        timeout: Seconds to wait on the server before giving up; without
	            a timeout ``requests`` can block indefinitely.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise the string
	        ``"Error: <status code>"``.

	    Raises:
	        requests.exceptions.RequestException: On connection failures or
	            when the timeout elapses.
	    """
	    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"
	    response = requests.get(url, timeout=timeout)

	    if response.status_code == 200:
	        return response.json()
	    return f"Error: {response.status_code}"
	def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None, timeout=30):
	    """Fetch the video list for a Sejm term, optionally filtered.

	    Args:
	        term: Term number interpolated into the API path.
	        since: Sent as the ``since`` query parameter when truthy.
	        till: Sent as the ``till`` query parameter when truthy.
	        title: Sent as the ``title`` query parameter when truthy.
	        video_type: Sent as the ``type`` query parameter when truthy.
	        comm: Sent as the ``comm`` query parameter when truthy.
	        timeout: Seconds to wait on the server before giving up; without
	            a timeout ``requests`` can block indefinitely.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise the string
	        ``"Error: <status code>"``.

	    Raises:
	        requests.exceptions.RequestException: On connection failures or
	            when the timeout elapses.
	    """
	    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"

	    # Only filters the caller actually supplied (truthy values) are sent.
	    candidates = {
	        'since': since,
	        'till': till,
	        'title': title,
	        'type': video_type,
	        'comm': comm,
	    }
	    params = {key: value for key, value in candidates.items() if value}

	    response = requests.get(base_url, params=params, timeout=timeout)
	    if response.status_code == 200:
	        return response.json()
	    return f"Error: {response.status_code}"

	def download_video(video_url, video_path, timeout=30, chunk_size=1024 * 1024):
	    """Download *video_url* to *video_path*, streaming it to disk.

	    The body is written chunk by chunk instead of being loaded into memory
	    in one piece, which matters for large video files.

	    Args:
	        video_url: URL of the video to fetch.
	        video_path: Destination file path; overwritten if it exists.
	        timeout: Seconds to wait on the server before giving up; without
	            a timeout ``requests`` can block indefinitely.
	        chunk_size: Number of bytes requested per streamed chunk.

	    Returns:
	        True if the server answered HTTP 200 and the file was written,
	        False (after printing the status code) otherwise.

	    Raises:
	        requests.exceptions.RequestException: On connection failures or
	            when the timeout elapses.
	    """
	    with requests.get(video_url, stream=True, timeout=timeout) as response:
	        if response.status_code != 200:
	            print(f"Error downloading video: {response.status_code}")
	            return False

	        with open(video_path, 'wb') as file:
	            for chunk in response.iter_content(chunk_size=chunk_size):
	                if chunk:
	                    file.write(chunk)
	    return True

	def extract_audio(video_path, audio_path):
	    """Extract the audio track of *video_path* into *audio_path* via ffmpeg.

	    Success is judged by ffmpeg's exit status as well as the presence of
	    the output file, so a leftover file from an earlier run is not
	    mistaken for a successful extraction.

	    Args:
	        video_path: Input video file.
	        audio_path: Output audio file; ffmpeg is run with ``-y`` so an
	            existing file is overwritten.

	    Returns:
	        True if ffmpeg exited with status 0 and *audio_path* exists,
	        False otherwise (including when ffmpeg cannot be started).
	    """
	    command = ['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y']
	    try:
	        completed = subprocess.run(command)
	    except OSError as exc:
	        # Typically raised when the ffmpeg executable is not installed.
	        print(f"Could not run ffmpeg: {exc}")
	        print("Error extracting audio.")
	        return False

	    if completed.returncode == 0 and os.path.exists(audio_path):
	        print("Audio extracted successfully.")
	        return True

	    print("Error extracting audio.")
	    return False

	# Example transcribe() inputs: 600000, 10000 (presumably num1, num2 — confirm)

	def transcribe(num1, num2):
	    """Transcribe a clip cut from the first video returned by the Sejm API.

	    The clip URL is derived from the video's ``videoLink`` by
	    ``offset_time``; the clip is downloaded to ``./video.mp4``, its audio
	    extracted to ``./audio.mp3``, and that file is fed to the module-level
	    ``transcriber`` pipeline.

	    Args:
	        num1: Start offset forwarded to ``offset_time``. Truncated to
	            ``int`` because the numeric UI widget delivers floats.
	        num2: Clip length forwarded to ``offset_time``; also truncated.

	    Returns:
	        The ``"text"`` field of the transcriber's result.

	    Raises:
	        gr.Error: If an input is missing, the video list cannot be fetched
	            or is empty, the download fails, or no audio file is produced.
	    """
	    if num1 is None or num2 is None:
	        raise gr.Error("Both numeric inputs are required.")

	    videos = get_sejm_videos(term)
	    # get_sejm_videos returns an "Error: <code>" string on a non-200
	    # response; indexing that string would fail with an obscure TypeError.
	    if not isinstance(videos, list) or not videos:
	        raise gr.Error(f"Could not fetch the video list: {videos}")

	    video_path = "./video.mp4"
	    audio_path = "./audio.mp3"

	    clip_url = offset_time(videos[0]['videoLink'], int(num1), int(num2))
	    if not download_video(clip_url, video_path):
	        raise gr.Error("Could not download the video clip.")

	    # Drop audio left over from a previous request so a failed extraction
	    # cannot silently transcribe the wrong clip.
	    if os.path.exists(audio_path):
	        os.remove(audio_path)
	    extract_audio(video_path, audio_path)
	    if not os.path.exists(audio_path):
	        raise gr.Error("Could not extract audio from the downloaded clip.")

	    print(audio_path)
	    return transcriber(audio_path)["text"]


	# Gradio UI: the two numeric inputs are passed positionally to
	# transcribe(num1, num2), which forwards them to offset_time() as the start
	# offset and clip length. The returned transcript is shown as plain text.
	# NOTE(review): units of the two numbers are not stated anywhere in this
	# file — presumably the same units as the link's startTime; confirm.
	demo = gr.Interface(
	fn=transcribe,
	#inputs=gr.Audio(type="filepath"),
	inputs=[gr.Number(label="Number 1"), gr.Number(label="Number 2")],
	outputs="text",
	)

	# Start the web server; runs at module import/execution time.
	demo.launch()