Spaces:

filipzawadka
/

polish_whisper

Runtime error

App Files Files Community

polish_whisper / app.py

filipzawadka

update

830c50a 7 months ago

raw

history blame

No virus

3.62 kB

	import gradio as gr
	from transformers import pipeline
	import numpy as np
	import requests
	import subprocess
	import os
	import urllib.parse

	# Term number handed to get_sejm_videos() when transcribe() runs.
	term = 9

	# Speech-recognition pipeline, built once when the module is loaded and
	# reused for every transcribe() call.
	# Alternative checkpoint (disabled): openai/whisper-small.en
	transcriber = pipeline(
	    task="automatic-speech-recognition",
	    model="filipzawadka/whisper-small-pl-2",
	)

	def offset_time(link, start_offset, clip_length):
	    """Return ``link`` with shifted ``startTime``/``stopTime`` query values.

	    The new start is the link's existing ``startTime`` plus ``start_offset``;
	    the new stop is the new start plus ``clip_length``. The rebuilt query
	    holds only ``startTime`` and ``stopTime``: the link's own ``stopTime``
	    is ignored and any other query parameters are dropped.

	    Args:
	        link: URL whose query string carries an integer ``startTime``.
	        start_offset: Amount added to the existing ``startTime``. Converted
	            with ``int()``, so a float is truncated.
	        clip_length: Distance from the new start to the new stop. Converted
	            with ``int()``, so a float is truncated.

	    Returns:
	        The rebuilt URL as a string.

	    Raises:
	        KeyError: If ``link`` has no (non-empty) ``startTime`` parameter.
	        ValueError: If ``startTime`` is not an integer literal.
	    """
	    parsed_url = urllib.parse.urlparse(link)
	    query_params = urllib.parse.parse_qs(parsed_url.query)

	    # Coerce the offsets to int: callers may pass floats (numeric UI fields
	    # commonly do), and str(1500.0) would otherwise put "1500.0" in the URL.
	    start_time = int(query_params['startTime'][0]) + int(start_offset)
	    stop_time = start_time + int(clip_length)

	    new_query = urllib.parse.urlencode(
	        {'startTime': start_time, 'stopTime': stop_time}
	    )

	    # Scheme, host, path, params and fragment are carried over unchanged.
	    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))

	def get_sejm_videos(term, timeout=30):
	    """Request the ``/videos`` endpoint for a term and return its JSON body.

	    NOTE: this file defines ``get_sejm_videos`` a second time further down;
	    that later definition rebinds the name, so this version is not the one
	    callers end up invoking.

	    Args:
	        term: Value interpolated into the ``term{term}`` URL segment.
	        timeout: Seconds passed to ``requests.get`` so that an unresponsive
	            server cannot block the caller indefinitely.

	    Returns:
	        The decoded JSON body when the status code is 200; otherwise the
	        string ``"Error: <status code>"``. Callers must check for the
	        string case before indexing into the result.

	    Raises:
	        requests.RequestException: On connection failures or timeouts.
	    """
	    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
	    response = requests.get(url, timeout=timeout)

	    if response.status_code != 200:
	        return f"Error: {response.status_code}"
	    return response.json()

	def get_today_sejm_videos(term, timeout=30):
	    """Request the ``/videos/today`` endpoint for a term and return its JSON.

	    Args:
	        term: Value interpolated into the ``term{term}`` URL segment.
	        timeout: Seconds passed to ``requests.get`` so that an unresponsive
	            server cannot block the caller indefinitely.

	    Returns:
	        The decoded JSON body when the status code is 200; otherwise the
	        string ``"Error: <status code>"``. Callers must check for the
	        string case before indexing into the result.

	    Raises:
	        requests.RequestException: On connection failures or timeouts.
	    """
	    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"
	    response = requests.get(url, timeout=timeout)

	    if response.status_code != 200:
	        return f"Error: {response.status_code}"
	    return response.json()
	def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None, timeout=30):
	    """Request the ``/videos`` endpoint for a term, with optional filters.

	    Each filter is sent as a query parameter only when it is truthy;
	    ``video_type`` is sent under the name ``type``.

	    Args:
	        term: Value interpolated into the ``term{term}`` URL segment.
	        since: Optional value for the ``since`` query parameter.
	        till: Optional value for the ``till`` query parameter.
	        title: Optional value for the ``title`` query parameter.
	        video_type: Optional value for the ``type`` query parameter.
	        comm: Optional value for the ``comm`` query parameter.
	        timeout: Seconds passed to ``requests.get`` so that an unresponsive
	            server cannot block the caller indefinitely.

	    Returns:
	        The decoded JSON body when the status code is 200; otherwise the
	        string ``"Error: <status code>"``. Callers must check for the
	        string case before indexing into the result.

	    Raises:
	        requests.RequestException: On connection failures or timeouts.
	    """
	    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"

	    filters = {
	        'since': since,
	        'till': till,
	        'title': title,
	        'type': video_type,
	        'comm': comm,
	    }
	    # Falsy filters (None, empty string) are omitted from the request.
	    params = {key: value for key, value in filters.items() if value}

	    response = requests.get(base_url, params=params, timeout=timeout)
	    if response.status_code != 200:
	        return f"Error: {response.status_code}"
	    return response.json()

	def download_video(video_url, video_path, timeout=30, chunk_size=1 << 20):
	    """Download ``video_url`` to ``video_path``.

	    The body is streamed to disk in chunks instead of being held in memory
	    as a whole, and the request carries a timeout so a stalled server
	    cannot block the caller indefinitely.

	    Args:
	        video_url: URL to fetch with a GET request.
	        video_path: Destination file path; opened in binary write mode, so
	            an existing file is overwritten.
	        timeout: Seconds passed to ``requests.get``.
	        chunk_size: Number of bytes requested per streamed chunk.

	    Returns:
	        True when the status code is 200 and the body was written;
	        False (after printing the status code) otherwise.

	    Raises:
	        requests.RequestException: On connection failures or timeouts.
	        OSError: If ``video_path`` cannot be opened or written.
	    """
	    # The context manager releases the connection even if writing fails.
	    with requests.get(video_url, stream=True, timeout=timeout) as response:
	        if response.status_code != 200:
	            print(f"Error downloading video: {response.status_code}")
	            return False

	        with open(video_path, 'wb') as file:
	            for chunk in response.iter_content(chunk_size=chunk_size):
	                file.write(chunk)
	    return True

	def extract_audio(video_path, audio_path):
	    """Run ffmpeg to write the audio of ``video_path`` to ``audio_path``.

	    Prints "Audio extracted successfully." only when ffmpeg exits with
	    status 0 and ``audio_path`` exists afterwards; otherwise prints
	    "Error extracting audio.". Checking the exit status matters because a
	    file left at ``audio_path`` by an earlier run would otherwise be
	    reported as a success even though ffmpeg failed.

	    Args:
	        video_path: Input file handed to ffmpeg via ``-i``.
	        audio_path: Output file path handed to ffmpeg.

	    Returns:
	        None. The outcome is only reported on standard output.
	    """
	    command = ['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y']

	    try:
	        completed = subprocess.run(command)
	    except OSError:
	        # The ffmpeg executable could not be started (e.g. not installed).
	        succeeded = False
	    else:
	        succeeded = completed.returncode == 0 and os.path.exists(audio_path)

	    if succeeded:
	        print("Audio extracted successfully.")
	    else:
	        print("Error extracting audio.")

	# 600000,10000

	def transcribe(num1, num2):
	    """Download a clip of the first listed video and return its transcript.

	    Fetches the video list for the module-level ``term``, takes the first
	    entry's ``videoLink``, shifts its time window with ``offset_time`` and
	    downloads the result to ``./video.mp4``, which is then passed to the
	    module-level ``transcriber``.

	    Args:
	        num1: Start offset forwarded to ``offset_time``; converted to int.
	        num2: Clip length forwarded to ``offset_time``; converted to int.

	    Returns:
	        The ``"text"`` field of the transcriber's result, or a string
	        starting with ``"Error:"`` when an input is missing, the video
	        list could not be fetched or is empty, or the download failed.
	    """
	    if num1 is None or num2 is None:
	        return "Error: both numbers are required."

	    videos = get_sejm_videos(term)

	    # get_sejm_videos returns an "Error: <status>" string on a non-200
	    # response; indexing into that string would raise a TypeError.
	    if isinstance(videos, str):
	        return videos
	    if not videos:
	        return "Error: no videos found."

	    video_link = videos[0]['videoLink']
	    print(video_link)

	    # Numeric UI fields may deliver floats; the URL needs whole numbers.
	    clip_url = offset_time(video_link, int(num1), int(num2))

	    if not download_video(clip_url, "./video.mp4"):
	        return "Error: could not download the video clip."

	    # The downloaded file is handed to the transcriber directly, without a
	    # separate audio-extraction step.
	    return transcriber("./video.mp4")["text"]


	# Web UI: two numeric fields are passed, in order, to transcribe(); the
	# string it returns is shown as text.
	demo = gr.Interface(
	    outputs="text",
	    inputs=[
	        gr.Number(label="Number 1"),
	        gr.Number(label="Number 2"),
	    ],
	    fn=transcribe,
	)

	# Start serving the interface.
	demo.launch()