Spaces:

filipzawadka
/

polish_whisper

Runtime error

App Files Files Community

polish_whisper / app.py

filipzawadka

fix

e7244b3 7 months ago

raw

history blame contribute delete

No virus

5.25 kB

	import gradio as gr
	from transformers import pipeline
	import requests
	import subprocess
	import os
	import urllib.parse

	# Sejm term number handed to the API helpers when the video list is requested.
	term = 9

	# Speech-recognition pipeline, created once when the module is loaded and
	# reused by every transcription request.
	# (Disabled alternative checkpoint: model="openai/whisper-small.en".)
	transcriber = pipeline(
	    task="automatic-speech-recognition",
	    model="filipzawadka/whisper-small-pl-2",
	)

	def offset_time(link, start_offset, clip_length):
	    """Return *link* with its ``startTime``/``stopTime`` window rewritten.

	    The new start is the link's current ``startTime`` plus *start_offset*;
	    the new stop is that new start plus *clip_length* (the link's original
	    ``stopTime`` is ignored).

	    Any other query parameters present in *link* are preserved and emitted
	    after the two time parameters, so links that carry extra parameters
	    keep them.

	    Args:
	        link: URL whose query string contains an integer ``startTime``.
	        start_offset: Amount added to the existing ``startTime``.
	        clip_length: Distance between the new start and the new stop.

	    Returns:
	        The rebuilt URL as a string.

	    Raises:
	        KeyError: If *link* has no (non-empty) ``startTime`` parameter.
	        ValueError: If ``startTime`` is not an integer.
	    """
	    parsed_url = urllib.parse.urlparse(link)
	    # keep_blank_values so unrelated flag-style parameters are not dropped.
	    query_params = urllib.parse.parse_qs(parsed_url.query, keep_blank_values=True)

	    start_values = query_params.get('startTime')
	    if not start_values or not start_values[0]:
	        raise KeyError(f"link has no 'startTime' query parameter: {link}")

	    start_time = int(start_values[0]) + start_offset
	    stop_time = start_time + clip_length

	    # Time parameters first (same order as before), then everything else.
	    new_query_params = {'startTime': [str(start_time)], 'stopTime': [str(stop_time)]}
	    for key, values in query_params.items():
	        new_query_params.setdefault(key, values)
	    new_query = urllib.parse.urlencode(new_query_params, doseq=True)

	    # ParseResult is a named tuple, so only the query component is swapped.
	    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))

	def get_sejm_videos(term):
	    """Fetch the video list for Sejm term *term* from api.sejm.gov.pl.

	    NOTE: a later definition of ``get_sejm_videos`` in this file (the one
	    accepting ``since``/``till``/... filters) rebinds this name, so this
	    version is shadowed once the module has finished loading.

	    Args:
	        term: Term number interpolated into the endpoint path.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise the string
	        ``"Error: <status code>"``.
	    """
	    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"

	    # Without a timeout, requests waits indefinitely on a stalled connection.
	    response = requests.get(url, timeout=30)

	    if response.status_code == 200:
	        return response.json()
	    return f"Error: {response.status_code}"

	def get_today_sejm_videos(term):
	    """Fetch the ``videos/today`` listing for Sejm term *term*.

	    Args:
	        term: Term number interpolated into the endpoint path.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise the string
	        ``"Error: <status code>"``.
	    """
	    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"

	    # Without a timeout, requests waits indefinitely on a stalled connection.
	    response = requests.get(url, timeout=30)

	    if response.status_code == 200:
	        return response.json()
	    return f"Error: {response.status_code}"
	def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None):
	    """Fetch the video list for Sejm term *term*, optionally filtered.

	    Each filter argument that is truthy is sent as a query parameter;
	    falsy ones (``None``, ``""``, ...) are omitted. ``video_type`` is sent
	    under the parameter name ``type``.

	    Args:
	        term: Term number interpolated into the endpoint path.
	        since: Value for the ``since`` query parameter.
	        till: Value for the ``till`` query parameter.
	        title: Value for the ``title`` query parameter.
	        video_type: Value for the ``type`` query parameter.
	        comm: Value for the ``comm`` query parameter.

	    Returns:
	        The decoded JSON body on HTTP 200, otherwise the string
	        ``"Error: <status code>"``.
	    """
	    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"

	    filters = {
	        'since': since,
	        'till': till,
	        'title': title,
	        'type': video_type,
	        'comm': comm,
	    }
	    params = {name: value for name, value in filters.items() if value}

	    # Without a timeout, requests waits indefinitely on a stalled connection.
	    response = requests.get(base_url, params=params, timeout=30)
	    if response.status_code == 200:
	        return response.json()
	    return f"Error: {response.status_code}"

	def download_video(video_url, video_path):
	    """Download *video_url* and write it to *video_path*.

	    The response is streamed to disk in chunks instead of being held in
	    memory as a whole, and the request uses a timeout so a stalled
	    connection cannot block forever.

	    Args:
	        video_url: URL to fetch with HTTP GET.
	        video_path: Destination file path (opened in binary write mode).

	    Returns:
	        ``True`` if the server answered 200 and the file was written,
	        ``False`` otherwise (the status code is printed).
	    """
	    with requests.get(video_url, stream=True, timeout=30) as response:
	        if response.status_code != 200:
	            print(f"Error downloading video: {response.status_code}")
	            return False

	        with open(video_path, 'wb') as file:
	            for chunk in response.iter_content(chunk_size=1024 * 1024):
	                file.write(chunk)
	    return True

	def extract_audio(video_path, audio_path):
	    """Run ffmpeg to extract the audio stream of *video_path*.

	    Success is reported only when ffmpeg exits with status 0 and the
	    output file exists. Checking existence alone would report success
	    for a file left over from an earlier run.

	    Args:
	        video_path: Input file passed to ffmpeg via ``-i``.
	        audio_path: Output file; overwritten if present (``-y``).

	    Returns:
	        None. The outcome is printed.
	    """
	    command = ['ffmpeg', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path, '-y']
	    result = subprocess.run(command)

	    if result.returncode == 0 and os.path.exists(audio_path):
	        print("Audio extracted successfully.")
	    else:
	        print("Error extracting audio.")

	def download_and_extract_audio(video_link, start_time, end_time, audio_output):
	    """Download a video and extract part of its audio track with ffmpeg.

	    The video is streamed to the temporary file ``temp_video.mp4``, ffmpeg
	    cuts the audio between *start_time* and *end_time* into *audio_output*,
	    and the temporary file is removed afterwards, even if ffmpeg or the
	    download raises.

	    Args:
	        video_link: URL to fetch with HTTP GET.
	        start_time: Value passed to ffmpeg's ``-ss`` option.
	        end_time: Value passed to ffmpeg's ``-to`` option.
	        audio_output: Output file; overwritten if present (``-y``).

	    Returns:
	        None. Download errors and the extraction outcome are printed.
	    """
	    video_path = "temp_video.mp4"

	    try:
	        # Stream to disk: the whole video is never held in memory, and the
	        # timeout stops a stalled connection from blocking forever.
	        with requests.get(video_link, stream=True, timeout=30) as response:
	            if response.status_code != 200:
	                print(f"Error downloading video: {response.status_code}")
	                return

	            with open(video_path, 'wb') as file:
	                for chunk in response.iter_content(chunk_size=1024 * 1024):
	                    file.write(chunk)

	        command = [
	            'ffmpeg', '-i', video_path, '-ss', start_time, '-to', end_time,
	            '-q:a', '0', '-map', 'a', audio_output, '-y'
	        ]
	        result = subprocess.run(command)
	    finally:
	        # Always clean up the downloaded video, including on errors.
	        if os.path.exists(video_path):
	            os.remove(video_path)

	    # Require a zero exit status as well: an output file left over from an
	    # earlier run must not be reported as a fresh success.
	    if result.returncode == 0 and os.path.exists(audio_output):
	        print("Audio extracted successfully.")
	    else:
	        print("Error extracting audio.")

	# Example inputs, presumably for transcribe(num1, num2): 600000, 10000 — TODO confirm

	def transcribe(num1, num2):
	    """Transcribe a clip of the first video returned by the Sejm API.

	    The first listed video's link is rewritten with ``offset_time`` (*num1*
	    is added to its start time, *num2* is the clip length), the clip is
	    downloaded, its audio is extracted to ``sejm_audio.mp3`` and that file
	    is passed to the module-level ``transcriber``.

	    Args:
	        num1: Start offset; converted with ``int()``.
	        num2: Clip length; converted with ``int()``.

	    Returns:
	        The ``"text"`` field of the transcriber's result.

	    Raises:
	        gr.Error: If an input is missing, the video list could not be
	            fetched or is empty, or no audio file was produced.
	    """
	    if num1 is None or num2 is None:
	        raise gr.Error("Both numbers are required.")
	    num1, num2 = int(num1), int(num2)

	    videos = get_sejm_videos(term)
	    # get_sejm_videos returns an "Error: <status>" string on HTTP failure;
	    # indexing that (or an empty list) would crash with an opaque error.
	    if not isinstance(videos, list) or not videos:
	        raise gr.Error(f"Could not fetch the Sejm video list: {videos}")

	    video_link = videos[0]['videoLink']
	    print(video_link)

	    start_time = "00:00:00"  # Start time of the segment
	    end_time = "01:00:00"  # End time of the segment
	    audio_output = "sejm_audio.mp3"

	    # Build the clip URL once and reuse it for logging and downloading.
	    clip_link = offset_time(video_link, num1, num2)
	    print(clip_link)

	    # Drop any output from a previous request so that a failed download or
	    # extraction cannot lead to transcribing stale audio.
	    if os.path.exists(audio_output):
	        os.remove(audio_output)

	    download_and_extract_audio(clip_link, start_time, end_time, audio_output)

	    if not os.path.exists(audio_output):
	        raise gr.Error("Audio could not be downloaded or extracted.")

	    return transcriber(audio_output)["text"]

	# if download_video(offset_time(videos[0]['videoLink'],num1,num2), "./video.mov"):
	# extract_audio("./video.mov", "./audio.mp3")
	# # Load your MP4 file
	#
	# #ff = FFmpeg(
	# # inputs={'./video.mp4': None},
	# # outputs={'./audio.mp3': None}
	# #)
	# #ff.run()
	# #video = VideoFileClip("./video.mp4")
	#
	# ## Extract the audio from the video
	# #audio = video.audio
	#
	# ## Write the audio to an MP3 file
	# #audio.write_audiofile("./audio.mp3")
	# return transcriber(audio_output)["text"]


	# Two numeric fields are passed, in order, as transcribe(num1, num2).
	# (An alternative gr.Audio(type="filepath") input is left disabled.)
	number_inputs = [
	    gr.Number(label="Number 1"),
	    gr.Number(label="Number 2"),
	]

	# Text output shows the string returned by transcribe.
	demo = gr.Interface(fn=transcribe, inputs=number_inputs, outputs="text")

	# Start the web UI as soon as the module is executed.
	demo.launch()