Spaces:
Runtime error
Runtime error
File size: 5,247 Bytes
3cc08f2 3448a7c f583bdc 5219b46 f583bdc 3cc08f2 55c2266 6d9b516 f583bdc e7244b3 f583bdc e7244b3 f583bdc aee5752 39e6918 e7244b3 481fed3 e7244b3 5219b46 e7244b3 481fed3 e7244b3 3448a7c c7a64c1 3448a7c cf7f5dc f583bdc cf7f5dc c7a64c1 3cc08f2 3448a7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 |
import os
import subprocess
import tempfile
import urllib.parse

import gradio as gr
import requests
from transformers import pipeline
# Sejm term number; transcribe() passes it to get_sejm_videos().
term = 9
# Speech-recognition pipeline, built once when the module is loaded and
# called by transcribe() with the path of the extracted audio file.
transcriber = pipeline("automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2")
# Alternative model, kept disabled:
#transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
def offset_time(link, start_offset, clip_length):
    """Return *link* with its time window shifted and resized.

    The ``startTime`` query parameter of *link* is read as an integer and
    increased by *start_offset*; ``stopTime`` is then set to the new start
    plus *clip_length*.  The rebuilt query string contains only these two
    parameters, while scheme, host, path, params and fragment are carried
    over unchanged.

    Raises:
        KeyError: if *link* has no ``startTime`` query parameter.
    """
    parts = urllib.parse.urlparse(link)
    original_start = urllib.parse.parse_qs(parts.query)['startTime'][0]

    new_start = int(original_start) + start_offset
    new_stop = new_start + clip_length

    window = urllib.parse.urlencode(
        {'startTime': str(new_start), 'stopTime': str(new_stop)}
    )
    # Swap only the query component and serialise the URL again.
    return parts._replace(query=window).geturl()
def get_sejm_videos(term):
    """Fetch the video list of the given Sejm term from the public API.

    Returns the decoded JSON body when the server answers with HTTP 200,
    otherwise the string ``"Error: <status code>"``.

    NOTE: this file defines ``get_sejm_videos`` a second time further down
    (with optional filter arguments); that later definition rebinds the
    name, so this version is replaced once the module has been loaded.
    """
    reply = requests.get(f"https://api.sejm.gov.pl/sejm/term{term}/videos")
    if reply.status_code != 200:
        return f"Error: {reply.status_code}"
    return reply.json()
def get_today_sejm_videos(term, timeout=30):
    """Fetch today's video list of the given Sejm term from the public API.

    Args:
        term: Sejm term number, interpolated into the request path.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` waits indefinitely, which would block
            the caller on a stalled connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise the string ``"Error: <status code>"``.

    Raises:
        requests.exceptions.RequestException: on connection failures or
            when the timeout expires.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def get_sejm_videos(term, since=None, till=None, title=None, video_type=None,
                    comm=None, timeout=30):
    """Fetch the video list of a Sejm term, optionally filtered.

    Args:
        term: Sejm term number, interpolated into the request path.
        since, till, title, comm: Optional filters, sent as query
            parameters of the same name when truthy.
        video_type: Optional filter, sent as the ``type`` query parameter
            when truthy.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests`` waits indefinitely, which would block
            the caller on a stalled connection.

    Returns:
        The decoded JSON body when the server answers with HTTP 200,
        otherwise the string ``"Error: <status code>"``.

    Raises:
        requests.exceptions.RequestException: on connection failures or
            when the timeout expires.
    """
    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    filters = {
        'since': since,
        'till': till,
        'title': title,
        'type': video_type,
        'comm': comm,
    }
    # Only forward the filters the caller actually supplied (truthy values).
    params = {key: value for key, value in filters.items() if value}

    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
def download_video(video_url, video_path, timeout=60, chunk_size=1024 * 1024):
    """Download *video_url* to the file *video_path*.

    The body is streamed to disk in chunks instead of being read into
    memory in one piece, so large videos do not exhaust RAM, and the
    connection is released when the transfer ends.

    Args:
        video_url: URL of the video to fetch.
        video_path: Destination file path; overwritten if it exists.
        timeout: Seconds to wait for the connection and for each read from
            the socket (not a limit on the total download time).
        chunk_size: Number of bytes written to disk per iteration.

    Returns:
        True when the server answered with HTTP 200 and the body was
        written, False otherwise (the status code is printed).

    Raises:
        requests.exceptions.RequestException: on connection failures or
            when the timeout expires.
    """
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Error downloading video: {response.status_code}")
            return False
        with open(video_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                file.write(chunk)
    return True
def extract_audio(video_path, audio_path):
    """Extract the audio track of *video_path* into *audio_path* with ffmpeg.

    Success is decided by ffmpeg's exit status as well as the presence of
    the output file; checking only for the file would report success when
    a file from an earlier run is still lying around and ffmpeg failed.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to write; overwritten if present.

    Returns:
        True when ffmpeg exited with status 0 and *audio_path* exists,
        False otherwise.  A status message is printed in both cases.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
    """
    # '-y' (overwrite without asking) is a global option, so it goes first.
    command = ['ffmpeg', '-y', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path]
    result = subprocess.run(command)
    if result.returncode == 0 and os.path.exists(audio_path):
        print("Audio extracted successfully.")
        return True
    print("Error extracting audio.")
    return False
def download_and_extract_audio(video_link, start_time, end_time, audio_output,
                               timeout=60):
    """Download a video and write a slice of its audio track to *audio_output*.

    The video is streamed into a uniquely named temporary file (so that
    overlapping calls cannot overwrite each other's download and large
    videos are not held in memory), ffmpeg cuts the requested range out of
    it, and the temporary file is removed even when a step fails.

    Args:
        video_link: URL of the video to fetch.
        start_time: Start position handed to ffmpeg's ``-ss`` option.
        end_time: End position handed to ffmpeg's ``-to`` option.
        audio_output: Path of the audio file to write; overwritten if present.
        timeout: Seconds to wait for the connection and for each read from
            the socket (not a limit on the total download time).

    Returns:
        True when the download succeeded, ffmpeg exited with status 0 and
        *audio_output* exists; False otherwise.  Status messages are printed.

    Raises:
        requests.exceptions.RequestException: on connection failures or
            when the timeout expires.
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
    """
    handle, video_path = tempfile.mkstemp(suffix=".mp4")
    os.close(handle)  # only the path is needed; the file is reopened below
    try:
        # Downloading video
        with requests.get(video_link, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Error downloading video: {response.status_code}")
                return False
            with open(video_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)

        # Extracting audio ('-y' is a global option, so it goes first)
        command = [
            'ffmpeg', '-y', '-i', video_path,
            '-ss', str(start_time), '-to', str(end_time),
            '-q:a', '0', '-map', 'a', audio_output,
        ]
        result = subprocess.run(command)
    finally:
        # Clean up downloaded video
        if os.path.exists(video_path):
            os.remove(video_path)

    # The exit status matters too: an output file left over from an earlier
    # run must not be mistaken for a successful extraction.
    if result.returncode == 0 and os.path.exists(audio_output):
        print("Audio extracted successfully.")
        return True
    print("Error extracting audio.")
    return False
# Example inputs: 600000,10000
def transcribe(num1, num2):
    """Transcribe a clip of the first video returned for the configured term.

    Args:
        num1: Amount added to the ``startTime`` of the video link
            (converted with ``int``).
        num2: Length of the clip, i.e. ``stopTime - startTime`` of the
            rebuilt link (converted with ``int``).

    Returns:
        The recognised text, or a human-readable error message when the
        inputs are missing, the video list cannot be retrieved, or no audio
        could be produced for the clip.
    """
    if num1 is None or num2 is None:
        return "Error: both numbers are required."
    start_offset, clip_length = int(num1), int(num2)

    videos = get_sejm_videos(term)
    # get_sejm_videos() returns an "Error: <status>" string on a non-200
    # reply; indexing that (or an empty list) would raise.
    if not isinstance(videos, list) or not videos:
        return f"Error: could not retrieve Sejm videos ({videos})"

    source_link = videos[0]['videoLink']
    clip_link = offset_time(source_link, start_offset, clip_length)
    print(source_link)
    print(clip_link)

    start_time = "00:00:00"  # Start time of the segment
    end_time = "01:00:00"  # End time of the segment
    audio_output = "sejm_audio.mp3"

    # Drop audio left over from an earlier call so that a failed download or
    # extraction cannot lead to transcribing the previous clip.
    if os.path.exists(audio_output):
        os.remove(audio_output)

    download_and_extract_audio(clip_link, start_time, end_time, audio_output)
    if not os.path.exists(audio_output):
        return "Error: could not download or extract audio for the requested clip."

    return transcriber(audio_output)["text"]
# Gradio UI wired to transcribe(): the two numeric inputs are passed as
# num1 and num2 (which transcribe() forwards to offset_time() as the start
# offset and the clip length), and the returned string is shown as text.
demo = gr.Interface(
    fn=transcribe,
    #inputs=gr.Audio(type="filepath"),
    inputs=[gr.Number(label="Number 1"), gr.Number(label="Number 2")],
    outputs="text",
)
# Runs at module level, so the app starts as soon as this file is executed.
demo.launch()
|