Spaces:

filipzawadka
/

polish_whisper

Runtime error

App Files Files Community

filipzawadka committed on Dec 15, 2023

Commit

f583bdc

•

1 Parent(s): 4f4d3a0

sejm

Browse files

Files changed (2) hide show

app.py +106 -4
requirements.txt +3 -0

app.py CHANGED Viewed

@@ -1,19 +1,121 @@
 import gradio as gr
 from transformers import pipeline
 import numpy as np
 transcriber = pipeline("automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2")
 #transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
-def transcribe(audio):
-    print(audio)
-    return transcriber(audio)["text"]
 demo = gr.Interface(
     fn=transcribe,
-    inputs=gr.Audio(type="filepath"),
     outputs="text",
 )

 import gradio as gr
 from transformers import pipeline
 import numpy as np
+import requests
+import subprocess
+import os
+import urllib.parse
+term = 9
 transcriber = pipeline("automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2")
 #transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
def offset_time(link, start_offset, clip_length):
    """Return *link* with its ``startTime``/``stopTime`` window moved and resized.

    The new ``startTime`` is the link's current ``startTime`` plus
    ``start_offset``; the new ``stopTime`` is the new start plus
    ``clip_length``. The ``stopTime`` already present in the link is ignored.

    Args:
        link: URL whose query string contains an integer ``startTime``.
        start_offset: Amount added to the existing ``startTime``. Floats are
            truncated to integers.
        clip_length: Distance between the new start and stop times. Floats
            are truncated to integers.

    Returns:
        The rebuilt URL as a string. ``startTime`` and ``stopTime`` come
        first in the query, followed by any other parameters of the link.

    Raises:
        KeyError: If the link has no ``startTime`` query parameter.
        ValueError: If ``startTime`` is not an integer.
    """
    parsed_url = urllib.parse.urlparse(link)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    # Numeric UI widgets deliver floats; without int() the URL would carry
    # values such as "startTime=1500.0".
    start_time = int(query_params['startTime'][0]) + int(start_offset)
    stop_time = start_time + int(clip_length)

    new_query_params = {'startTime': [str(start_time)], 'stopTime': [str(stop_time)]}
    # Keep every other parameter of the original link instead of dropping it.
    for key, values in query_params.items():
        new_query_params.setdefault(key, values)

    new_query = urllib.parse.urlencode(new_query_params, doseq=True)
    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))
def get_sejm_videos(term, timeout=30):
    """Fetch the video list of a Sejm term from ``api.sejm.gov.pl``.

    NOTE: this module defines ``get_sejm_videos`` a second time further down
    (with optional filter arguments); that later definition replaces this one
    when the module is imported.

    Args:
        term: Term number interpolated into the ``/sejm/term{term}/videos`` path.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block indefinitely.

    Returns:
        The decoded JSON body on HTTP 200, otherwise the string
        ``"Error: <status code>"``.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}"
def get_today_sejm_videos(term, timeout=30):
    """Fetch the ``videos/today`` list of a Sejm term from ``api.sejm.gov.pl``.

    Args:
        term: Term number interpolated into the
            ``/sejm/term{term}/videos/today`` path.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block indefinitely.

    Returns:
        The decoded JSON body on HTTP 200, otherwise the string
        ``"Error: <status code>"``.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"
    response = requests.get(url, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}"
def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None, timeout=30):
    """Fetch the video list of a Sejm term, optionally filtered.

    Each filter argument that is truthy is forwarded to the API as a query
    parameter; falsy ones (``None``, ``""``, ``0``) are left out.

    Args:
        term: Term number interpolated into the ``/sejm/term{term}/videos`` path.
        since: Sent as the ``since`` query parameter.
        till: Sent as the ``till`` query parameter.
        title: Sent as the ``title`` query parameter.
        video_type: Sent as the ``type`` query parameter.
        comm: Sent as the ``comm`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error. Without it the call could block indefinitely.

    Returns:
        The decoded JSON body on HTTP 200, otherwise the string
        ``"Error: <status code>"``.
    """
    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"
    candidates = {
        'since': since,
        'till': till,
        'title': title,
        'type': video_type,
        'comm': comm,
    }
    params = {key: value for key, value in candidates.items() if value}
    response = requests.get(base_url, params=params, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    return f"Error: {response.status_code}"
def download_video(video_url, video_path, timeout=30, chunk_size=1024 * 1024):
    """Download *video_url* to the local file *video_path*.

    The body is streamed to disk in chunks so that a large video is never
    held in memory as a whole.

    Args:
        video_url: URL to fetch with an HTTP GET.
        video_path: Destination file; it is created or overwritten.
        timeout: Seconds to wait for the server before ``requests`` raises a
            timeout error.
        chunk_size: Number of bytes requested from the response per write.

    Returns:
        ``True`` when the server answered with HTTP 200 and the body was
        written, ``False`` (after printing the status code) otherwise.
    """
    with requests.get(video_url, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Error downloading video: {response.status_code}")
            return False
        with open(video_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                file.write(chunk)
    return True
def extract_audio(video_path, audio_path):
    """Extract the audio stream(s) of *video_path* into *audio_path* with ffmpeg.

    Args:
        video_path: Input media file handed to ``ffmpeg -i``.
        audio_path: Output file; an existing file is overwritten (``-y``).

    Returns:
        ``True`` when ffmpeg exited with status 0 and the output file exists,
        ``False`` otherwise. A status message is printed in both cases.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # -y is a global option, so it is placed before the input/output files.
    command = ['ffmpeg', '-y', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path]
    completed = subprocess.run(command)
    # The exit status is checked as well as the file: a file left behind by
    # an earlier run must not make a failed extraction look successful.
    succeeded = completed.returncode == 0 and os.path.exists(audio_path)
    if succeeded:
        print("Audio extracted successfully.")
    else:
        print("Error extracting audio.")
    return succeeded
# Example inputs: 600000, 10000 (presumably a start offset and a clip length
# in the time units of the video link -- confirm against the API).
def transcribe(num1, num2):
    """Transcribe a clip of the first video listed for the configured term.

    The clip is downloaded to ``./video.mp4``, its audio is extracted to
    ``./audio.mp3``, and that file is passed to the module-level
    ``transcriber`` pipeline.

    Args:
        num1: Offset added to the ``startTime`` of the video link.
        num2: Clip length; the new ``stopTime`` is the new start plus this.

    Returns:
        The recognized text, or a string starting with ``"Error"`` when a
        step of the fetch/download/extract chain fails.
    """
    if num1 is None or num2 is None:
        return "Error: both numbers are required"

    videos = get_sejm_videos(term)
    # get_sejm_videos reports an HTTP failure as an "Error: ..." string.
    if isinstance(videos, str):
        return videos
    if not videos:
        return "Error: no videos available"

    # Numeric UI fields deliver floats; the link arithmetic needs integers.
    clip_url = offset_time(videos[0]['videoLink'], int(num1), int(num2))
    if not download_video(clip_url, "./video.mp4"):
        return "Error: video download failed"

    # Drop audio left over from a previous request so that a failed
    # extraction cannot lead to transcribing the wrong clip.
    if os.path.exists("./audio.mp3"):
        os.remove("./audio.mp3")
    extract_audio("./video.mp4", "./audio.mp3")
    if not os.path.exists("./audio.mp3"):
        return "Error: audio extraction failed"

    print("./audio.mp3")
    return transcriber("./audio.mp3")["text"]
 demo = gr.Interface(
     fn=transcribe,
+    #inputs=gr.Audio(type="filepath"),
+    inputs=[gr.Number(label="Number 1"), gr.Number(label="Number 2")],
     outputs="text",
 )

requirements.txt CHANGED Viewed

@@ -91,3 +91,6 @@ widgetsnbextension==4.0.9
 xxhash==3.4.1
 yarl==1.9.4
 librosa

 xxhash==3.4.1
 yarl==1.9.4
 librosa
requests
# os and subprocess are part of the Python standard library; they are not
# installable distributions, and listing them here makes dependency
# installation fail.