filipzawadka committed on
Commit
f583bdc
1 Parent(s): 4f4d3a0
Files changed (2) hide show
  1. app.py +106 -4
  2. requirements.txt +3 -0
app.py CHANGED
@@ -1,19 +1,121 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
  import numpy as np
 
 
 
 
 
 
4
 
5
  transcriber = pipeline("automatic-speech-recognition", model="filipzawadka/whisper-small-pl-2")
6
  #transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
 
9
def transcribe(audio):
    """Transcribe ``audio`` with the module-level ``transcriber`` pipeline.

    Args:
        audio: Value forwarded unchanged to ``transcriber``.

    Returns:
        The ``"text"`` entry of the pipeline's result.
    """
    # Echo the received input to stdout before running the model.
    print(audio)
    result = transcriber(audio)
    return result["text"]
 
12
 
13
 
14
  demo = gr.Interface(
15
  fn=transcribe,
16
- inputs=gr.Audio(type="filepath"),
 
17
  outputs="text",
18
  )
19
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
  import numpy as np
4
+ import requests
5
+ import subprocess
6
+ import os
7
+ import urllib.parse
8
+
9
# Sejm term number; `transcribe` passes it to `get_sejm_videos`, which
# interpolates it into the API URL.
term = 9

# Speech-recognition pipeline, built once when the module is imported and
# shared by every request. The checkpoint name suggests a Polish fine-tune
# of Whisper small -- TODO confirm against the model card.
transcriber = pipeline(
    "automatic-speech-recognition",
    model="filipzawadka/whisper-small-pl-2",
)
# Alternative checkpoint kept for reference:
#transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-small.en")
13
 
14
def offset_time(link, start_offset, clip_length):
    """Return ``link`` with its ``startTime``/``stopTime`` window moved.

    The ``startTime`` query parameter of ``link`` is read and ``start_offset``
    is added to it; ``stopTime`` is then set to the new start plus
    ``clip_length``. The rebuilt query string contains only ``startTime`` and
    ``stopTime`` -- any other query parameters present in ``link`` are
    dropped. Scheme, host, path, params and fragment are preserved.

    Args:
        link: URL whose query string carries an integer ``startTime``.
        start_offset: Amount added to the original ``startTime``. Truncated
            to an integer.
        clip_length: Distance between the new ``startTime`` and ``stopTime``.
            Truncated to an integer.

    Returns:
        The rebuilt URL as a string.

    Raises:
        KeyError: If ``link`` has no ``startTime`` query parameter.
        ValueError: If the ``startTime`` value is not an integer literal.
    """
    parsed_url = urllib.parse.urlparse(link)
    query_params = urllib.parse.parse_qs(parsed_url.query)

    # Callers may pass floats (e.g. values from a numeric UI field). Without
    # the int() coercion the URL would contain "startTime=1500.0" instead of
    # an integer timestamp.
    start_time = int(query_params['startTime'][0]) + int(start_offset)
    stop_time = start_time + int(clip_length)

    new_query = urllib.parse.urlencode(
        {'startTime': start_time, 'stopTime': stop_time}
    )

    # Swap only the query component; everything else is carried over as-is.
    return urllib.parse.urlunparse(parsed_url._replace(query=new_query))
38
+
39
def get_sejm_videos(term):
    """Fetch the video list for one Sejm term from api.sejm.gov.pl.

    NOTE(review): this file contains a later ``get_sejm_videos`` definition
    (accepting optional filters) that rebinds the name at import time, so
    this single-argument version is shadowed.

    Args:
        term: Term number interpolated into the URL path (``term{term}``).

    Returns:
        The decoded JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``. Callers that index the result
        must check for the string case first.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds the timeout.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"

    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
51
+
52
def get_today_sejm_videos(term):
    """Fetch the ``videos/today`` listing for one Sejm term from api.sejm.gov.pl.

    Args:
        term: Term number interpolated into the URL path (``term{term}``).

    Returns:
        The decoded JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds the timeout.
    """
    url = f"https://api.sejm.gov.pl/sejm/term{term}/videos/today"

    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
64
def get_sejm_videos(term, since=None, till=None, title=None, video_type=None, comm=None):
    """Fetch the video list for one Sejm term, optionally filtered.

    Each optional argument that is truthy is sent as a query parameter;
    falsy ones (``None``, ``""``, ``0``) are omitted from the request.

    Args:
        term: Term number interpolated into the URL path (``term{term}``).
        since: Sent as the ``since`` query parameter.
        till: Sent as the ``till`` query parameter.
        title: Sent as the ``title`` query parameter.
        video_type: Sent as the ``type`` query parameter.
        comm: Sent as the ``comm`` query parameter.

    Returns:
        The decoded JSON body when the server answers 200; otherwise the
        string ``"Error: <status code>"``. Callers that index the result
        must check for the string case first.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the request exceeds the timeout.
    """
    base_url = f"https://api.sejm.gov.pl/sejm/term{term}/videos"

    # Map API parameter names to the supplied values, then keep only the
    # filters that were actually given.
    candidates = {
        'since': since,
        'till': till,
        'title': title,
        'type': video_type,
        'comm': comm,
    }
    params = {name: value for name, value in candidates.items() if value}

    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(base_url, params=params, timeout=30)

    if response.status_code != 200:
        return f"Error: {response.status_code}"
    return response.json()
84
+
85
def download_video(video_url, video_path):
    """Download ``video_url`` to the local file ``video_path``.

    The body is streamed to disk in chunks rather than held in memory as a
    whole, so large videos do not exhaust RAM.

    Args:
        video_url: URL to fetch with an HTTP GET.
        video_path: Destination path; opened in binary write mode, so an
            existing file is overwritten.

    Returns:
        ``True`` when the server answered 200 and the body was written,
        ``False`` (after printing the status code) otherwise.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the server stalls longer than the timeout.
        OSError: If ``video_path`` cannot be opened or written.
    """
    # stream=True defers the body download until iter_content() is consumed;
    # the context manager releases the connection even on early return.
    with requests.get(video_url, stream=True, timeout=60) as response:
        if response.status_code != 200:
            print(f"Error downloading video: {response.status_code}")
            return False

        with open(video_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                file.write(chunk)
    return True
94
+
95
def extract_audio(video_path, audio_path):
    """Extract the audio stream(s) of ``video_path`` into ``audio_path`` with ffmpeg.

    Prints a success or failure message; returns ``None`` either way.

    Args:
        video_path: Input media file handed to ``ffmpeg -i``.
        audio_path: Output file; overwritten if it already exists (``-y``).

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable is not on ``PATH``.
    """
    # -y is a global option, so it goes before the input/output files.
    command = ['ffmpeg', '-y', '-i', video_path, '-q:a', '0', '-map', 'a', audio_path]
    result = subprocess.run(command)

    # Checking only for the output file is not enough: a file left over from
    # an earlier run would make a failed ffmpeg invocation look successful.
    if result.returncode == 0 and os.path.exists(audio_path):
        print("Audio extracted successfully.")
    else:
        print("Error extracting audio.")
102
+
103
+ # Presumably example inputs for transcribe(num1, num2): 600000, 10000
104
+
105
def transcribe(num1,num2):
    """Download a clip of the first listed Sejm video and transcribe its audio.

    The first entry returned by ``get_sejm_videos(term)`` supplies the
    ``videoLink``; ``offset_time`` shifts that link's time window, the clip
    is downloaded to ``./video.mp4``, its audio is extracted to
    ``./audio.mp3``, and the module-level ``transcriber`` is run on it.

    Args:
        num1: Passed to ``offset_time`` as ``start_offset``.
        num2: Passed to ``offset_time`` as ``clip_length``.

    Returns:
        The transcription text, or a string starting with ``"Error:"`` when
        an input is missing, the video list is unavailable or empty, or the
        download fails.
    """
    video_path = "./video.mp4"
    audio_path = "./audio.mp3"

    # An empty numeric field in the UI arrives as None.
    if num1 is None or num2 is None:
        return "Error: both numbers are required"

    videos = get_sejm_videos(term)

    # get_sejm_videos reports HTTP failures as an "Error: <status>" string;
    # pass it through instead of indexing into it.
    if isinstance(videos, str):
        return videos
    if not videos:
        return "Error: no videos found"

    # Numeric UI fields may deliver floats; the URL needs whole numbers.
    clip_url = offset_time(videos[0]['videoLink'], int(num1), int(num2))

    if not download_video(clip_url, video_path):
        return "Error: video download failed"

    extract_audio(video_path, audio_path)
    print(audio_path)
    return transcriber(audio_path)["text"]
113
 
114
 
115
  demo = gr.Interface(
116
  fn=transcribe,
117
+ #inputs=gr.Audio(type="filepath"),
118
+ inputs=[gr.Number(label="Number 1"), gr.Number(label="Number 2")],
119
  outputs="text",
120
  )
121
 
requirements.txt CHANGED
@@ -91,3 +91,6 @@ widgetsnbextension==4.0.9
91
  xxhash==3.4.1
92
  yarl==1.9.4
93
  librosa
 
 
 
 
91
  xxhash==3.4.1
92
  yarl==1.9.4
93
  librosa
94
+ requests
95
+ os
96
+ subprocess