apenasissso committed on
Commit
7274bc5
1 Parent(s): 4953c74

process from file and fix short audios

Browse files
Files changed (3) hide show
  1. handler.py +14 -10
  2. test_handler_files.py +40 -0
  3. test_handler_url.py +25 -0
handler.py CHANGED
@@ -10,18 +10,21 @@ import os
10
 
11
  def save_chunks_to_temp_files(url, chunk_length=5000): # chunk_length in milliseconds
12
  # Download the audio file from the URL
13
- response = requests.get(url)
14
- response.raise_for_status()
 
15
 
16
- # Ensure the content type is audio
17
- if "audio" not in response.headers["Content-Type"]:
18
- raise ValueError("URL does not seem to be an audio file")
19
 
20
- # Convert the downloaded bytes into a file-like object
21
- audio_file = BytesIO(response.content)
22
 
23
- # Load audio into an AudioSegment
24
- audio_segment = AudioSegment.from_file(audio_file)
 
 
25
 
26
  # Split audio into chunks of chunk_length milliseconds each
27
  chunks = [
@@ -29,7 +32,8 @@ def save_chunks_to_temp_files(url, chunk_length=5000): # chunk_length in millis
29
  for i in range(0, len(audio_segment), chunk_length)
30
  ]
31
 
32
- chunks[-1] = audio_segment[-chunk_length:] # Ensure last chunk is 10 seconds long
 
33
 
34
  # Save each chunk to a temporary file and store file paths in a list
35
  temp_files = []
 
10
 
11
  def save_chunks_to_temp_files(url, chunk_length=5000): # chunk_length in milliseconds
12
  # Download the audio file from the URL
13
+ if not url.startswith("file://"):
14
+ response = requests.get(url)
15
+ response.raise_for_status()
16
 
17
+ # Ensure the content type is audio
18
+ if "audio" not in response.headers["Content-Type"]:
19
+ raise ValueError("URL does not seem to be an audio file")
20
 
21
+ # Convert the downloaded bytes into a file-like object
22
+ audio_file = BytesIO(response.content)
23
 
24
+ # Load audio into an AudioSegment
25
+ audio_segment = AudioSegment.from_file(audio_file)
26
+ else:
27
+ audio_segment = AudioSegment.from_file(url[7:])
28
 
29
  # Split audio into chunks of chunk_length milliseconds each
30
  chunks = [
 
32
  for i in range(0, len(audio_segment), chunk_length)
33
  ]
34
 
35
+ if len(chunks) > 1:
36
+ chunks[-1] = audio_segment[-chunk_length:]
37
 
38
  # Save each chunk to a temporary file and store file paths in a list
39
  temp_files = []
test_handler_files.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from handler import EndpointHandler
3
+
4
+ # init handler
5
+ my_handler = EndpointHandler()
6
+
7
+ import os
8
+
9
+ # Specify the folder path here
10
+ folder_path = (
11
+ "/Users/apenasisso/pl/pl-bots/notebooks/analytics/data/elevenlabs_analysis"
12
+ )
13
+
14
+ # List all files in the folder
15
+ for filename in os.listdir(folder_path):
16
+ full_filename = os.path.join(folder_path, filename)
17
+ if os.path.isfile(full_filename) and full_filename.endswith(".wav"):
18
+ print(full_filename)
19
+
20
+ holiday_payload = {
21
+ "inputs": f"file://{full_filename}"
22
+ # "inputs": "https://pl-bots-public-media.s3.amazonaws.com/5527999790371_8825d2c9-d87b-49c3-bf70-cb536328ba92.mp3"
23
+ }
24
+
25
+ full_filename_json = full_filename.replace(".wav", ".json")
26
+
27
+ if os.path.isfile(full_filename_json):
28
+ print("file exists", full_filename_json)
29
+ continue
30
+
31
+ # test the handler
32
+ payload = my_handler(holiday_payload)
33
+
34
+ # save payload to file
35
+
36
+ with open(full_filename_json, "w") as outfile:
37
+ json.dump(payload, outfile, indent=4)
38
+
39
+ # show results
40
+ print("holiday_payload", payload)
test_handler_url.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from handler import EndpointHandler
3
+
4
+ # init handler
5
+ my_handler = EndpointHandler()
6
+
7
+ import os
8
+
9
+ # Specify the folder path here
10
+ folder_path = (
11
+ "/Users/apenasisso/pl/pl-bots/notebooks/analytics/data/elevenlabs_analysis"
12
+ )
13
+
14
+ # Build the request payload pointing at a remote audio URL
15
+
16
+ holiday_payload = {
17
+ "inputs": "https://pl-bots-public-media.s3.amazonaws.com/5511996969344_093275df-8324-4425-be0a-6d933bbcd896.mp3"
18
+ }
19
+
20
+ # test the handler
21
+ payload = my_handler(holiday_payload)
22
+
23
+
24
+ # show results
25
+ print("holiday_payload", payload)