Spaces:
Sleeping
Sleeping
Madhuslista
committed on
Commit
•
df236d9
1
Parent(s):
e43ce77
Refactor: Modify PIPE parameters
Browse files
- lib/pipe.py +14 -2
lib/pipe.py
CHANGED
@@ -5,6 +5,12 @@ from time import time
|
|
5 |
|
6 |
from transformers import pipeline
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
# -->> Tunables <<---------------------
|
9 |
|
10 |
print("Loading model...")
|
@@ -13,6 +19,7 @@ PIPE = pipeline(
|
|
13 |
task="automatic-speech-recognition",
|
14 |
model="openai/whisper-large-v3",
|
15 |
device_map="auto",
|
|
|
16 |
)
|
17 |
time_2 = time()
|
18 |
print("Model loaded!")
|
@@ -26,7 +33,12 @@ def connect_to_pipe(audio_file, audio_path, transcript_folder_path):
|
|
26 |
|
27 |
print("Transcribing...")
|
28 |
time_3 = time()
|
29 |
-
result = PIPE(
|
|
|
|
|
|
|
|
|
|
|
30 |
time_4 = time()
|
31 |
print("Transcription complete!")
|
32 |
|
@@ -36,8 +48,8 @@ def connect_to_pipe(audio_file, audio_path, transcript_folder_path):
|
|
36 |
print("Total: ", round(time_2 - time_1 + time_4 - time_3, 2), " [s]")
|
37 |
|
38 |
# Save the transcript to a file
|
39 |
-
print(result)
|
40 |
# text = "\n ".join([i["text"] for i in result["chunks"]])
|
|
|
41 |
|
42 |
return result
|
43 |
|
|
|
5 |
|
6 |
from transformers import pipeline
|
7 |
|
8 |
+
from .config import (
|
9 |
+
DEVICE,
|
10 |
+
COMPUTE_TYPE,
|
11 |
+
BATCH_SIZE,
|
12 |
+
)
|
13 |
+
|
14 |
# -->> Tunables <<---------------------
|
15 |
|
16 |
print("Loading model...")
|
|
|
19 |
task="automatic-speech-recognition",
|
20 |
model="openai/whisper-large-v3",
|
21 |
device_map="auto",
|
22 |
+
chunk_length_s=30,
|
23 |
)
|
24 |
time_2 = time()
|
25 |
print("Model loaded!")
|
|
|
33 |
|
34 |
print("Transcribing...")
|
35 |
time_3 = time()
|
36 |
+
result = PIPE(
|
37 |
+
audio_path,
|
38 |
+
batch_size=BATCH_SIZE,
|
39 |
+
generate_kwargs={"task": "transcribe"},
|
40 |
+
return_timestamps=True,
|
41 |
+
)
|
42 |
time_4 = time()
|
43 |
print("Transcription complete!")
|
44 |
|
|
|
48 |
print("Total: ", round(time_2 - time_1 + time_4 - time_3, 2), " [s]")
|
49 |
|
50 |
# Save the transcript to a file
|
|
|
51 |
# text = "\n ".join([i["text"] for i in result["chunks"]])
|
52 |
+
text = result['text']
|
53 |
|
54 |
return result
|
55 |
|