Spaces:

Madhuslista
/

whisper_wrapper

Sleeping

whisper_wrapper / lib /pipe.py

Refactor: Modify PIPE parameters

df236d9 over 1 year ago

1.33 kB

	#!/usr/bin/python
	# -*- coding: utf-8 -*-

	from time import time

	from transformers import pipeline

	from .config import (
	DEVICE,
	COMPUTE_TYPE,
	BATCH_SIZE,
	)

	# -->> Tunables <<---------------------

	# The speech-recognition pipeline is built once, at import time, and shared
	# through the module-level PIPE name. time_1 / time_2 bracket the load so that
	# connect_to_pipe can include the load duration in its timing report.
	print("Loading model...")
	time_1 = time()
	PIPE = pipeline(
	    "automatic-speech-recognition",  # task
	    model="openai/whisper-large-v3",
	    chunk_length_s=30,
	    device_map="auto",
	)
	time_2 = time()
	print("Model loaded!")

	# -->> Definitions <<------------------


	# -->> API <<--------------------------

	def connect_to_pipe(audio_file, audio_path, transcript_folder_path):
	    """Transcribe the audio at ``audio_path`` with the module-level PIPE.

	    Prints progress messages and a timing report, then returns the
	    pipeline output unchanged. The report combines the model load time
	    (from the module-level ``time_1`` / ``time_2`` stamps) with the
	    duration of this call's transcription.

	    Args:
	        audio_file: Unused by this function; kept so existing callers
	            keep working.
	        audio_path: Passed straight to PIPE as the audio input.
	        transcript_folder_path: Unused by this function; nothing is
	            written to disk here.

	    Returns:
	        Whatever PIPE returns for ``audio_path`` when called with
	        ``return_timestamps=True``.
	    """
	    print("Transcribing...")
	    started = time()
	    result = PIPE(
	        audio_path,
	        batch_size=BATCH_SIZE,
	        generate_kwargs={"task": "transcribe"},
	        return_timestamps=True,
	    )
	    finished = time()
	    print("Transcription complete!")

	    load_seconds = time_2 - time_1
	    transcribe_seconds = finished - started

	    print("\nTime Report: ")
	    print("Loading model: ", round(load_seconds, 2), " [s]")
	    print("Transcribing: ", round(transcribe_seconds, 2), " [s]")
	    print("Total: ", round(load_seconds + transcribe_seconds, 2), " [s]")

	    # The raw pipeline output is handed back as-is; persisting a transcript
	    # is left to the caller.
	    return result

	# -->> Execute <<----------------------


	# -->> Export <<-----------------------