Spaces:

darag
/

kurdish-kurmanci-to-text-srt

Sleeping

App Files Files Community

kurdish-kurmanci-to-text-srt / app.py

darag

Update app.py

37cc811 verified 4 months ago

raw

history blame

2.45 kB

	# -*- coding: utf-8 -*-
	import torch
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
	import librosa
	import numpy as np
	from datetime import timedelta
	import gradio as gr
	import os

	def format_time(seconds):
	    """Format an offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

	    Args:
	        seconds: Offset from the start of the audio, in seconds (int or
	            float). Negative values are clamped to zero, because an SRT
	            timestamp cannot be negative.

	    Returns:
	        The timestamp string, with a comma separating the milliseconds.
	        Hours are zero-padded to at least two digits and keep counting
	        past 24 instead of wrapping around.
	    """
	    td = timedelta(seconds=max(seconds, 0))
	    # ``td.seconds`` only holds the part below one day; fold ``td.days`` back
	    # in so offsets of 24 hours or more do not wrap around to 00:xx:xx.
	    whole_seconds = td.days * 86400 + td.seconds
	    hours, remainder = divmod(whole_seconds, 3600)
	    minutes, secs = divmod(remainder, 60)
	    # Sub-millisecond precision is truncated, not rounded.
	    milliseconds = td.microseconds // 1000
	    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"

	def estimate_word_timings(transcription, total_duration):
	    """Estimate start/end times for each word of a transcription.

	    The timings are a heuristic, not an alignment: ``total_duration`` is
	    divided evenly over all non-whitespace characters, and each word gets a
	    contiguous slot proportional to its length. Words are laid out back to
	    back starting at time 0, so gaps between words are not modelled.

	    Args:
	        transcription: The transcribed text; words are separated by
	            whitespace.
	        total_duration: Length of the audio in seconds.

	    Returns:
	        A list of ``(word, start_time, end_time)`` tuples in spoken order.
	        The list is empty when the transcription contains no words.
	    """
	    words = transcription.split()
	    total_chars = sum(len(word) for word in words)
	    # ``split()`` never yields empty strings, so a zero character count means
	    # there are no words at all; return early instead of dividing by zero.
	    if total_chars == 0:
	        return []
	    char_duration = total_duration / total_chars

	    word_timings = []
	    current_time = 0
	    for word in words:
	        end_time = current_time + len(word) * char_duration
	        word_timings.append((word, current_time, end_time))
	        # The next word starts exactly where this one ends.
	        current_time = end_time

	    return word_timings

	# Checkpoint identifier passed to both ``from_pretrained`` calls below.
	model_name = "Akashpb13/xlsr_kurmanji_kurdish"

	# Both objects are created once, when this module is executed, and are then
	# reused by every call to ``transcribe_audio``.
	processor = Wav2Vec2Processor.from_pretrained(model_name)
	model = Wav2Vec2ForCTC.from_pretrained(model_name)

	def transcribe_audio(file):
	    """Transcribe an audio file and write a word-by-word SRT subtitle file.

	    The audio is loaded at a 16 kHz sample rate, run through the
	    module-level Wav2Vec2 CTC ``model`` and decoded by taking the arg-max
	    token per frame. The model does not provide word timestamps here; they
	    are approximated by ``estimate_word_timings``, which spreads the audio
	    duration over the words in proportion to their length.

	    Args:
	        file: Path to the audio file (the Gradio input is declared with
	            ``type="filepath"``).

	    Returns:
	        A ``(transcription, srt_path)`` tuple: the decoded text and the
	        path of the generated ``output_word_by_word.srt`` file. The file is
	        empty when nothing was transcribed.
	    """
	    import tempfile

	    speech, rate = librosa.load(file, sr=16000)
	    input_values = processor(speech, return_tensors="pt", sampling_rate=rate).input_values

	    # Inference only: skip gradient bookkeeping.
	    with torch.no_grad():
	        logits = model(input_values).logits

	    predicted_ids = torch.argmax(logits, dim=-1)
	    transcription = processor.batch_decode(predicted_ids)[0]
	    total_duration = len(speech) / rate

	    # A transcription without any word (e.g. silent audio) has nothing to
	    # time; skip the estimator rather than let it divide by zero characters.
	    if transcription.strip():
	        word_timings = estimate_word_timings(transcription, total_duration)
	    else:
	        word_timings = []

	    # One SRT cue per word: index, time range, text, blank separator line.
	    srt_content = "".join(
	        f"{index}\n{format_time(start_time)} --> {format_time(end_time)}\n{word}\n\n"
	        for index, (word, start_time, end_time) in enumerate(word_timings, start=1)
	    )

	    # Write into a fresh temporary directory per call so that overlapping or
	    # successive requests never overwrite each other's subtitles, while the
	    # downloaded file keeps its original name.
	    # NOTE(review): these directories are not removed by this function.
	    output_filename = os.path.join(tempfile.mkdtemp(), "output_word_by_word.srt")
	    with open(output_filename, "w", encoding="utf-8") as f:
	        f.write(srt_content)

	    return transcription, output_filename

	# Input widget: hands the audio to the callback as a path on disk.
	_audio_input = gr.Audio(type="filepath")

	# Output widgets, in the same order as the tuple returned by
	# ``transcribe_audio``: the text first, then the SRT file.
	_result_outputs = [
	    gr.Textbox(label="Transcription"),
	    gr.File(label="Download SRT File"),
	]

	# The title, description and article strings are user-facing page text.
	interface = gr.Interface(
	    fn=transcribe_audio,
	    inputs=_audio_input,
	    outputs=_result_outputs,
	    title="Deng --- Nivîsandin ::: Kurdî-Kurmancî",
	    description="Dengê xwe ji me re rêke û li Submit bixe ... û bila bêhna te fireh be .",
	    article="By Derax Elî",
	)

	# Start the web UI only when this file is run as a script.
	if __name__ == "__main__":
	    interface.launch()