YSU
/

aspram

Automatic Speech Recognition

mozilla-foundation/common_voice_9_0

Inference Endpoints

Model card Files Files and versions Community

aspram / utils.py

lilitket's picture

Source Files

5f1c16f over 2 years ago

2 kB

	import re

	def clean_characters(sample, lower: bool = False, only_mesropatar: bool = False):
	    """Remove every disallowed character from ``sample["sentence"]``.

	    When ``sample`` has no ``"sentence"`` key, the value stored under
	    ``"transcription"`` is copied into ``"sentence"`` first.

	    Args:
	        sample: Mapping with the text under ``"sentence"`` or
	            ``"transcription"``. It is modified in place.
	        lower: Lower-case the sentence before filtering.
	        only_mesropatar: Keep only the Armenian letters listed below, spaces
	            and ASCII hyphens. Otherwise Latin letters, ASCII digits and the
	            punctuation listed below are kept as well.

	    Returns:
	        The same ``sample`` object, with ``sample["sentence"]`` filtered.

	    Raises:
	        NotImplementedError: If ``sample`` has neither a ``"sentence"`` nor a
	            ``"transcription"`` key.
	    """
	    if "sentence" not in sample:
	        if "transcription" not in sample:
	            raise NotImplementedError(
	                "sample has neither a 'sentence' nor a 'transcription' key"
	            )
	        sample["sentence"] = sample["transcription"]

	    armenian_letters = (
	        "ԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖ"
	        "աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև"
	    )
	    if only_mesropatar:
	        # The trailing "-" sits last in the character class, so it is a
	        # literal hyphen rather than a range operator.
	        allowed_chars = armenian_letters + " -"
	    else:
	        # The leading "-" sits first in the (negated) character class, so it
	        # is a literal hyphen. "(" and ")" are literal inside a class and need
	        # no backslash, which avoids invalid "\(" escapes in a normal string.
	        allowed_chars = (
	            "-a-zA-Z0-9"
	            + armenian_letters
	            + " \"'։֊.:?;,ՙ՚՛՜՝՞՟()"
	        )

	    text = sample["sentence"]
	    if lower:
	        text = text.lower()

	    sample["sentence"] = re.sub(f"[^{allowed_chars}]", "", text)
	    return sample

	def extract_all_chars(batch):
	    """Collect the distinct characters used across ``batch["sentence"]``.

	    Args:
	        batch: Mapping whose ``"sentence"`` entry is an iterable of strings.

	    Returns:
	        A dict with two single-element lists: ``"vocab"`` holds the list of
	        distinct characters and ``"all_text"`` holds the sentences joined by
	        single spaces. (The one-element wrapping presumably matches what a
	        batched ``datasets.map`` call expects; confirm against the caller.)
	    """
	    all_text = " ".join(batch["sentence"])
	    # Sorted so the vocabulary order is reproducible between runs; plain set
	    # iteration order changes with string hash randomization.
	    vocab = sorted(set(all_text))
	    return {"vocab": [vocab], "all_text": [all_text]}

	def prepare_dataset(smaple, processor):
	    """Attach model inputs and label ids to one example, in place.

	    Args:
	        smaple: A single example (the misspelt parameter name is kept so that
	            keyword callers keep working). It must provide ``"audio"`` with
	            ``"array"`` and ``"sampling_rate"`` entries, and ``"sentence"``.
	        processor: Callable returning an object with ``input_values`` (audio
	            call) or ``input_ids`` (text call), and offering an
	            ``as_target_processor()`` context manager. Presumably a
	            transformers ``Wav2Vec2Processor``; confirm against the caller.

	    Returns:
	        The same ``smaple`` object with ``"input_values"``,
	        ``"input_length"`` and ``"labels"`` set.
	    """
	    clip = smaple["audio"]
	    encoded_audio = processor(clip["array"], sampling_rate=clip["sampling_rate"])

	    # Only the first entry of the returned ``input_values`` is kept
	    # (presumably a batch of one; confirm).
	    values = encoded_audio.input_values[0]
	    smaple["input_values"] = values
	    smaple["input_length"] = len(values)

	    # NOTE(review): presumably this context switches the processor to its
	    # text tokenizer, so the call below yields token ids; confirm.
	    with processor.as_target_processor():
	        encoded_text = processor(smaple["sentence"])
	        smaple["labels"] = encoded_text.input_ids

	    return smaple


	def batched_prepare_dataset(batch, processor, sampling_rate=None):
	    """Attach model inputs and label ids to a batch of examples.

	    Args:
	        batch: Mapping with ``"audio"`` (a sequence of clips, each providing
	            ``"array"`` and optionally ``"sampling_rate"``) and ``"sentence"``
	            (a sequence of strings). It is shallow-copied, so the caller's
	            mapping does not gain the new keys.
	        processor: Callable returning an object with ``input_values`` (audio
	            call) or ``input_ids`` (text call), and offering an
	            ``as_target_processor()`` context manager. Presumably a
	            transformers ``Wav2Vec2Processor``; confirm against the caller.
	        sampling_rate: Rate passed to ``processor`` for the audio call. When
	            ``None`` (the default) it is taken from the clips themselves,
	            falling back to 16 kHz if no clip records one.

	    Returns:
	        A copy of ``batch`` with ``"input_values"``, ``"input_length"`` and
	        ``"labels"`` added.

	    Raises:
	        ValueError: If ``sampling_rate`` is ``None`` and the clips record more
	            than one distinct sampling rate.
	    """
	    batch = batch.copy()
	    audio = batch["audio"]

	    if sampling_rate is None:
	        # Forward the clips' own rate (as the per-example ``prepare_dataset``
	        # does) instead of asserting 16 kHz for every input.
	        rates = set()
	        for clip in audio:
	            try:
	                rates.add(clip["sampling_rate"])
	            except KeyError:
	                continue
	        if len(rates) > 1:
	            raise ValueError(
	                f"batch mixes several sampling rates: {sorted(rates)}"
	            )
	        # No recorded rate: keep the historical 16 kHz default.
	        sampling_rate = rates.pop() if rates else 16_000

	    batch["input_values"] = processor(
	        [clip["array"] for clip in audio], sampling_rate=sampling_rate
	    ).input_values
	    batch["input_length"] = [len(values) for values in batch["input_values"]]

	    # NOTE(review): presumably this context switches the processor to its
	    # text tokenizer, so the call below yields token ids; confirm.
	    with processor.as_target_processor():
	        batch["labels"] = processor(batch["sentence"]).input_ids
	    return batch