Spaces:

SammyGasana
/

artemis-analysis

Sleeping

App Files Files Community

artemis-analysis / audio_predictions.py

SammyGasana

Update audio_predictions.py

3990fcd verified almost 2 years ago

raw

history blame contribute delete

5.73 kB

	import requests
	import string
	import os
	import json
	from pydub import AudioSegment
	from pydub.utils import which

	# Tell pydub which converter binary to use: whatever pydub.utils.which
	# resolves for "ffmpeg".
	# NOTE(review): presumably this is the executable's path found on PATH, or
	# None when ffmpeg is not installed — confirm against pydub's docs.
	AudioSegment.converter = which("ffmpeg")

	class AudioTranslation:
	    """Client for the Umuganda speech-to-text and translation HTTP APIs.

	    The request methods print a diagnostic and return ``None`` when the
	    HTTP call fails, so callers must check the result for ``None``.
	    """

	    def __init__(self):
	        """Create a client; there is no per-instance state to set up."""

	    @staticmethod
	    def _mp3_path_for(file_path):
	        """Return ``file_path`` with its extension replaced by ``.mp3``."""
	        return f"{os.path.splitext(file_path)[0]}.mp3"

	    def convert_to_mp3(self, file_path):
	        """Convert an audio file to MP3 and return the MP3's path.

	        Args:
	            file_path: Path to the input audio file.

	        Returns:
	            ``file_path`` unchanged when it already ends in ``.mp3``
	            (case-insensitive); otherwise the path of the exported MP3,
	            which is written next to the input with the same base name.
	        """
	        if file_path.lower().endswith('.mp3'):
	            return file_path

	        audio = AudioSegment.from_file(file_path)
	        # splitext must receive the whole path; only element [0] of its
	        # (root, ext) result is the path without the extension.
	        converted_file_path = self._mp3_path_for(file_path)
	        audio.export(converted_file_path, format="mp3")
	        return converted_file_path

	    def transcribe_audio(self, file_path):
	        """Transcribe an audio file with the speech-to-text service.

	        Input that is not already MP3 is converted with ``convert_to_mp3``
	        before being uploaded.

	        Args:
	            file_path: Path to the audio file to transcribe.

	        Returns:
	            The transcribed text with ASCII punctuation removed, or ``None``
	            when the file is missing or empty, the request fails, or the
	            response has no usable ``text`` field.
	        """
	        url = "https://stt.umuganda.digital/transcribe/"

	        # Validate the input before conversion so a bad path is reported
	        # here instead of surfacing as a decoder error.
	        if not os.path.isfile(file_path):
	            print(f"File does not exist or is empty: {file_path}")
	            return None

	        if file_path.lower().endswith('.mp3'):
	            converted_file_path = file_path
	        else:
	            print(f"Converting file to MP3: {file_path}")
	            converted_file_path = self.convert_to_mp3(file_path)

	        if (
	            not os.path.exists(converted_file_path)
	            or os.path.getsize(converted_file_path) == 0
	        ):
	            print(f"File does not exist or is empty: {converted_file_path}")
	            return None

	        with open(converted_file_path, 'rb') as file:
	            files = {
	                'file': (os.path.basename(converted_file_path), file, 'audio/mpeg')
	            }
	            print('Transcribing audio...')
	            try:
	                response = requests.post(url, files=files)
	                response.raise_for_status()
	                transcription = response.json()
	                # Remove punctuation
	                translator = str.maketrans('', '', string.punctuation)
	                cleaned_text = transcription['text'].translate(translator)
	            except requests.exceptions.RequestException as e:
	                print(f"Request error occurred: {e}")
	                # A Response is falsy for 4xx/5xx statuses, so compare
	                # against None or the error body would never be shown.
	                if getattr(e, 'response', None) is not None:
	                    print(f"Server response: {e.response.text}")
	                return None
	            except (KeyError, TypeError, ValueError, AttributeError) as e:
	                # The body was not JSON or did not carry a text field.
	                print(f"Unexpected transcription response: {e}")
	                return None

	        print('Cleaned text:', cleaned_text)
	        return cleaned_text

	    def get_translation(self, src, tgt, text):
	        """Translate ``text`` through the translation API's GET endpoint.

	        Args:
	            src: Source language code, e.g. ``"rw"``.
	            tgt: Target language code, e.g. ``"en"``.
	            text: Text to translate.

	        Returns:
	            The decoded JSON response, or ``None`` if the request failed.
	        """
	        url = "https://nmt-api.umuganda.digital/api/v1/translate/"
	        # Pass the values as params so requests URL-encodes them; text
	        # containing '&', '#' or '+' would otherwise corrupt the query.
	        query = {"src": src, "tgt": tgt, "text": text}
	        try:
	            response = requests.get(url, params=query)
	            response.raise_for_status()
	            return response.json()
	        except requests.exceptions.HTTPError as err:
	            print(f"HTTP error occurred: {err}")
	        except Exception as err:
	            print(f"An error occurred: {err}")

	        return None

	    def translate_sentence(self, src, tgt, alt, use_multi, text):
	        """Translate ``text`` through the translation API's POST endpoint.

	        Args:
	            src: Source language code, e.g. ``"rw"``.
	            tgt: Target language code, e.g. ``"en"``.
	            alt: Sent unchanged as the ``alt`` field of the request body.
	            use_multi: Sent unchanged as the ``use_multi`` field.
	            text: Text to translate.

	        Returns:
	            The decoded JSON response, or ``None`` if the request failed.
	        """
	        url = "https://nmt-api.umuganda.digital/api/v1/translate/"
	        headers = {
	            'accept': 'application/json',
	            'Content-Type': 'application/json',
	        }
	        payload = {
	            "src": src,
	            "tgt": tgt,
	            "alt": alt,
	            "use_multi": use_multi,
	            "text": text,
	        }

	        # Bound before the try so the error path can tell "no response was
	        # received" apart from "a response arrived but was unusable".
	        response = None
	        try:
	            response = requests.post(url, headers=headers, data=json.dumps(payload))
	            response.raise_for_status()
	            print('translation sentence')
	            return response.json()
	        except requests.exceptions.HTTPError as err:
	            print(f"HTTP error occurred: {err}")
	        except Exception as err:
	            print(f"An error occurred: {err}")
	            # Show the raw body: the failure may be the JSON decoding
	            # itself, and there is no body if the request never completed.
	            if response is not None:
	                print(response.text)
	        return None

	    def post_batch_translation(self, batch_data):
	        """Send a batch translation request.

	        Args:
	            batch_data: JSON-serialisable request body for the batch
	                endpoint.

	        Returns:
	            The decoded JSON response, or ``None`` if the request failed.
	        """
	        url = "https://nmt-api.umuganda.digital/api/v1/translate/batch"
	        headers = {'Content-Type': 'application/json'}

	        try:
	            response = requests.post(url, json=batch_data, headers=headers)
	            response.raise_for_status()
	            return response.json()
	        except requests.exceptions.HTTPError as err:
	            print(f"HTTP error occurred: {err}")
	        except Exception as err:
	            print(f"An error occurred: {err}")

	        return None

	# Example usage
	# audio_translator = AudioTranslation()
	# transcription = audio_translator.transcribe_audio("voice_test_1.mp3")
	# translation_result = audio_translator.translate_sentence("rw", "en","MULTI-rw-en","", transcription)
	# print(translation_result)
	'''
	Alternative models:
	https://huggingface.co/facebook/nllb-200-3.3B
	https://huggingface.co/facebook/mms-1b-all
	-----
	TODO:
	- function: convert ogg to mp3
	- post_translation: preprocess punctuation
	- function: batch transcription
	- merge voice data into the dataset
	- run sentiment analysis prediction --> upload voice (Streamlit)

	'''