Spaces:
Sleeping
Sleeping
| import requests | |
| import string | |
| import os | |
| import json | |
| from pydub import AudioSegment | |
| from pydub.utils import which | |
# Point pydub at the ffmpeg executable found on PATH. pydub's `which` returns
# None when the program is not found; in that case keep pydub's existing
# converter setting instead of overwriting it with None, so a missing ffmpeg
# surfaces as pydub's own "not found" error rather than a confusing failure
# caused by a None command name.
AudioSegment.converter = which("ffmpeg") or AudioSegment.converter
class AudioTranslation:
    """Thin client for the Umuganda speech-to-text and translation HTTP APIs.

    All network methods follow the same contract: they return the useful
    result on success and print a diagnostic and return ``None`` on failure,
    so callers only need to check for ``None``.
    """

    def __init__(self):
        # No per-instance state is required; each method is self-contained.
        pass

    @staticmethod
    def _mp3_path_for(file_path):
        """Return *file_path* with its extension replaced by ``.mp3``."""
        # splitext must receive the whole path; element [0] of its result is
        # the path without the extension.
        return f"{os.path.splitext(file_path)[0]}.mp3"

    def convert_to_mp3(self, file_path):
        """Convert an audio file to MP3 and return the path of the MP3 file.

        Args:
            file_path: Path of the input audio file.

        Returns:
            ``file_path`` unchanged when it already ends in ``.mp3``
            (case-insensitive); otherwise the path of the newly exported MP3,
            written next to the input file with the same base name.
        """
        if file_path.lower().endswith('.mp3'):
            return file_path

        audio = AudioSegment.from_file(file_path)
        converted_file_path = AudioTranslation._mp3_path_for(file_path)
        audio.export(converted_file_path, format="mp3")
        return converted_file_path

    def transcribe_audio(self, file_path):
        """Upload an audio file to the transcription service.

        Non-MP3 input is converted to MP3 first. The ``text`` field of the
        JSON response is returned with all ASCII punctuation removed.

        Args:
            file_path: Path of the audio file to transcribe.

        Returns:
            The cleaned transcription string, or ``None`` when the file is
            missing or empty, the request fails, or the response does not
            have the expected shape.
        """
        url = "https://stt.umuganda.digital/transcribe/"

        # Check if the file is an MP3; if not, convert it.
        if not file_path.lower().endswith('.mp3'):
            print(f"Converting file to MP3: {file_path}")
            converted_file_path = self.convert_to_mp3(file_path)
        else:
            converted_file_path = file_path

        if not os.path.exists(converted_file_path) or os.path.getsize(converted_file_path) == 0:
            print(f"File does not exist or is empty: {converted_file_path}")
            return None

        with open(converted_file_path, 'rb') as file:
            files = {'file': (os.path.basename(converted_file_path), file, 'audio/mpeg')}
            print('Transcribing audio...')
            try:
                response = requests.post(url, files=files)
                response.raise_for_status()
                transcription = response.json()
                # Remove punctuation
                translator = str.maketrans('', '', string.punctuation)
                cleaned_text = transcription['text'].translate(translator)
                print('Cleaned text:', cleaned_text)
                return cleaned_text
            except requests.exceptions.RequestException as e:
                print(f"Request error occurred: {e}")
                # Compare against None explicitly: a Response object is falsy
                # for 4xx/5xx statuses, which are exactly the cases where the
                # server's body is worth printing.
                error_response = getattr(e, 'response', None)
                if error_response is not None:
                    print(f"Server response: {error_response.text}")
                return None
            except (KeyError, TypeError, ValueError) as e:
                # Body was not JSON, or lacked a usable 'text' field.
                print(f"Unexpected transcription response: {e}")
                return None

    def get_translation(self, src, tgt, text):
        """Request a translation via GET with query parameters.

        Args:
            src: Value sent as the ``src`` query parameter.
            tgt: Value sent as the ``tgt`` query parameter.
            text: Value sent as the ``text`` query parameter.

        Returns:
            The decoded JSON response, or ``None`` on any failure.
        """
        url = "https://nmt-api.umuganda.digital/api/v1/translate/"
        # Let requests build the query string so characters such as '&', '#'
        # or '+' inside the text are percent-encoded instead of corrupting
        # the URL.
        params = {"src": src, "tgt": tgt, "text": text}
        try:
            response = requests.get(url, params=params)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as err:
            print(f"HTTP error occurred: {err}")
        except Exception as err:
            # Deliberately broad: the method's contract is to return None on
            # any failure rather than raise.
            print(f"An error occurred: {err}")
        return None

    def translate_sentence(self, src, tgt, alt, use_multi, text):
        """Request a translation via POST with a JSON payload.

        Args:
            src: Value sent as ``src`` in the payload.
            tgt: Value sent as ``tgt`` in the payload.
            alt: Value sent as ``alt`` in the payload.
            use_multi: Value sent as ``use_multi`` in the payload.
            text: Value sent as ``text`` in the payload.

        Returns:
            The decoded JSON response, or ``None`` on any failure.
        """
        url = "https://nmt-api.umuganda.digital/api/v1/translate/"
        headers = {
            'accept': 'application/json',
            'Content-Type': 'application/json'
        }
        data = {
            "src": src,
            "tgt": tgt,
            "alt": alt,
            "use_multi": use_multi,
            "text": text
        }
        # Bound before the try so the failure path can tell whether a
        # response was ever received.
        response = None
        try:
            response = requests.post(url, headers=headers, data=json.dumps(data))
            response.raise_for_status()
            print('translation sentence')
            return response.json()
        except requests.exceptions.HTTPError as err:
            print(f"HTTP error occurred: {err}")
        except Exception as err:
            # Deliberately broad: the method's contract is to return None on
            # any failure rather than raise.
            print(f"An error occurred: {err}")
        # Show the raw body for diagnosis. Use .text rather than .json() so
        # a non-JSON error body cannot raise a second exception here.
        if response is not None:
            print(response.text)
        return None

    def post_batch_translation(self, batch_data):
        """Submit a batch translation request.

        Args:
            batch_data: JSON-serialisable payload sent as the request body.

        Returns:
            The decoded JSON response, or ``None`` on any failure.
        """
        url = "https://nmt-api.umuganda.digital/api/v1/translate/batch"
        headers = {'Content-Type': 'application/json'}
        try:
            response = requests.post(url, json=batch_data, headers=headers)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.HTTPError as err:
            print(f"HTTP error occurred: {err}")
        except Exception as err:
            # Deliberately broad: the method's contract is to return None on
            # any failure rather than raise.
            print(f"An error occurred: {err}")
        return None
| # Example usage | |
| # audio_translator = AudioTranslation() | |
| # transcription = audio_translator.transcribe_audio("voice_test_1.mp3") | |
| # translation_result = audio_translator.translate_sentence("rw", "en","MULTI-rw-en","", transcription) | |
| # print(translation_result) | |
'''
Alternative models:
https://huggingface.co/facebook/nllb-200-3.3B
https://huggingface.co/facebook/mms-1b-all
-----
TODO:
- function: convert ogg to mp3
- post_translation: preprocess punctuation
- function: batch transcription
- merge voice data into the dataset
- run sentiment analysis prediction --> upload voice (streamlit)
'''