Spaces:
Runtime error
Runtime error
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. ========= | |
| import os | |
| from typing import Any, Optional | |
class FishAudioModel:
    r"""Provides access to FishAudio's Text-to-Speech (TTS) and
    Speech-to-Text (STT) models.

    The ``fish_audio_sdk`` package is imported lazily inside the methods
    that need it, so importing this module does not require the SDK.
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
    ) -> None:
        r"""Initialize an instance of FishAudioModel.

        Args:
            api_key (Optional[str]): API key for FishAudio service. If not
                provided, the environment variable `FISHAUDIO_API_KEY` will
                be used.
            url (Optional[str]): Base URL for FishAudio API. If not provided,
                the environment variable `FISHAUDIO_API_BASE_URL` will be
                used, falling back to ``https://api.fish.audio`` when that
                variable is unset.
        """
        from fish_audio_sdk import Session

        self._api_key = api_key or os.environ.get("FISHAUDIO_API_KEY")
        self._url = url or os.environ.get(
            "FISHAUDIO_API_BASE_URL", "https://api.fish.audio"
        )
        self.session = Session(apikey=self._api_key, base_url=self._url)

    def text_to_speech(
        self,
        input: str,
        storage_path: str,
        reference_id: Optional[str] = None,
        reference_audio: Optional[str] = None,
        reference_audio_text: Optional[str] = None,
        **kwargs: Any,
    ) -> Any:
        r"""Convert text to speech and save the output to a file.

        Args:
            input (str): The text to convert to speech.
            storage_path (str): The file path where the resulting speech will
                be saved. Missing parent directories are created.
            reference_id (Optional[str]): An optional reference ID to
                associate with the request. Only forwarded when
                `reference_audio` is not given. (default: :obj:`None`)
            reference_audio (Optional[str]): Path to an audio file for
                reference speech. (default: :obj:`None`)
            reference_audio_text (Optional[str]): Text for the reference
                audio. Required when `reference_audio` is given.
                (default: :obj:`None`)
            **kwargs (Any): Additional parameters to pass to the TTS request.

        Returns:
            None: The audio is written to `storage_path`.

        Raises:
            FileNotFoundError: If the reference audio file cannot be found.
            ValueError: If `reference_audio` is given without
                `reference_audio_text`.
        """
        # Validate the arguments before importing the SDK or touching the
        # filesystem, so an invalid call has no side effects.
        if reference_audio:
            if not os.path.exists(reference_audio):
                raise FileNotFoundError(
                    f"Reference audio file not found: {reference_audio}"
                )
            if not reference_audio_text:
                raise ValueError("reference_audio_text should be provided")

        from fish_audio_sdk import ReferenceAudio, TTSRequest

        if reference_audio:
            with open(reference_audio, "rb") as audio_file:
                reference = ReferenceAudio(
                    audio=audio_file.read(),
                    text=reference_audio_text,
                )
            # reference_id is intentionally not forwarded on this path; the
            # inline reference audio is what the request carries.
            request = TTSRequest(
                text=input,
                references=[reference],
                **kwargs,
            )
        else:
            request = TTSRequest(
                reference_id=reference_id, text=input, **kwargs
            )

        directory = os.path.dirname(storage_path)
        if directory:
            # exist_ok avoids a race between an existence check and creation.
            os.makedirs(directory, exist_ok=True)

        # Write each chunk yielded by the session as it arrives.
        with open(storage_path, "wb") as f:
            for chunk in self.session.tts(request):
                f.write(chunk)

    def speech_to_text(
        self,
        audio_file_path: str,
        language: Optional[str] = None,
        ignore_timestamps: Optional[bool] = None,
        **kwargs: Any,
    ) -> str:
        r"""Convert speech to text from an audio file.

        Args:
            audio_file_path (str): The path to the audio file to transcribe.
            language (Optional[str]): The language of the audio. (default:
                :obj:`None`)
            ignore_timestamps (Optional[bool]): Whether to ignore timestamps.
                (default: :obj:`None`)
            **kwargs (Any): Additional parameters to pass to the STT request.

        Returns:
            str: The transcribed text from the audio.

        Raises:
            FileNotFoundError: If the audio file cannot be found.
        """
        # Check the path before importing the SDK so a bad path is reported
        # with the documented exception.
        if not os.path.exists(audio_file_path):
            raise FileNotFoundError(f"Audio file not found: {audio_file_path}")

        from fish_audio_sdk import ASRRequest

        with open(audio_file_path, "rb") as audio_file:
            audio_data = audio_file.read()

        response = self.session.asr(
            ASRRequest(
                audio=audio_data,
                language=language,
                ignore_timestamps=ignore_timestamps,
                **kwargs,
            )
        )
        return response.text