"""Speech-to-text and text-to-speech helpers backed by HTTP endpoints."""

import base64
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# Read after load_dotenv() so a value defined in a local .env file is picked up.
HF_API_KEY = os.getenv("HF_API_KEY")

# Default number of seconds to wait on an HTTP call before giving up; without
# a timeout, requests would block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 60

STT_URL = "https://api-inference.huggingface.co/models/openai/whisper-small"
TTS_URL = "https://huggingface.co/KittenML/kitten-tts-nano-0.1/resolve/main/tts"


# -----------------------------
# Speech-to-Text (STT) using HuggingFace Whisper
# -----------------------------
def speech_to_text(audio_file, *, timeout=REQUEST_TIMEOUT):
    """Transcribe an audio file by posting its bytes to the Whisper endpoint.

    Args:
        audio_file: Path of the audio file to read and upload.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value of the ``"text"`` field of the JSON response on success.
        A fallback message string is returned when the response carries no
        usable ``"text"`` field, and ``"Speech recognition failed."`` when the
        request fails or the server answers with a non-200 status.

    Raises:
        OSError: If ``audio_file`` cannot be opened or read.
    """
    with open(audio_file, "rb") as f:
        audio_bytes = f.read()

    try:
        response = requests.post(
            STT_URL,
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            data=audio_bytes,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts take the same failure path as a bad
        # HTTP status instead of propagating to the caller.
        print(f"STT request failed: {exc}")
        return "Speech recognition failed."

    if response.status_code != 200:
        print(f"STT request failed: {response.status_code} {response.text}")
        return "Speech recognition failed."

    try:
        result = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        result = None

    if not isinstance(result, dict):
        return "Sorry, I couldn’t transcribe that."
    return result.get("text", "Sorry, I couldn’t transcribe that.")


# -----------------------------
# Text-to-Speech (TTS) using Kitten TTS
# -----------------------------
def text_to_speech(text, *, output_path="output_audio.wav", timeout=REQUEST_TIMEOUT):
    """Synthesize ``text`` to audio and write the result to a file.

    Posts ``{"text": text}`` as JSON to the TTS URL, base64-decodes the
    ``"audio"`` field of the JSON response and writes the bytes to
    ``output_path``.

    Args:
        text: The text to synthesize.
        output_path: Destination file for the decoded audio bytes.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``output_path`` on success, or ``None`` when the request fails, the
        server answers with a non-200 status, or the response body cannot be
        decoded.

    Raises:
        OSError: If ``output_path`` cannot be written.
    """
    # NOTE(review): TTS_URL is a Hub "resolve" file URL; confirm it actually
    # serves a POST endpoint returning {"audio": <base64>}.
    try:
        response = requests.post(TTS_URL, json={"text": text}, timeout=timeout)
    except requests.RequestException as exc:
        print(f"TTS request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"TTS request failed: {response.status_code}")
        return None

    try:
        audio_bytes = base64.b64decode(response.json()["audio"])
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers both invalid JSON and invalid base64 data;
        # KeyError/TypeError cover a body without a usable "audio" entry.
        print(f"TTS request failed: invalid response body ({exc})")
        return None

    with open(output_path, "wb") as f:
        f.write(audio_bytes)
    return output_path