import os
import tempfile
import uuid

from huggingface_hub import InferenceClient

# Initialize the client. The free tier works for these specific models
# without a token when running locally; on Spaces, the environment's
# token is picked up automatically.
client = InferenceClient()
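
# NOTE (assumption): if you hit anonymous rate limits when running locally,
# you can pass a token explicitly; HF_TOKEN is a conventional env var name,
# not something this app defines:
#   client = InferenceClient(token=os.environ.get("HF_TOKEN"))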

# Define the models for each pipeline stage
TRANSLATION_MODEL = "Helsinki-NLP/opus-mt-th-en"
IMAGE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
AUDIO_MODEL = "facebook/mms-tts-eng"


def translate_text(text):
    """Translates Thai text to English."""
    try:
        if not text.strip():
            return ""
        # Call the translation endpoint
        result = client.translation(text, model=TRANSLATION_MODEL)
        # Depending on the client version, this is either a TranslationOutput
        # object or a raw [{'translation_text': '...'}] payload.
        if hasattr(result, "translation_text"):
            return result.translation_text
        return result[0]["translation_text"]
    except Exception as e:
        print(f"Translation Error: {e}")
        return f"Error translating: {text}"


def generate_image(prompt, style):
    """Generates an image from text."""
    try:
        # Enhance the prompt based on the selected style
        enhanced_prompt = prompt
        if style == "Cinematic":
            enhanced_prompt += ", cinematic lighting, highly detailed, photorealistic, 8k"
        elif style == "Anime":
            enhanced_prompt += ", anime style, japanese animation, vibrant colors"
        elif style == "3D Model":
            enhanced_prompt += ", 3d render, blender, unreal engine 5, isometric"
        elif style == "Oil Painting":
            enhanced_prompt += ", oil painting, textured, artistic, van gogh style"
        elif style == "Pixel Art":
            enhanced_prompt += ", pixel art, 16-bit, retro game style"
        image = client.text_to_image(
            enhanced_prompt,
            model=IMAGE_MODEL,
        )
        return image
    except Exception as e:
        print(f"Image Generation Error: {e}")
        return None
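
# Example (sketch): generate_image("a cat on a beach", "Anime") should return
# a PIL.Image.Image on success, or None if the API call fails.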


def generate_audio(text):
    """Generates audio from English text."""
    try:
        # Request raw audio bytes from the TTS endpoint
        audio_bytes = client.text_to_speech(
            text,
            model=AUDIO_MODEL,
        )
        # Save to a uniquely named temporary file
        temp_dir = tempfile.gettempdir()
        filename = f"{uuid.uuid4()}.flac"
        filepath = os.path.join(temp_dir, filename)
        with open(filepath, "wb") as f:
            f.write(audio_bytes)
        return filepath
    except Exception as e:
        print(f"Audio Generation Error: {e}")
        return None
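
# Example (sketch): generate_audio("Hello world") returns a path like
# <tempdir>/<uuid>.flac on success, or None on failure. The bytes are written
# as-is, so the .flac extension assumes the endpoint returns FLAC audio.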


def process_pipeline(thai_text, style):
    """Orchestrates the full flow: translate, then generate image and audio."""
    if not thai_text:
        return "Please enter text.", None, None
    print(f"Processing: {thai_text}")
    # Step 1: Translate Thai to English
    eng_text = translate_text(thai_text)
    # Steps 2 & 3: Generate the image and audio (these could ideally run in
    # parallel; they are kept sequential here for simplicity)
    image = generate_image(eng_text, style)
    audio_path = generate_audio(eng_text)
    return eng_text, image, audio_path
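

if __name__ == "__main__":
    # Minimal local smoke test (a sketch, not part of the Space itself):
    # assumes network access to the Inference API and, if you are rate-limited,
    # a token. The Thai prompt means "a cat sitting on the beach".
    eng, img, audio = process_pipeline("แมวนั่งอยู่บนชายหาด", "Cinematic")
    print("Translation:", eng)
    if img is not None:
        img.save("preview.png")  # text_to_image returns a PIL image
        print("Image saved to preview.png")
    print("Audio file:", audio)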