Spaces:

Aptheos
/

wallE

Sleeping

App Files Files Community

wallE / app.py

Aptheos

fix inference fail

c27abe9 about 2 months ago

raw

history blame contribute delete

2.57 kB

	import os
	import torch
	import gradio as gr
	from transformers import pipeline
	from huggingface_hub import InferenceClient

	# ----------------------
	# AUDIO-TO-TEXT SETUP
	# ----------------------
	# Model identifier and batch size used by the speech-recognition pipeline.
	AUDIO_MODEL_NAME = "distil-whisper/distil-large-v3"
	BATCH_SIZE = 8

	# Use CUDA device 0 when CUDA is available; otherwise run on the CPU.
	if torch.cuda.is_available():
	    device = 0
	else:
	    device = "cpu"

	# Shared ASR pipeline, constructed once when the module is loaded.
	pipe = pipeline(
	    model=AUDIO_MODEL_NAME,
	    task="automatic-speech-recognition",
	    device=device,
	    chunk_length_s=30,
	)

	def transcribe(audio_input):
	    """Convert audio to text using Whisper.

	    Args:
	        audio_input: Audio to transcribe, passed unchanged to the
	            module-level ASR pipeline. The Gradio interface in this file
	            supplies it as a file path.

	    Returns:
	        The ``"text"`` entry of the pipeline output.

	    Raises:
	        gr.Error: If ``audio_input`` is ``None`` (nothing was submitted).
	    """
	    # Guard clause: surface a UI-level error instead of failing in the pipeline.
	    if audio_input is None:
	        raise gr.Error("No audio file submitted!")

	    output = pipe(
	        audio_input,
	        batch_size=BATCH_SIZE,
	        generate_kwargs={"task": "transcribe"},
	        return_timestamps=True,
	    )
	    return output["text"]

	# ----------------------
	# TEXT ORGANIZATION SETUP
	# ----------------------
	TEXT_MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"

	# Ensure HF_TOKEN is loaded as a Space secret; fail at startup if it is
	# missing or empty rather than on the first inference request.
	hf_token = os.getenv("HF_TOKEN")
	if not hf_token:
	    raise ValueError("HF_TOKEN not found! Add it as a secret in your Space settings.")

	# Force client to use the HF inference API.
	# NOTE(review): organize_text also passes `model=TEXT_MODEL_NAME` per call;
	# confirm that this base URL combined with a per-call model resolves to a
	# valid chat-completion endpoint for the installed huggingface_hub version.
	client = InferenceClient(token=hf_token, base_url="https://api-inference.huggingface.co")

	def build_messages(meeting_transcript) -> list:
	    """Build the chat messages asking the model to organize a transcript.

	    Args:
	        meeting_transcript: Raw transcript text; it is interpolated into the
	            user prompt as-is.

	    Returns:
	        A two-element list of ``{"role", "content"}`` dicts: a system
	        message followed by the user message containing the transcript.
	    """
	    system_input = "You are an assistant that organizes meeting minutes."
	    # The literal's lines are deliberately not indented with the function
	    # body so the prompt text sent to the model stays exactly as before.
	    user_input = f"""
	Take this raw meeting transcript and return an organized, sectioned version.
	You may include a summary at the top.

	Transcript:
	{meeting_transcript}
	"""
	    return [
	        {"role": "system", "content": system_input},
	        {"role": "user", "content": user_input},
	    ]

	def organize_text(meeting_transcript, max_tokens=300):
	    """Ask the text model for an organized version of a meeting transcript.

	    Args:
	        meeting_transcript: Raw transcript text to organize.
	        max_tokens: Upper bound on generated tokens forwarded to the chat
	            completion call. Defaults to 300, the value previously
	            hard-coded here.

	    Returns:
	        The message content of the first choice in the completion response.
	    """
	    response = client.chat_completion(
	        build_messages(meeting_transcript),
	        model=TEXT_MODEL_NAME,
	        max_tokens=max_tokens,
	        seed=42,  # fixed seed, kept from the original call
	    )
	    return response.choices[0].message.content

	# ----------------------
	# COMBINED TOOL
	# ----------------------
	def meeting_transcript_tool(audio_input):
	    """Transcribe an audio input and return an organized transcript.

	    Args:
	        audio_input: Audio value forwarded to ``transcribe``.

	    Returns:
	        The organized text produced by ``organize_text``.

	    Raises:
	        gr.Error: If the transcription comes back empty or whitespace-only,
	            so the text model is not asked to organize an empty transcript.
	    """
	    meeting_text = transcribe(audio_input)
	    if not meeting_text or not meeting_text.strip():
	        raise gr.Error("No speech could be transcribed from the audio.")
	    return organize_text(meeting_text)

	# ----------------------
	# GRADIO INTERFACE
	# ----------------------
	# Input component: the callback receives the audio as a file path.
	audio_in = gr.Audio(type="filepath")

	# Output component: a text box with a copy button for the organized result.
	transcript_out = gr.Textbox(show_copy_button=True, label="Organized Transcript")

	demo = gr.Interface(
	    title="🪶 Meeting Transcription Tool",
	    description="Upload or record an audio file. This app transcribes it using Whisper and organizes the text using Phi-3",
	    fn=meeting_transcript_tool,
	    inputs=audio_in,
	    outputs=transcript_out,
	)

	# Start the app when the module is executed.
	demo.launch()