"""Gradio app that transcribes an uploaded audio file and summarizes it.

Speech-to-text is done with OpenAI Whisper (base); the summary is generated by
a causal language model loaded from the Hugging Face Hub. The Hub token is
read from the ``HF_API_TOKEN`` environment variable.
"""

import os

import gradio as gr
import torch
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    WhisperForConditionalGeneration,
    WhisperProcessor,
    pipeline,
)

# Retrieve the token from the environment variable
hf_api_token = os.getenv("HF_API_TOKEN")
if hf_api_token is None:
    raise ValueError("HF_API_TOKEN environment variable is not set")

# Authenticate with Hugging Face
login(token=hf_api_token, add_to_git_credential=True)

# Initialize the Whisper processor and model
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

# Wrap Whisper in an ASR pipeline: it decodes the audio file, resamples it to
# the rate the feature extractor expects, and splits long recordings into
# 30-second chunks (Whisper itself only sees one 30-second window at a time).
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model=whisper_model,
    tokenizer=whisper_processor.tokenizer,
    feature_extractor=whisper_processor.feature_extractor,
    chunk_length_s=30,
)

# Initialize the summarization model and tokenizer, loaded in FP16.
# NOTE(review): LlamaGuard is published as a content-moderation model, not a
# summarizer -- confirm this is the intended checkpoint, or swap the ID below
# for an instruction-tuned chat model.
SUMMARIZATION_MODEL_ID = "meta-llama/LlamaGuard-7b"

summarization_model = AutoModelForCausalLM.from_pretrained(
    SUMMARIZATION_MODEL_ID,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    # Place the weights on an accelerator when one is available; with no
    # accelerator this keeps everything on the CPU, as before.
    device_map="auto",
)
summarization_tokenizer = AutoTokenizer.from_pretrained(SUMMARIZATION_MODEL_ID)

# Upper bound on transcript tokens fed to the summarizer.
MAX_TRANSCRIPT_TOKENS = 512

# A decoder-only model needs an explicit instruction; without one it simply
# continues the transcript instead of summarizing it.
SUMMARY_PROMPT = (
    "Summarize the following conversation.\n\n"
    "Conversation:\n{transcript}\n\n"
    "Summary:"
)


def transcribe_audio(audio_file):
    """Return the Whisper transcription of an audio file.

    Args:
        audio_file: Path to the audio file (``str`` or ``os.PathLike``), or a
            file-like object exposing the path as ``.name`` (the legacy
            Gradio temp-file wrapper).

    Returns:
        The transcribed text with surrounding whitespace removed.
    """
    if isinstance(audio_file, (str, os.PathLike)):
        audio_path = os.fspath(audio_file)
    else:
        audio_path = audio_file.name

    result = asr_pipeline(audio_path)
    return result["text"].strip()


def summarize_text(text):
    """Generate a short summary of *text* with the causal language model.

    Args:
        text: The transcript to summarize.

    Returns:
        The generated summary, or an empty string when *text* is blank.
    """
    if not text or not text.strip():
        return ""

    # Truncate the transcript on its own first, so that clipping a long
    # transcript can never cut off the instruction at the end of the prompt.
    transcript_ids = summarization_tokenizer(
        text,
        add_special_tokens=False,
        truncation=True,
        max_length=MAX_TRANSCRIPT_TOKENS,
    ).input_ids
    clipped_text = summarization_tokenizer.decode(
        transcript_ids, skip_special_tokens=True
    )

    prompt = SUMMARY_PROMPT.format(transcript=clipped_text)
    inputs = summarization_tokenizer(prompt, return_tensors="pt").to(
        summarization_model.device
    )
    prompt_length = inputs.input_ids.shape[1]

    # max_new_tokens/min_new_tokens bound only the generated part; the
    # max_length/min_length variants also count the prompt, which leaves no
    # room to generate once the prompt is longer than the limit.
    summary_ids = summarization_model.generate(
        **inputs,
        max_new_tokens=150,
        min_new_tokens=40,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
        pad_token_id=summarization_tokenizer.eos_token_id,
    )

    # Decoder-only models return prompt + continuation; keep the continuation.
    generated_ids = summary_ids[0][prompt_length:]
    return summarization_tokenizer.decode(
        generated_ids, skip_special_tokens=True
    ).strip()


def process_audio(audio_file):
    """Transcribe an uploaded audio file and summarize the transcript.

    Args:
        audio_file: The value Gradio passes for the audio input, or ``None``
            when the user submitted without uploading anything.

    Returns:
        A ``(transcription, summary)`` tuple of strings.

    Raises:
        gr.Error: If no audio file was provided.
    """
    if audio_file is None:
        raise gr.Error("Please upload an audio file before submitting.")

    transcription = transcribe_audio(audio_file)
    summary = summarize_text(transcription)
    return transcription, summary


# Gradio UI
# NOTE(review): `source="upload"` is the Gradio 3.x spelling; Gradio 4+ expects
# `sources=["upload"]` -- confirm against the pinned gradio version.
iface = gr.Interface(
    fn=process_audio,
    # "filepath" hands the handler a path string; the older "file" type is
    # deprecated.
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
    ],
    title="Audio Transcription and Summarization",
    description="Upload an audio file to transcribe and summarize the conversation.",
)

# Launch the app only when run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()