import os

import gradio as gr
from huggingface_hub import InferenceClient

# Read the Hugging Face token from the environment; raises KeyError if unset.
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]


def transcript_audio(audio_file: str) -> str:
    """Transcribe an audio file with Whisper via the Hugging Face Inference API."""
    model = "openai/whisper-large-v3"
    api = InferenceClient(model, token=HUGGINGFACE_API_KEY)
    # In recent huggingface_hub versions, automatic_speech_recognition returns
    # an output object; the transcript itself is in its .text attribute.
    output = api.automatic_speech_recognition(audio_file)
    return output.text


def summarize_text(text: str, bullet_points: int, conclusion: bool) -> str:
    """Summarize text into bullet points (and optionally a conclusion) with Gemma."""
    llm_model = "google/gemma-7b-it"
    api = InferenceClient(llm_model, token=HUGGINGFACE_API_KEY)
    if conclusion:
        prompt = (
            f"Summarize the following text into {bullet_points} bullet points "
            f"and a conclusion: {text}"
        )
    else:
        prompt = f"Summarize the following text into {bullet_points} bullet points: {text}"
    # text_generation returns the generated string directly unless details=True
    # is passed, so there is no dict to index into.
    return api.text_generation(prompt, max_new_tokens=250, do_sample=True)


def control(audio_file: str | None, text: str, bullet_points: int, conclusion: bool) -> str:
    """If an audio file is given, transcribe it first; then summarize the text."""
    if audio_file:
        text = transcript_audio(audio_file)
    return summarize_text(text, bullet_points, conclusion)


# A simple interface: the user can input text and get a summary, or input an
# audio file and get a transcript and a summary.
iface = gr.Interface(
    fn=control,  # control (not summarize_text), so the audio input is handled
    inputs=[
        # type="filepath" makes Gradio pass a path the InferenceClient can read.
        gr.Audio(type="filepath", label="Audio file"),
        gr.Textbox(lines=5, label="Text"),
        gr.Slider(minimum=1, maximum=10, value=5, step=1, label="Number of bullet points"),
        gr.Checkbox(label="Add conclusion"),
    ],
    outputs=gr.Textbox(label="Summary"),
)

iface.launch()
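
# Usage sketch (not part of the original script; the filename app.py is a
# hypothetical choice): export a valid Hugging Face access token and run the
# file, then open the local URL Gradio prints (http://127.0.0.1:7860 by default).
#
#   export HUGGINGFACE_API_KEY=hf_...
#   python app.py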