Spaces:

operablepattern
/

voice-to-cover-letter

Runtime error

File size: 1,608 Bytes

8f81d37
be9658a
dc523cd
08f0765
dc523cd
 
126bdd3
dc523cd
 
 
 
 
 
 
 
 
 
08f0765
f678f2c
08f0765
 
 
 
0c59d93
dc523cd
 
 
 
08f0765
8f81d37
d6af2ee
 
dc523cd
 
 
605b45a
dc523cd
 
 
 
 
 
 
 
 
 
2d3ef06

import gradio as gr
import torch
from transformers import pipeline
from ctransformers import AutoModelForCausalLM, AutoTokenizer

# Whisper checkpoint used for speech recognition, and the batch size passed to
# the ASR pipeline on every call.
MODEL_NAME = "openai/whisper-tiny"
BATCH_SIZE = 8

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-to-text pipeline; chunk_length_s=30 makes it process audio in
# 30-second chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

# GGUF build of Mistral-7B loaded through ctransformers.
# gpu_layers is the number of layers to offload to GPU; keep it at 0 if no GPU
# acceleration is available on your system.
# hf=True is set so the model can be handed to the transformers pipeline below.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-v0.1-GGUF",
    model_file="mistral-7b-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    gpu_layers=0,
    hf=True,
)
tokenizer = AutoTokenizer.from_pretrained(llm)

# Text-generation pipeline that is fed the transcript produced by `pipe`.
llm_pipe = pipeline("text-generation", model=llm, tokenizer=tokenizer)

def transcribe(inputs, task="transcribe"):
    """Transcribe an audio file and run the transcript through the LLM.

    Args:
        inputs: The value produced by the audio input component (a file path,
            since the component is configured with ``type="filepath"``), or
            ``None`` when nothing was recorded or uploaded.
        task: Forwarded to the speech-recognition pipeline as
            ``generate_kwargs["task"]``.

    Returns:
        str: The ``generated_text`` of the first sequence produced by the
        text-generation pipeline when prompted with the transcript.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    transcript = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )["text"]

    generations = llm_pipe(transcript, max_new_tokens=256)
    # The text-generation pipeline returns a list of dicts (one per generated
    # sequence). The interface output is plain text, so return the generated
    # string itself instead of the list, whose repr would otherwise be shown.
    return generations[0]["generated_text"]



# Single microphone input; type="filepath" hands `transcribe` a path to the
# recorded audio file.
microphone_input = gr.Audio(sources="microphone", type="filepath")

# Description shown in the UI; links to the Whisper checkpoint named by MODEL_NAME.
interface_description = (
    "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the OpenAI Whisper"
    f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
    " of arbitrary length."
)

# Wire the audio input to `transcribe` and show its result as text, with
# flagging turned off.
iface = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input],
    outputs="text",
    title="test",
    description=interface_description,
    allow_flagging="never",
)

# Start serving the app.
iface.launch()