# -*- coding: utf-8 -*-
"""OpenAI Whisper from Hugging Face Transformers with Microsoft Phi-4 Integration"""

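# Flow: the Gradio UI takes an uploaded call recording, Whisper transcribes it,
# and the transcription is sent to Phi-4 through the Hugging Face InferenceClient.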
import gradio as gr
from transformers import pipeline
import torch
from huggingface_hub import InferenceClient
import os
import librosa

# Fetch the token from Hugging Face Secrets
HF_API_TOKEN = os.getenv("HF_API_TOKEN", "")

client = InferenceClient(
    "microsoft/phi-4",
    token=HF_API_TOKEN
)

# Check if a GPU is available and use it if possible
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Initialize the Whisper pipeline
whisper = pipeline('automatic-speech-recognition', model='openai/whisper-tiny', device=device)

# Instructions (can be set through Hugging Face Secrets or hardcoded)
instructions = os.getenv("INST", "Your default instructions here.")

def query_phi(prompt):
    """Send the transcribed text to Phi-4 and stream the reply back as one string."""
    print("Sending request to the Phi-4 API...")
    response = ""
    try:
        for message in client.chat_completion(
            messages=[{"role": "user", "content": f"{instructions}\n{prompt}"}],
            max_tokens=500,
            stream=True,
        ):
            # Streamed chunks can carry an empty or None delta (e.g. the final chunk); skip those.
            delta = message.choices[0].delta.content
            if delta:
                response += delta
    except Exception as e:
        print("Error in Phi-4 API:", e)
        return "Phi-4 API Error: " + str(e)

    return response

def transcribe_and_query(audio):
    try:
        # Load the audio file and resample it to 16 kHz, the rate Whisper expects
        audio_data, sr = librosa.load(audio, sr=16000)

        # Transcribe using Whisper, passing the sampling rate explicitly
        transcription = whisper({"raw": audio_data, "sampling_rate": sr})["text"]
        transcription = "Prompt: " + transcription

        # Query Phi-4 with the transcribed text
        phi_response = query_phi(transcription)

        return transcription, phi_response

    except Exception as e:
        return f"Error processing audio: {str(e)}", "No response from Phi-4"

# Create the Gradio interface
iface = gr.Interface(
    fn=transcribe_and_query,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Phi-4 Response")],
    title="Scam Call Detector with BEEP",
    description="Upload your recorded call to see if it is a scam or not.\nStay Safe, Stay Secure."
)

# Launch the interface
iface.launch(share=True)