#Praise Jesus
#Stable version working with Llama, but not yet satisfied with the output quality
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import docx2txt
import os
# Authenticate with Hugging Face using an environment variable token if set
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    from huggingface_hub import login
    login(hf_token)
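# Note: meta-llama/Llama-3.2-3B-Instruct is a gated repo, so HF_TOKEN must belong to an
# account that has accepted the model's license (e.g. set it as a Space secret).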
# Whisper model for audio transcription
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-large")
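# Note (assumption, not in the original): whisper-large is a multi-GB download; a smaller
# checkpoint such as "openai/whisper-small" could be swapped in for faster startup on CPU-only Spaces.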
# LLaMA 3.2 model for text processing
llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(llama_model_id, token=hf_token)
llama_model = AutoModelForCausalLM.from_pretrained(llama_model_id, torch_dtype=torch.bfloat16, token=hf_token)
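# Note (assumption, not in the original): on GPU hardware the model can be placed automatically
# with from_pretrained(..., device_map="auto"), which requires the `accelerate` package;
# as written, everything loads and runs on the default device.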
# Function to generate response using LLaMA 3.2 model
def get_llama_response(input_text):
    # Ensure input is detailed enough
    if len(input_text.split()) < 10:
        return "Please provide a more detailed user story to help generate relevant needs and wants."

    # Define prompt for LLaMA model
    prompt = f"""
    Based on the user story "{input_text}", extract any unarticulated needs and wants.
    Only provide essential needs and wants directly relevant to the given story.
    Do not speculate or over-extrapolate.
    """

    # Process the prompt with LLaMA 3.2
    inputs = tokenizer(prompt, return_tensors="pt")
    llama_output = llama_model.generate(**inputs, max_new_tokens=100)
    # Decode only the newly generated tokens so the prompt is not echoed back in the response
    response_text = tokenizer.decode(
        llama_output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response_text
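# Optional sketch (assumption, not wired into the app): Llama-3.2 Instruct checkpoints are tuned
# for the chat format, so building the prompt with the tokenizer's chat template may improve the
# output quality noted above. Uses the same `tokenizer` and `llama_model` globals defined earlier.
def get_llama_chat_response(input_text):
    messages = [
        {"role": "system", "content": "You extract unarticulated needs and wants from user stories."},
        {"role": "user", "content": f'User story: "{input_text}"\nList only the essential needs and wants.'},
    ]
    # apply_chat_template inserts the special role tokens the Instruct model expects
    input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
    output = llama_model.generate(input_ids, max_new_tokens=150)
    # Decode only the tokens generated after the prompt
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)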
# Main processing function for Gradio interface
def process_input(user_story=None, user_audio=None, user_file=None):
    # Process audio input if provided
    if user_audio:
        transcription = whisper_model(user_audio)["text"]
        user_story = transcription
    # Process file input if provided and if text is empty
    if user_file and not user_story:
        user_story = docx2txt.process(user_file)
    # Ensure there's text to process
    if not user_story:
        return "Please provide a user story, an audio file, or upload a Word file."
    # Generate response with LLaMA 3.2
    llama_response = get_llama_response(user_story)
    return f"LLaMA Output:\n{llama_response}"
# Gradio interface with text, audio, and file inputs
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="User Story (Text Input)", placeholder="Enter your user story here..."),
        gr.Audio(type="filepath", label="User Story (Audio Input)"),
        gr.File(label="Upload Word File (.docx)")  # Removed `optional=True`
    ],
    outputs="text",
    title="Multimodal Needs & Wants Extractor",
    description="**Author:** VictorDaniel\n\nEnter a detailed user story or upload an audio/Word file to extract the unarticulated needs and wants.",
    examples=[
        # Each example row must supply one value per input component (text, audio, file)
        ["The user often speaks about wanting to improve their health but is hesitant to join a gym.", None, None]
    ]
)
# Launch the Gradio app
interface.launch()
#app2