# Praise Jesus
# Stable version working with Llama, but output quality is still unsatisfactory
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import docx2txt
import os
# Authenticate with Hugging Face using an environment variable token if set
hf_token = os.getenv('HF_TOKEN')
if hf_token:
    from huggingface_hub import login
    login(hf_token)
# Whisper model for audio transcription
whisper_model = pipeline("automatic-speech-recognition", model="openai/whisper-large")
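# Note (assumption, not in the original): openai/whisper-large is a heavy model, so
# loading it on a CPU-only Space is slow. On GPU hardware the pipeline can be pinned
# to the accelerator explicitly, e.g.:
# whisper_model = pipeline(
#     "automatic-speech-recognition",
#     model="openai/whisper-large",
#     device=0 if torch.cuda.is_available() else -1,
# )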
# LLaMA 3.2 model for text processing
llama_model_id = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(llama_model_id, token=hf_token)
llama_model = AutoModelForCausalLM.from_pretrained(llama_model_id, torch_dtype=torch.bfloat16, token=hf_token)
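# Optional (assumption, not in the original): with the accelerate package installed,
# device_map="auto" lets transformers place the model on the available GPU(s):
# llama_model = AutoModelForCausalLM.from_pretrained(
#     llama_model_id, torch_dtype=torch.bfloat16, device_map="auto", token=hf_token
# )
# (Prompt tensors would then need .to(llama_model.device) before generate.)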
# Function to generate a response using the LLaMA 3.2 model
def get_llama_response(input_text):
    # Ensure the input is detailed enough to analyze
    if len(input_text.split()) < 10:
        return "Please provide a more detailed user story to help generate relevant needs and wants."

    # Define the prompt for the LLaMA model
    prompt = f"""
    Based on the user story "{input_text}", extract any unarticulated needs and wants.
    Only provide essential needs and wants directly relevant to the given story.
    Do not speculate or over-extrapolate.
    """

    # Process the prompt with LLaMA 3.2
    inputs = tokenizer(prompt, return_tensors="pt")
    llama_output = llama_model.generate(
        **inputs,
        max_new_tokens=100,
        pad_token_id=tokenizer.eos_token_id,  # silence the missing-pad-token warning
    )
    # Decode only the newly generated tokens so the prompt is not echoed in the output
    response_text = tokenizer.decode(
        llama_output[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True
    )
    return response_text
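# Hedged alternative (assumption, not wired into the app): Llama-3.2 *-Instruct
# checkpoints are trained on a chat format, so prompting through the tokenizer's
# chat template often yields cleaner output than a raw string prompt. A minimal
# sketch of that approach:
def get_llama_chat_response(input_text):
    # Build a single-turn conversation; the template adds the special tokens
    messages = [{
        "role": "user",
        "content": f'Based on the user story "{input_text}", extract any '
                   "unarticulated needs and wants. Only list needs and wants "
                   "directly relevant to the story; do not speculate.",
    }]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
    output = llama_model.generate(
        input_ids, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id
    )
    # Decode only the newly generated tokens
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)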
# Main processing function for the Gradio interface
def process_input(user_story=None, user_audio=None, user_file=None):
    # Process audio input if provided (the transcription overrides any typed text)
    if user_audio:
        transcription = whisper_model(user_audio)["text"]
        user_story = transcription

    # Process file input if provided and no text is available yet
    if user_file and not user_story:
        user_story = docx2txt.process(user_file)

    # Ensure there is text to process
    if not user_story:
        return "Please provide a user story, an audio file, or upload a Word file."

    # Generate a response with LLaMA 3.2
    llama_response = get_llama_response(user_story)
    return f"LLaMA Output:\n{llama_response}"
# Gradio interface with text, audio, and file inputs
interface = gr.Interface(
    fn=process_input,
    inputs=[
        gr.Textbox(label="User Story (Text Input)", placeholder="Enter your user story here..."),
        gr.Audio(type="filepath", label="User Story (Audio Input)"),
        gr.File(label="Upload Word File (.docx)")  # Removed `optional=True`
    ],
    outputs="text",
    title="Multimodal Needs & Wants Extractor",
    description="**Author:** VictorDaniel\n\nEnter a detailed user story or upload an audio/Word file to extract the unarticulated needs and wants.",
    examples=[
        # Each example must supply a value for every input; None leaves audio/file empty
        ["The user often speaks about wanting to improve their health but is hesitant to join a gym.", None, None]
    ]
)
# Launch the Gradio app
interface.launch()
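# Optional (assumption): interface.launch(share=True) exposes a temporary public
# URL when running outside a hosted Space.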
#app2