# Llama3.2-1B / app.py
# Hugging Face Space by spedrox-sac — "Update app.py" (commit 54779db, verified)
import streamlit as st
from transformers import pipeline
from huggingface_hub import login
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# BUG FIX: python-dotenv was imported but load_dotenv() was never called,
# so a token stored in .env was silently ignored.
load_dotenv()

# Read the Hugging Face access token from the environment; never hard-code it.
token = os.getenv('hf_token')

if token:
    # Authenticate so the gated meta-llama weights can be downloaded.
    login(token)
else:
    # BUG FIX: the original called login(None) when the variable was unset,
    # which fails at startup. Warn instead and let public models still work.
    st.warning("hf_token environment variable is not set; gated models may fail to load.")

# Initialize the text-generation pipeline once at startup.
# NOTE(review): consider wrapping this in @st.cache_resource so Streamlit
# reruns don't rebuild the model — confirm against the Space's memory budget.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    device=-1,      # -1 forces CPU execution
    use_fast=True,  # use the fast (Rust-backed) tokenizer
)

# ---- Streamlit UI ----
st.title("Llama3.2-1B")

# Text input from the user.
user_input = st.text_input("Enter your message:", "Delete this and write your query?")

# Generate text when the button is clicked.
if st.button("Generate"):
    messages = [{"role": "user", "content": user_input}]
    # Keep max_new_tokens modest so CPU generation stays responsive.
    output = pipe(messages, max_new_tokens=150)
    generated_text = output[0]['generated_text']
    # BUG FIX: with chat-format input the pipeline returns the whole
    # conversation as a list of {"role", "content"} dicts; the original
    # displayed that raw list. Show only the assistant's reply when possible.
    if isinstance(generated_text, list) and generated_text:
        reply = generated_text[-1].get('content', generated_text)
    else:
        reply = generated_text
    st.write("Generated Response:")
    st.write(reply)