# Llama3.2-1B / app.py
# Hugging Face Space by spedrox-sac — "Update app.py" (commit 54779db, verified)
import streamlit as st
from transformers import pipeline
from huggingface_hub import login
import os
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment.
# BUG FIX: python-dotenv was imported but load_dotenv() was never called,
# so a token stored in .env was silently ignored.
load_dotenv()

# Read the Hugging Face access token from the environment; never hard-code it.
token = os.getenv('hf_token')

if token:
    # Authenticate so the gated meta-llama weights can be downloaded.
    login(token)
else:
    # BUG FIX: the original called login(None) when the variable was unset,
    # which fails at startup. Warn instead and let public models still work.
    st.warning("hf_token environment variable is not set; gated models may fail to load.")

# Initialize the text-generation pipeline once at startup.
# NOTE(review): consider wrapping this in @st.cache_resource so Streamlit
# reruns don't rebuild the model — confirm against the Space's memory budget.
pipe = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.2-1B",
    device=-1,      # -1 forces CPU execution
    use_fast=True,  # use the fast (Rust-backed) tokenizer
)

# ---- Streamlit UI ----
st.title("Llama3.2-1B")

# Text input from the user.
user_input = st.text_input("Enter your message:", "Delete this and write your query?")

# Generate text when the button is clicked.
if st.button("Generate"):
    messages = [{"role": "user", "content": user_input}]
    # Keep max_new_tokens modest so CPU generation stays responsive.
    output = pipe(messages, max_new_tokens=150)
    generated_text = output[0]['generated_text']
    # BUG FIX: with chat-format input the pipeline returns the whole
    # conversation as a list of {"role", "content"} dicts; the original
    # displayed that raw list. Show only the assistant's reply when possible.
    if isinstance(generated_text, list) and generated_text:
        reply = generated_text[-1].get('content', generated_text)
    else:
        reply = generated_text
    st.write("Generated Response:")
    st.write(reply)