import os

import streamlit as st
import torch
from huggingface_hub import login
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer

# Step 1: Log in to Hugging Face with the access token stored in Secrets.
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")  # Fetch the token from the environment
if huggingface_token:
    login(token=huggingface_token)  # Authenticate with the Hugging Face Hub
else:
    st.error("Hugging Face token not found. Please set HUGGINGFACE_TOKEN in the Secrets section.")
    st.stop()  # Without a token the gated model cannot be downloaded

# Step 2: Load the model and tokenizer.
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"  # Adjust if needed
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    st.success("Model loaded successfully!")
except Exception as e:
    st.error(f"Error loading model: {e}")
    st.stop()  # Stop here so the rest of the app does not run with an undefined model

# Step 3: Build a simple Streamlit app around the model.
def main():
    st.title("Llama 3.2 11B Vision Model")
    st.write("Upload an image and enter a prompt to generate output.")

    # Collect the inputs.
    image_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
    prompt = st.text_area("Enter your prompt here:")

    if st.button("Generate Output"):
        if image_file and prompt:
            # Display the uploaded image.
            image = Image.open(image_file)
            st.image(image, caption="Uploaded Image", use_column_width=True)

            # NOTE: with AutoTokenizer/AutoModelForCausalLM the image is never passed
            # to the model, so the generation below is text-only. Feeding the image in
            # requires a multimodal processor; see the sketch after this script.
            try:
                # Tokenize the text prompt.
                inputs = tokenizer(prompt, return_tensors="pt")

                # Run generation without tracking gradients.
                with torch.no_grad():
                    model_output = model.generate(**inputs)

                # Decode the generated tokens back into text.
                output_text = tokenizer.decode(model_output[0], skip_special_tokens=True)
                st.write("Generated Output:", output_text)
            except Exception as e:
                st.error(f"Error during prediction: {e}")
        else:
            st.warning("Please upload an image and enter a prompt.")

if __name__ == "__main__":
    main()
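

# ---------------------------------------------------------------------------
# Vision-aware inference path: a minimal sketch, not wired into main() above.
# Assumptions (not taken from the original script): transformers >= 4.45, which
# ships MllamaForConditionalGeneration and a matching AutoProcessor for this
# checkpoint; access granted to the gated meta-llama repo; and a GPU with
# enough memory for the 11B weights. The helper name `generate_vision_reply`
# is illustrative only. The processor bundles the tokenizer and the image
# preprocessor, so the uploaded image can actually reach the model, unlike the
# AutoTokenizer-only flow above. Imports and model loading are kept inside the
# function so this sketch stays inert unless it is called.
# ---------------------------------------------------------------------------
def generate_vision_reply(image: Image.Image, prompt: str, max_new_tokens: int = 256) -> str:
    """Run one image + text prompt through Llama 3.2 Vision and return the decoded reply."""
    from transformers import AutoProcessor, MllamaForConditionalGeneration

    processor = AutoProcessor.from_pretrained(model_name)
    vision_model = MllamaForConditionalGeneration.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # assumes a GPU; adjust or drop for CPU-only experiments
        device_map="auto",
    )

    # Chat-style message that interleaves the image placeholder with the text prompt.
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": prompt},
        ]}
    ]
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(
        image,
        input_text,
        add_special_tokens=False,  # the chat template already adds the special tokens
        return_tensors="pt",
    ).to(vision_model.device)

    with torch.no_grad():
        output = vision_model.generate(**inputs, max_new_tokens=max_new_tokens)
    return processor.decode(output[0], skip_special_tokens=True)

# In the Streamlit flow, the tokenizer-only block inside main() could be replaced by a
# single call such as `st.write("Generated Output:", generate_vision_reply(image, prompt))`;
# for repeated use, loading the processor and model once behind @st.cache_resource avoids
# re-downloading the weights on every rerun.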