import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "01-ai/Yi-6B-Chat"

# Load the model and tokenizer once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype="auto")
    model.eval()
    return tokenizer, model

tokenizer, model = load_model()

st.title("Chat with AI")

# User input
user_input = st.text_input("You: ", "Hello, how are you?")

if user_input:
    # Yi-6B-Chat is a chat-tuned model, so wrap the input in its chat
    # template rather than tokenizing the raw string
    messages = [{"role": "user", "content": user_input}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )

    # Generate a response; max_new_tokens bounds only the reply,
    # whereas max_length would also count the prompt tokens
    with torch.no_grad():
        outputs = model.generate(input_ids, max_new_tokens=256)

    # Decode only the newly generated tokens, not the echoed prompt
    response = tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    )
    st.write(f"AI: {response}")
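To try this locally, save the script under a name of your choosing (app.py here is just an assumed filename) and launch it with Streamlit's CLI:

streamlit run app.py

Streamlit reruns the script on every interaction, which is why the model load is wrapped in st.cache_resource: without it, the 6B-parameter model would be reloaded from disk on each message.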