import torch
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM

model_id = "google/gemma-1.1-2b-it"
dtype = torch.bfloat16


# Cache the tokenizer and model so Streamlit's rerun-on-every-interaction
# doesn't reload the 2B-parameter weights on each message.
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        # device_map="cpu",
        torch_dtype=dtype,
    )
    return tokenizer, model


tokenizer, model = load_model()

st.title("💬 Chatbot")
st.caption("🚀 A Streamlit chatbot powered by Google's Gemma")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state["messages"] = []  # e.g. [{"role": "assistant", "content": "How can I help you?"}]

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    st.chat_message(message["role"]).write(message["content"])

# React to user input
if prompt := st.chat_input():
    # Display the user message and add it to the chat history
    st.chat_message("user").write(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Render the full history into Gemma's chat format and append the
    # generation prompt so the model answers as the next turn. The template
    # already inserts <bos>, hence add_special_tokens=False below.
    text = tokenizer.apply_chat_template(
        st.session_state.messages, tokenize=False, add_generation_prompt=True
    )

    # Generate a response to the conversation
    inputs = tokenizer.encode(text, add_special_tokens=False, return_tensors="pt").to(model.device)
    outputs = model.generate(input_ids=inputs, max_new_tokens=150)

    # Decode only the newly generated tokens; decoding outputs[0] whole would
    # echo the entire prompt back into the reply. skip_special_tokens drops
    # markers like <end_of_turn>, so no regex cleanup is needed.
    msg = tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True).strip()

    # Display the assistant response and add it to the chat history
    st.chat_message("assistant").write(msg)
    st.session_state.messages.append({"role": "assistant", "content": msg})
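
# --- Optional: streaming the reply (a minimal sketch, not wired in above) ---
# Assuming a recent transformers (TextIteratorStreamer) and Streamlit >= 1.31
# (st.write_stream), tokens can be rendered as they are produced instead of
# after the full generation finishes. The helper name stream_reply is ours.
# To try it, swap the assistant-response block for:
#     with st.chat_message("assistant"):
#         msg = st.write_stream(stream_reply(text))
from threading import Thread

from transformers import TextIteratorStreamer


def stream_reply(prompt_text):
    """Yield decoded text chunks as the model produces them."""
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    ids = tokenizer.encode(prompt_text, add_special_tokens=False, return_tensors="pt").to(model.device)
    # generate() blocks, so run it on a worker thread and consume the streamer here
    Thread(
        target=model.generate,
        kwargs=dict(input_ids=ids, max_new_tokens=150, streamer=streamer),
    ).start()
    yield from streamer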