import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, set_seed

set_seed(1234)
model_id = "Vikalp026var/gemma-2b-it-pythoncodegen"


# Load the model and tokenizer once and cache them across Streamlit reruns
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cuda")
    return tokenizer, model


tokenizer, model = load_model()

# Keep the chat history in session state so it persists across reruns
if "chat" not in st.session_state:
    st.session_state.chat = []

st.title("Chat with a Language Model")
st.write("This is a simple chatbot using a fine-tuned Gemma 2B IT model. Type 'exit' to end the conversation.")

# Text input
user_input = st.text_input("User:", key="user_input")


# Handle a single chat turn: append the user message, generate, append the reply
def handle_chat(user_input):
    if user_input:
        chat = st.session_state.chat
        chat.append({"role": "user", "content": user_input})
        # Build the prompt from the full chat history using the model's chat template
        token_inputs = tokenizer.apply_chat_template(
            chat, tokenize=True, return_tensors="pt", add_generation_prompt=True
        ).to("cuda")
        token_outputs = model.generate(
            input_ids=token_inputs, do_sample=True, max_new_tokens=500, temperature=0.5
        )
        # Decode only the newly generated tokens, skipping the prompt
        new_tokens = token_outputs[0][token_inputs.shape[-1]:]
        decoded_output = tokenizer.decode(new_tokens, skip_special_tokens=True)
        chat.append({"role": "model", "content": decoded_output})
        return decoded_output
    return ""


# Display the model response
if user_input.lower() == "exit":
    st.stop()
else:
    response = handle_chat(user_input)
    st.text_area("Model:", value=response, height=200, key="model_response")

# Streamlit has no st.run(); launch this app from the command line with:
#   streamlit run app.py