import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import streamlit as st
# Set the device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model_source selects which weights to load: 1 uses the locally fine-tuned
# model in trained_models/; any other value falls back to the TinyLlama chat model.
model_source = 10
if model_source == 1:
    pipe = pipeline("text-generation", model="trained_models/", device=device)
else:
    pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                    torch_dtype=torch.bfloat16, device=device)
input_text = st.text_input(label='prompt:')
context = st.text_input(label='provide context for the model: who/what should it be? (e.g. respond as if explaining to a child)')
messages = [
    {"role": "system", "content": context},
    {"role": "user", "content": input_text},
]
# Prepare the prompt
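# apply_chat_template renders the system/user messages into TinyLlama's chat
# format, which marks the model's turn with an '<|assistant|>' tag; the generated
# text is split on that tag below so only the reply is displayed.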
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
if st.button("generate response"):
    # Generate a response
    outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.5, top_k=10, top_p=0.90)
    st.write(outputs[0]["generated_text"].split('<|assistant|>')[1])
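# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py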