import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import streamlit as st

# Set the device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Select the model: 1 loads the locally fine-tuned model from trained_models/,
# anything else loads the TinyLlama chat model from the Hugging Face Hub.
model_source = 10

if model_source == 1:
    pipe = pipeline("text-generation", model="trained_models/",
                    device=0 if device.type == "cuda" else -1)
else:
    pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                    torch_dtype=torch.bfloat16,
                    device=0 if device.type == "cuda" else -1)
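
# Note: Streamlit reruns this whole script on every widget interaction, so the
# pipeline above is rebuilt each time. A minimal sketch of caching the load
# instead (assumes a Streamlit version that provides st.cache_resource), kept
# commented out so the original flow is unchanged:
#
#   @st.cache_resource
#   def load_pipe():
#       return pipeline("text-generation",
#                       model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
#                       torch_dtype=torch.bfloat16,
#                       device=0 if torch.cuda.is_available() else -1)
#
#   pipe = load_pipe()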


# Two text boxes: the user prompt and a system-style instruction for the model
# (e.g. "respond as if you are explaining to a child").
input_text = st.text_input(label="prompt:")
context = st.text_input(label="provide context for the model (who/what should it be?)")

messages = [
    {"role": "system", "content": context},
    {"role": "user", "content": input_text},
]

# Build the prompt with the model's chat template; add_generation_prompt=True
# appends the assistant tag so the model continues as the assistant.
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
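# For reference: TinyLlama-1.1B-Chat-v1.0 uses a Zephyr-style template, so the
# rendered prompt looks roughly like
#   <|system|>\n{context}</s>\n<|user|>\n{input_text}</s>\n<|assistant|>\n
# which is why the split on '<|assistant|>' below recovers just the reply.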

if st.button("generate response"):

    # Generate a response
    outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.5, top_k=10, top_p=0.90)

    # generated_text echoes the full prompt, so show only the part after the assistant tag
    st.write(outputs[0]["generated_text"].split("<|assistant|>")[1])
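
# To launch the app (assuming this file is saved as app.py):
#
#   streamlit run app.py
#
# A sketch of a less brittle way to display only the reply, using the
# text-generation pipeline's return_full_text flag instead of splitting on the
# '<|assistant|>' tag:
#
#   outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.5,
#                  top_k=10, top_p=0.90, return_full_text=False)
#   st.write(outputs[0]["generated_text"])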