import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import streamlit as st
# Set the device to CUDA if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# model_source selects which weights to load: 1 uses the locally fine-tuned
# model in trained_models/; any other value falls back to the TinyLlama chat model.
model_source = 10
if model_source == 1:
    pipe = pipeline("text-generation", model="trained_models/", device=device)
else:
    pipe = pipeline("text-generation", model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
                    torch_dtype=torch.bfloat16, device=device)
input_text = st.text_input(label='prompt:')
context = st.text_input(label='provide context for the model: who/what should it be? (e.g. respond as if explaining to a child)')
messages = [
    {"role": "system", "content": context},
    {"role": "user", "content": input_text},
]
# Prepare the prompt
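# apply_chat_template renders the system/user messages into TinyLlama's chat
# format, which marks the model's turn with an '<|assistant|>' tag; the generated
# text is split on that tag below so only the reply is displayed.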
prompt = pipe.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
if st.button("generate response"):
    # Generate a response
    outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.5, top_k=10, top_p=0.90)
    st.write(outputs[0]["generated_text"].split('<|assistant|>')[1])
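# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py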