Spaces:

RobertCastagna
/

FIN_LLM

Sleeping

FIN_LLM / app.py

Robert Castagna

format output

5dda369 over 1 year ago

1.39 kB

	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	import streamlit as st

	# Run on the GPU when torch can see one, otherwise on the CPU.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	# Checkpoint selector: 1 loads the local "trained_models/" directory,
	# any other value loads the TinyLlama chat model by its Hub id.
	model_source = 10


	@st.cache_resource
	def _load_pipeline(source):
	    """Build the text-generation pipeline once and reuse it across reruns.

	    Streamlit re-executes the script on every widget interaction; caching the
	    pipeline avoids reloading the model weights each time.

	    Args:
	        source: Model selector; 1 means the local "trained_models/" checkpoint,
	            anything else means "TinyLlama/TinyLlama-1.1B-Chat-v1.0".

	    Returns:
	        The constructed transformers text-generation pipeline.
	    """
	    # Hand the torch.device object to the pipeline directly.
	    # `torch.device("cuda").index` is None, so forwarding `.index` would pass
	    # `device=None` instead of the GPU that was selected above.
	    if source == 1:
	        return pipeline("text-generation", model="trained_models/", device=device)
	    return pipeline(
	        "text-generation",
	        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
	        torch_dtype=torch.bfloat16,
	        device=device,
	    )


	pipe = _load_pipeline(model_source)


	# Two free-text fields: the user's question and the persona/context that
	# becomes the system message.
	input_text = st.text_input(label='prompt:')
	context = st.text_input(label='provide context for the model.. who/what should it be?')

	# Chat transcript in role/content form: system message first, then the user turn.
	messages = [
	    {"role": role, "content": text}
	    for role, text in (("system", f"{context}"), ("user", f"{input_text}"))
	]

	# Render the transcript through the tokenizer's chat template as plain text
	# (tokenize=False); add_generation_prompt=True requests the assistant-turn opener.
	prompt = pipe.tokenizer.apply_chat_template(
	    messages,
	    tokenize=False,
	    add_generation_prompt=True,
	)

	if st.button("generate response"):
	    # Sample a reply of up to 250 new tokens. return_full_text=False makes the
	    # pipeline return only the newly generated text, so the prompt does not
	    # have to be cut off by splitting on a chat-template marker. The previous
	    # split used '<\|assistant\|>' with literal backslashes, which does not
	    # match the '<|assistant|>' marker and made the [1] lookup raise IndexError.
	    outputs = pipe(
	        prompt,
	        max_new_tokens=250,
	        do_sample=True,
	        temperature=0.5,
	        top_k=10,
	        top_p=0.90,
	        return_full_text=False,
	    )

	    # Show only the model's answer, without surrounding whitespace.
	    st.write(outputs[0]["generated_text"].strip())