import os

import streamlit as st
from huggingface_hub import login
from llama_cpp import Llama

# Load model directly
# from transformers import AutoModel, AutoModelForCausalLM

# Log in to the Hugging Face Hub with the HF_TOKEN secret, if one is configured
access_token = os.getenv('HF_TOKEN')
if access_token:
    login(token=access_token)

# GGUF model file, expected to be present in the app's working directory
file = 'llama-2-7b.Q5_0.gguf'
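# Optional sketch (an assumption, not part of the original app): if the GGUF file is not
# stored alongside app.py, it could be fetched from the Hub first, e.g.:
#
#   from huggingface_hub import hf_hub_download
#   model_path = hf_hub_download(repo_id="TheBloke/Llama-2-7B-GGUF",  # hypothetical source repo
#                                filename=file)
#
# hf_hub_download returns the local cache path, which could then be passed to Llama().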
llm = Llama(
    model_path="./" + file,
    # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
    # seed=1337,        # Uncomment to set a specific seed
    # n_ctx=2048,       # Uncomment to increase the context window
)
prompt = "Q: Name the planets in the solar system? A: "
output = llm(
    prompt,             # Prompt
    max_tokens=32,      # Generate up to 32 tokens; set to None to generate up to the end of the context window
    stop=["Q:", "\n"],  # Stop generating just before the model would produce a new question
    echo=True,          # Echo the prompt back in the output
)  # Generate a completion; create_completion can also be called directly
print(output)
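# The call above returns an OpenAI-style completion dict; the generated text
# is found under output['choices'][0]['text'], which is used below to display the answer.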
# Earlier alternative loading path, kept commented out for reference:
# NO_GPU = 0
# GPU_LAYERS = 50
# llm = AutoModelForCausalLM.from_pretrained(file, model_type="llama", gpu_layers=NO_GPU)
# model = AutoModelForCausalLM.from_pretrained("valencar/llamm",
#                                              model_file=file, model_type="llama", gpu_layers=NO_GPU)
# access_token = os.getenv('HF_TOKEN2')
# login(token=access_token)
# prompt = "AI is going to"
with st.container():
    st.write('\n\n')
    st.write(prompt)
    # Extract the generated text from the llama_cpp completion dict
    answer = output['choices'][0]['text']
    st.write(answer)
    print(answer)
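# Standard Streamlit invocation for running this app locally: streamlit run app.py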