import os
import textwrap

import streamlit as st

# LLaMA 2 setup
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
@st.cache_resource
def load_llm():
    # Read the Hugging Face access token from the environment instead of
    # hardcoding it in the source.
    login(os.environ["HF_TOKEN"])
    tokenizer = AutoTokenizer.from_pretrained(
        "meta-llama/Llama-2-13b-chat-hf",
        use_auth_token=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Llama-2-13b-chat-hf",
        device_map="auto",
        torch_dtype=torch.float16,
        use_auth_token=True,
        # load_in_8bit=True,
        # load_in_4bit=True,
    )
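    # Note (assumption, not from the original source): enabling load_in_8bit or
    # load_in_4bit above requires the bitsandbytes package to be installed; the
    # fp16 default needs roughly 26 GB of GPU memory for the 13B checkpoint, so
    # quantization is the usual fallback on smaller GPUs.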
    # Wrap the model in a text-generation pipeline so LangChain can drive it.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        max_new_tokens=512,
        do_sample=True,
        top_k=30,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    llm = HuggingFacePipeline(pipeline=pipe, model_kwargs={"temperature": 0})
    # Also return the tokenizer and model so generate() below can use them;
    # @st.cache_resource ensures all of this loads only once per process.
    return tokenizer, model, pipe, llm
tokenizer, model, pipe, llm = load_llm()
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<<SYS>>\n", "\n<</SYS>>\n\n"
DEFAULT_SYSTEM_PROMPT = """\
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.
If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."""
def get_prompt(instruction, new_system_prompt=DEFAULT_SYSTEM_PROMPT):
    # Wrap the instruction in Llama-2 chat formatting: [INST] ... [/INST] with
    # the system prompt enclosed in <<SYS>> ... <</SYS>>.
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template
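# Illustrative example: with the default system prompt, get_prompt("Hi") yields
# a string shaped like
#   [INST]<<SYS>>
#   You are a helpful, respectful and honest assistant. ...
#   <</SYS>>
#
#   Hi[/INST]
# which is the chat format the Llama-2 chat checkpoints were fine-tuned on.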
def cut_off_text(text, prompt):
    # Truncate the generation at the first occurrence of the cutoff phrase.
    cutoff_phrase = prompt
    index = text.find(cutoff_phrase)
    if index != -1:
        return text[:index]
    else:
        return text
def remove_substring(string, substring):
    return string.replace(substring, "")
def generate(text):
    # Direct generation helper that bypasses LangChain and calls the model itself,
    # using the cached tokenizer and model returned by load_llm().
    prompt = get_prompt(text)
    with torch.autocast("cuda", dtype=torch.bfloat16):
        inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )
        final_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
        final_outputs = cut_off_text(final_outputs, "</s>")
        final_outputs = remove_substring(final_outputs, prompt)
    return final_outputs
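# Illustrative usage (not wired into the Streamlit UI below; shown only as a
# sketch of what the helper returns):
#   reply = generate("What is the capital of France?")
#   print(parse_text(reply))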
def parse_text(text):
    # Hard-wrap the model output at 100 columns for readable console logging.
    wrapped_text = textwrap.fill(text, width=100)
    print(wrapped_text + "\n\n")
    return wrapped_text
def answer(context, question):
    # Embed the conversation in the instruction; {text} is left as a literal
    # placeholder that PromptTemplate fills with the question at run time.
    instruction = (
        f"conversation: '''{context}'''"
        + "\nBased on the conversation provided in triple quotes, answer the next question.\nQuestion: {text}"
    )
    system_prompt = (
        "You are an expert who answers questions based on a conversation. "
        "You analyze the conversation in light of the question, then answer "
        "with yes, no, or not clear only. You only output one or two words."
    )
    template = get_prompt(instruction, system_prompt)
    prompt = PromptTemplate(template=template, input_variables=["text"])
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    output = llm_chain.run(question)
    return parse_text(output)
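# Illustrative example (assumed conversation; the exact output depends on the model):
#   answer("A: Can you make the 5pm call? B: Yes, I can.", "Can she answer?")
#   -> "Yes"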
# Streamlit UI: the conversation goes in the main pane; the question and the
# answer live in the sidebar.
question = st.sidebar.text_input("Question", "Can she answer?")
context = st.text_area("Context", "conversation")
if st.sidebar.button("Answer"):
    outputs = answer(context, question)
    st.sidebar.write(f"Answer is {outputs}")