Spaces:
Runtime error
Runtime error
from langchain import HuggingFacePipeline | |
from langchain import PromptTemplate, LLMChain | |
from transformers import AutoTokenizer, AutoModelForCausalLM | |
import transformers | |
import os | |
import torch | |
import gradio as gr | |
# Report whether a CUDA device is visible. A bare `torch.cuda.is_available()`
# statement discards its result in a script, so print it to the startup log.
print(f"CUDA available: {torch.cuda.is_available()}")

# Hugging Face Hub id of the checkpoint to serve
# (larger alternative: "tiiuae/falcon-40b-instruct").
model_id = "tiiuae/falcon-7b-instruct"

# load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_id)

# load the model
## params:
## cache_dir: path to a directory in which the downloaded pretrained model
##            should be cached if the standard cache should not be used
## torch_dtype: dtype the weights are loaded in
## trust_remote_code: passed through to `from_pretrained`
##            (NOTE(review): presumably needed for the Falcon repo's custom
##            modelling code - confirm against the model card)
## device_map: "auto" ensures the model is moved to your GPU(s)
## offload_folder: directory handed to `from_pretrained` for offloaded weights
cache_dir = "./workspace/"
torch_dtype = torch.bfloat16
trust_remote_code = True
device_map = "auto"
offload_folder = "offload"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir=cache_dir,
    torch_dtype=torch_dtype,
    trust_remote_code=trust_remote_code,
    device_map=device_map,
    offload_folder=offload_folder,
)

# set pt model to inference mode
model.eval()
# build the hf transformers pipeline
## task: pipeline type to instantiate
## max_length / do_sample / top_k / num_return_sequences / eos_token_id:
##     generation settings forwarded to the pipeline
task = "text-generation"
max_length = 400
do_sample = True
top_k = 10
num_return_sequences = 1
eos_token_id = tokenizer.eos_token_id
pipeline = transformers.pipeline(
    task,  # use the variable defined above instead of repeating the literal
    model=model,
    tokenizer=tokenizer,
    device_map=device_map,
    max_length=max_length,
    do_sample=do_sample,
    top_k=top_k,
    num_return_sequences=num_return_sequences,
    eos_token_id=eos_token_id,
)
# set up the prompt template
# NOTE: this must be a plain string, not an f-string. An f-string would
# interpolate the builtin `input` function immediately, leaving no `{input}`
# placeholder for the template to fill with the user's text.
template = PromptTemplate(input_variables=["input"], template="{input}")

# pass the hf pipeline to the langchain wrapper class
llm = HuggingFacePipeline(pipeline=pipeline)

# build the stacked llm chain, i.e. prompt formatting + llm
chain = LLMChain(llm=llm, prompt=template)
# create generate function | |
def generate(prompt: str) -> str:
    """Run a user prompt through the LLM chain and return its response.

    Args:
        prompt: Text entered by the user; it is passed to ``chain.run``.

    Returns:
        Whatever ``chain.run`` returns for the prompt.
    """
    # The parameter was previously misspelled, so the body's `prompt` lookup
    # raised NameError on every call.
    return chain.run(prompt)
# Header text shown in the web UI. The app loads `tiiuae/falcon-7b-instruct`,
# so the labels name the 7B model rather than the 40B variant.
title = "Falcon 7-b-Instruct 🦅"
description = "Web app application using the open-source `Falcon-7b-Instruct` LLM"

# build the gradio interface: one text box in, one text box out
gr.Interface(
    fn=generate,
    inputs=["text"],  # keyword is `inputs`, not `input`
    outputs=["text"],
    title=title,
    description=description,  # was misspelled `descrption`
).launch()