 # Install Dependencies

In [None]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117 --upgrade

In [None]:
!pip install langchain einops accelerate transformers bitsandbytes

# Import Dependencies

In [10]:
from langchain import HuggingFacePipeline
from langchain import PromptTemplate, LLMChain
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import os 
import torch

 from .autonotebook import tqdm as notebook_tqdm


In [11]:
# Ask PyTorch whether a CUDA device is usable; the cell's output is the boolean result
torch.cuda.is_available()

True

# Build the Pipeline

In [None]:
# Hugging Face Hub identifier of the checkpoint used by the rest of the notebook
model_id = "tiiuae/falcon-40b-instruct"

# Tokenizer that belongs to the checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Causal LM weights: cached under ./workspace/, loaded as bfloat16, with
# automatic device placement and "offload" as the offload folder.
# NOTE: trust_remote_code=True lets transformers run code shipped with the
# checkpoint repository, so only use it with a source you trust.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    cache_dir='./workspace/',
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    offload_folder="offload",
)

# Put the PyTorch module into inference (eval) mode
model.eval()

# Text-generation pipeline around the loaded model and tokenizer:
# sampling enabled with top_k=10, max_length=400, one returned sequence,
# stopping on the tokenizer's EOS token
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="auto",
    max_length=400,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

In [None]:
# Smoke-test the raw transformers pipeline with a sample question before wrapping it in LangChain
pipeline('who is kim kardashian?')

# Pass it to Langchain

In [None]:
# Pass-through prompt: the template consists solely of the `input` variable
template = PromptTemplate(template='{input}', input_variables=['input'])
# Wrap the transformers pipeline in LangChain's Hugging Face LLM class
llm = HuggingFacePipeline(pipeline=pipeline)
# Chain the two together: prompt formatting first, then the LLM call
chain = LLMChain(prompt=template, llm=llm)

In [None]:
# Run the same sample question through the LLMChain and keep the result in `response`
response = chain.run('who is kim kardashian?')

# Build Gradio App

In [None]:
# Install Gradio for the UI component
!pip install gradio

In [None]:
# Import gradio for UI
import gradio as gr

In [None]:
# Gradio callback: receives the text the user submitted in the app
def generate(prompt):
    # Forward the prompt to the LLM chain and hand its response back to the caller
    answer = chain.run(prompt)
    return answer

In [None]:
# Description text handed to the Gradio interface
description = 'This application demonstrates the use of the open-source `Falcon-40b-Instruct` LLM.'
# Title text handed to the Gradio interface
title = '🦜🔗 Falcon-40b-Instruct'
# pls subscribe 🙏

In [None]:
# Text-in / text-out Gradio interface backed by generate(), using the title,
# description and theme below; launched straight away on port 8080 with
# share=True
gr.Interface(
    fn=generate,
    inputs=["text"],
    outputs=["text"],
    title=title,
    description=description,
    theme='finlaymacklon/boxy_violet',
).launch(share=True, server_port=8080)