import os
import gradio as gr
from fastai.vision.all import *
import copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
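# NOTE: the default weights below are in the legacy GGML format, which recent
# llama-cpp-python releases only replaced with GGUF support; this script
# assumes a compatible older version is pinned, or that the REPO_ID /
# MODEL_FILE env vars point at a GGUF model instead.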
# Load the FastAI vision model exported with learn.export()
learn = load_learner('export.pkl')
labels = learn.dls.vocab  # class names the classifier was trained on
# Load the Llama language model
llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7B-Chat-GGML"),
        filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.ggmlv3.q5_0.bin"),
    ),
    n_ctx=2048,       # prompt context window, in tokens
    n_gpu_layers=50,  # layers offloaded to the GPU; set to 0 for CPU-only
)
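# A minimal smoke test (hypothetical), handy when debugging the model load:
#   print(llm("[INST] Say hello. [/INST]", max_tokens=16)["choices"][0]["text"])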
history = []
system_message = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.
"""
def generate_text(message, history):
    temp = ""
    # Build a Llama-2 chat prompt: [INST] <<SYS>>system<</SYS>> user [/INST]
    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
    for interaction in history:
        input_prompt += str(interaction[0]) + " [/INST] " + str(interaction[1]) + " </s><s> [INST] "
    input_prompt += str(message) + " [/INST] "
    output = llm(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        stop=[
            "</s>",  # end-of-turn markers used in the prompt format above
            "<s>",
            " \n",
            "ASSISTANT:",
            "USER:",
            "SYSTEM:",
        ],
        stream=True,
    )
    # Stream partial completions to the caller as tokens arrive
    for out in output:
        stream = copy.deepcopy(out)
        temp += stream["choices"][0]["text"]
        yield temp
    # Record the exchange as a single (user, assistant) pair, the shape the
    # prompt-building loop above expects
    history.append((message, temp))
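# Hypothetical direct usage outside Gradio (prints the growing reply):
#   for partial in generate_text("Tell me about zebras.", history):
#       print(partial)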
# Define the predict function for the FastAI model
def predict_with_llama_and_generate_text(img):
    img = PILImage.create(img)
    pred, pred_idx, probs = learn.predict(img)
    detected_object = labels[pred_idx]
    response = f"The system has detected {detected_object}. Do you want to know about {detected_object}?"
    # Feed the detection result to the LLM and stream its reply to the UI
    for llama_response in generate_text(response, history):
        yield llama_response
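# Quick local check (hypothetical image path; PILImage.create accepts files):
#   for chunk in predict_with_llama_and_generate_text("zebra.jpg"):
#       print(chunk)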
# Define the Gradio interface
gr.Interface(
    fn=predict_with_llama_and_generate_text,  # generator fn, so Gradio streams the text
    inputs=gr.Image(type="pil"),  # gr.inputs.* was removed in Gradio 3+
    outputs=gr.Textbox(),
    title="Multimodal Assistant",
    description="An AI model that combines image classification with text generation.",
).launch()