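"""Gradio Space: recipe chatbot with retrieval-augmented generation.

A quantized Phi-3-mini GGUF model served via llama-cpp-python answers cooking
questions, grounded in recipe documents retrieved from an in-memory ChromaDB
collection embedded with sentence-transformers.
"""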
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import chromadb
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
# Initialize the Llama model
llm = Llama(
    # model_path=hf_hub_download(
    #     repo_id="microsoft/Phi-3-mini-4k-instruct-gguf",
    #     filename="Phi-3-mini-4k-instruct-q4.gguf",
    # ),
    model_path=hf_hub_download(
        repo_id="Ankitajadhav/Phi-3-mini-4k-instruct-q4.gguf",
        filename="Phi-3-mini-4k-instruct-q4.gguf",
    ),
    n_ctx=2048,
    n_gpu_layers=50,  # Number of layers to offload to the GPU; adjust based on your VRAM
)
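# Note: hf_hub_download() caches the checkpoint locally (under
# ~/.cache/huggingface/hub by default), so the GGUF file is only downloaded
# on the first run.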
# Initialize ChromaDB Vector Store
class VectorStore:
    def __init__(self, collection_name):
        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
        self.chroma_client = chromadb.Client()
        self.collection = self.chroma_client.create_collection(name=collection_name)

    # Populate the vector store with embeddings from the dataset
    def populate_vectors(self, dataset):
        # Select the text columns to combine (limited to 1,000 rows for the demo)
        # title = dataset['train']['title_cleaned'][:1000]
        recipe = dataset['train']['recipe_new'][:1000]
        allergy = dataset['train']['allergy_type'][:1000]
        ingredients = dataset['train']['ingredients_alternatives'][:1000]
        # Concatenate the three columns into one document per row
        texts = [f"{rec} {ingr} {alle}" for rec, ingr, alle in zip(recipe, ingredients, allergy)]
        # Embed and store each document with a sequential string id
        for i, item in enumerate(texts):
            embedding = self.embedding_model.encode(item).tolist()
            self.collection.add(embeddings=[embedding], documents=[item], ids=[str(i)])

    # Return the documents nearest to the query
    def search_context(self, query, n_results=1):
        query_embedding = self.embedding_model.encode([query]).tolist()
        results = self.collection.query(query_embeddings=query_embedding, n_results=n_results)
        # query() nests results per query; unwrap the single query's document list
        return results['documents'][0]
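# Note: chromadb.Client() is ephemeral (in-memory), so all 1,000 rows are
# re-embedded on every restart. A persistent store is one possible alternative
# (sketch, assuming chromadb >= 0.4; not used here):
#
#     chroma_client = chromadb.PersistentClient(path="./chroma_db")
#     collection = chroma_client.get_or_create_collection(name=collection_name)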
# Load the dataset and populate the vector store at startup
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')
vector_store = VectorStore("embedding_vector")
vector_store.populate_vectors(dataset)
def format_recipe(input_string):
    # Clean up the input
    cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
    # Split the text into lines
    lines = cleaned_text.split('\n')
    # Initialize sections
    title = lines[0]
    ingredients = []
    instructions = []
    substitutions = []
    # Separate ingredients from instructions
    in_instructions = False
    for line in lines[1:]:
        if line.startswith("Instructions:"):
            in_instructions = True
            continue
        if in_instructions:
            if line.strip():  # Skip empty lines
                instructions.append(line.strip())
        else:
            if line.strip():  # Skip empty lines
                ingredients.append(line.strip())
    # Gather substitutions from any line of the form "ingredient: alternative"
    for line in lines:
        if ':' in line and not line.startswith("Instructions:"):
            substitutions.append(line.strip())
    # Format output as Markdown
    formatted_recipe = f"## {title}\n\n### Ingredients:\n"
    formatted_recipe += '\n'.join(f"- {item}" for item in ingredients) + "\n\n"
    formatted_recipe += "### Instructions:\n" + '\n'.join(f"{i + 1}. {line}" for i, line in enumerate(instructions)) + "\n\n"
    if substitutions:
        formatted_recipe += "### Substitutions:\n" + '\n'.join(
            f"- **{line.split(':', 1)[0].strip()}**: {line.split(':', 1)[1].strip()}" for line in substitutions
        ) + "\n"
    return formatted_recipe
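# Illustrative example (input invented for documentation): given a title line,
# ingredient lines, and an "Instructions:" section, format_recipe produces
# Markdown the chat UI can render:
#
#     format_recipe("Fried Rice\n2 cups cooked rice\nInstructions:\nHeat the wok.")
#     # -> "## Fried Rice\n\n### Ingredients:\n- 2 cups cooked rice\n\n"
#     #    "### Instructions:\n1. Heat the wok.\n\n"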
def generate_text(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Retrieve supporting context from the vector store
    context_results = vector_store.search_context(message, n_results=1)
    context = context_results[0] if context_results else ""

    # Build the prompt: system block, retrieved context, then the
    # alternating user/assistant turns from the chat history
    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{context}\n"
    for user_msg, assistant_msg in history:
        input_prompt += f"{user_msg} [/INST] {assistant_msg} </s><s> [INST] "
    input_prompt += f"{message} [/INST] "
    print("Input prompt:", input_prompt)  # Debugging output

    output = llm(
        input_prompt,
        temperature=temperature,
        top_p=top_p,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=max_tokens,
        stop=["</s>", "ASSISTANT:", "USER:", "SYSTEM:"],
        stream=True,
    )
    # Accumulate streamed tokens and re-format the partial recipe on each yield
    temp = ""
    for out in output:
        temp += out["choices"][0]["text"]
        yield format_recipe(temp)
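# Note: the [INST]/<<SYS>> prompt above follows the Llama-2 chat convention.
# Phi-3's native template uses <|user|> ... <|end|> <|assistant|> tags instead,
# so matching the model's own template is worth experimenting with here.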
# Define the Gradio interface
demo = gr.ChatInterface(
    generate_text,
    title="llama-cpp-python on GPU with ChromaDB",
    description="Running LLM with context retrieval from ChromaDB",
    examples=[
        ["I have leftover rice, what can I make out of it?"],
        ["Can I make lunch for two people with this?"],
    ],
    cache_examples=False,
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()
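# Note (assumption): retry_btn/undo_btn/clear_btn are Gradio 4.x arguments and
# were removed in Gradio 5, so this Space needs a Gradio 4 pin (e.g. in
# requirements.txt). For local debugging, demo.launch(share=True) exposes a
# temporary public URL.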