Spaces:
Runtime error
Runtime error
Ankitajadhav
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# import packages
|
2 |
+
__import__('pysqlite3')
|
3 |
+
import sys
|
4 |
+
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
|
5 |
+
from sentence_transformers import SentenceTransformer
|
6 |
+
import chromadb
|
7 |
+
from datasets import load_dataset
|
8 |
+
from gpt4all import GPT4All
|
9 |
+
|
10 |
+
# Vector store: embeds recipe text and indexes it in a ChromaDB collection
|
11 |
+
class VectorStore:
    """Embed recipe texts and index them in a ChromaDB collection.

    The store owns a SentenceTransformer embedding model, a Chroma client
    created with ``chromadb.Client()``, and one collection in that client.
    """

    # Dataset columns joined, in this order, into one document per recipe.
    TEXT_COLUMNS = (
        'title_cleaned',
        'recipe_new',
        'meal_type',
        'allergy_type',
        'ingredients_alternatives',
    )

    def __init__(self, collection_name):
        """Load the embedding model and open the collection.

        Args:
            collection_name: Name of the Chroma collection to use.
        """
        # Initialize the embedding model
        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
        self.chroma_client = chromadb.Client()
        # Reuse a collection of the same name if the client already has one,
        # so constructing the store twice in one process does not fail.
        self.collection = self.chroma_client.get_or_create_collection(name=collection_name)

    def populate_vectors(self, dataset, limit=5000, batch_size=256):
        """Populate the collection with embeddings from a dataset.

        One document is built per row of ``dataset['train']`` by joining the
        ``TEXT_COLUMNS`` values with single spaces (plus a trailing space).
        Documents get the ids ``"0"``, ``"1"``, ... in row order.

        Args:
            dataset: Mapping with a ``'train'`` entry whose columns can be
                looked up by name and sliced.
            limit: Maximum number of rows to index; ``None`` indexes all rows.
            batch_size: Number of documents encoded and added per call.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        train_split = dataset['train']
        # Select the text columns to concatenate, capped at `limit` rows each
        columns = [train_split[name][:limit] for name in self.TEXT_COLUMNS]

        # Concatenate the text of all five columns into one document per row
        texts = [
            f"{tit} {rep} {meal} {alle} {ingr} "
            for tit, rep, meal, alle, ingr in zip(*columns)
        ]

        # Encode and insert in batches instead of one document at a time.
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            embeddings = self.embedding_model.encode(batch).tolist()
            self.collection.add(
                embeddings=embeddings,
                documents=batch,
                ids=[str(i) for i in range(start, start + len(batch))],
            )

    def search_context(self, query, n_results=1):
        """Search the collection for the documents closest to a query.

        Args:
            query: Query text to embed.
            n_results: Number of results to request from the collection.

        Returns:
            Whatever ``collection.query`` returns for the embedded query.
        """
        query_embeddings = self.embedding_model.encode(query).tolist()
        return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
|
37 |
+
|
38 |
+
|
39 |
+
# Load the recipe dataset hosted on the Hugging Face Hub.
# Dataset details - https://huggingface.co/datasets/Thefoodprocessor/recipe_new_with_features_full
dataset = load_dataset('Thefoodprocessor/recipe_new_with_features_full')

# Build the vector store and index the recipe texts.
vector_store = VectorStore("embedding_vector")
vector_store.populate_vectors(dataset)

# Load the GPT4All language model.
# Details about GPT4All and its models - https://gpt4all.io/index.html
model_name = 'Meta-Llama-3-8B-Instruct.Q4_0.gguf'  # .gguf represents a quantized model
# Local directory the model file is loaded from.
# NOTE(review): presumably the file is downloaded here on first run and
# reused afterwards - confirm against the GPT4All constructor docs.
model_path = "gpt4all"
try:
    model = GPT4All(model_name=model_name, model_path=model_path, device="cuda")
except (RuntimeError, ValueError):
    # NOTE(review): assumes a missing/unusable CUDA backend surfaces as one of
    # these exception types - confirm against the installed gpt4all version.
    # Fall back to CPU so the app can still start on hosts without a GPU.
    model = GPT4All(model_name=model_name, model_path=model_path, device="cpu")
|
55 |
+
|
56 |
+
|