Ankitajadhav committed on
Commit
9cc7e25
·
verified ·
1 Parent(s): e575368

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -0
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import packages
2
+ __import__('pysqlite3')
3
+ import sys
4
+ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
5
+ from sentence_transformers import SentenceTransformer
6
+ import chromadb
7
+ from datasets import load_dataset
8
+ from gpt4all import GPT4All
9
+
10
+ # Embedding vector
11
class VectorStore:
    """ChromaDB collection paired with a sentence-embedding model.

    Documents are embedded with the
    ``sentence-transformers/multi-qa-MiniLM-L6-cos-v1`` model and stored in a
    collection created on a ``chromadb.Client()`` instance.
    """

    # Dataset columns joined, in this order, into one document per row.
    TEXT_COLUMNS = (
        'title_cleaned',
        'recipe_new',
        'meal_type',
        'allergy_type',
        'ingredients_alternatives',
    )

    def __init__(self, collection_name):
        """Load the embedding model and create the backing collection.

        Args:
            collection_name: Name passed to ``create_collection``.
        """
        self.embedding_model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
        self.chroma_client = chromadb.Client()
        self.collection = self.chroma_client.create_collection(name=collection_name)

    def populate_vectors(self, dataset, limit=5000, batch_size=256):
        """Embed the first ``limit`` training rows and add them to the collection.

        Each document is the space-separated concatenation of the columns
        listed in ``TEXT_COLUMNS`` (with a trailing space), and its id is the
        row index rendered as a string.

        Args:
            dataset: Mapping with a ``'train'`` split whose columns can be
                indexed by name and then sliced.
            limit: Maximum number of rows to index; ``None`` indexes all rows.
            batch_size: Number of documents encoded and inserted per call.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        train = dataset['train']
        columns = [train[name][:limit] for name in self.TEXT_COLUMNS]
        texts = [
            f"{tit} {rep} {meal} {alle} {ingr} "
            for tit, rep, meal, alle, ingr in zip(*columns)
        ]

        # Encode and insert in batches rather than one document per call;
        # ids still follow the global row order.
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            embeddings = self.embedding_model.encode(batch).tolist()
            ids = [str(i) for i in range(start, start + len(batch))]
            self.collection.add(embeddings=embeddings, documents=batch, ids=ids)

    def search_context(self, query, n_results=1):
        """Query the collection with the embedding of ``query``.

        Args:
            query: Text to embed and search for.
            n_results: Number of results requested from the collection.

        Returns:
            Whatever ``collection.query`` returns for the embedded query.
        """
        query_embeddings = self.embedding_model.encode(query).tolist()
        return self.collection.query(query_embeddings=query_embeddings, n_results=n_results)
37
+
38
+
39
# Fetch the recipe dataset from the Hugging Face Hub.
# Dataset card: https://huggingface.co/datasets/Thefoodprocessor/recipe_new_with_features_full
dataset = load_dataset(path='Thefoodprocessor/recipe_new_with_features_full')

# Build the vector store and index the dataset's recipe text in it.
vector_store = VectorStore(collection_name="embedding_vector")
vector_store.populate_vectors(dataset=dataset)
46
+
47
+
48
# Load the GPT4All chat model.
# GPT4All and model details: https://gpt4all.io/index.html
# Directory handed to GPT4All as the model location.
model_path = "gpt4all"
# The .gguf file is a quantized build of the model.
model_name = 'Meta-Llama-3-8B-Instruct.Q4_0.gguf'
model = GPT4All(device="cuda", model_path=model_path, model_name=model_name)
55
+
56
+