Mbonea committed on
Commit
934d38c
1 Parent(s): 2fcf157

did it work?

Browse files
App/Embedding/utils/Initialize.py CHANGED
@@ -1,6 +1,6 @@
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
- from langchain.vectorstores import Pinecone
4
  from pinecone import ServerlessSpec
5
  import pinecone
6
  import os
@@ -18,28 +18,29 @@ def initDocument():
18
  model_name = "thenlper/gte-base"
19
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
20
 
 
 
21
  try:
22
- pc=pinecone.Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
23
- vector_index = pc.Index(index_name)
24
- except:
25
- pc.create_index(name=index_name,
26
- dimension=768,
27
- metric="cosine",
28
- spec=ServerlessSpec(cloud="aws", region="us-east-1")
29
  )
30
-
31
-
32
- #docsearch = Pinecone.from_existing_index(index_name, embeddings)
33
 
34
- return pc
 
35
 
36
 
37
 
38
 
39
  async def delete_documents(task_id):
40
- docsearch=initDocument()
41
 
42
- docsearch.delete(
43
  filter={
44
  "task_id": {"$eq": task_id},
45
  }
@@ -68,12 +69,12 @@ def generateChunks(chunks, task_id, n=100):
68
 
69
 
70
  def search(query: str, task_id: str):
71
- docsearch=initDocument()
72
 
73
  filtering_conditions = {
74
  "task_id": {"$eq": task_id},
75
  }
76
- data =docsearch.similarity_search(query, k=3, filter=filtering_conditions)
77
  return [
78
  {"text": d.page_content, "start": d.metadata["start"], "end": d.metadata["end"]}
79
  for d in data
@@ -82,8 +83,8 @@ def search(query: str, task_id: str):
82
 
83
 
84
  def encode(temp: list[Document]):
85
- docsearch=initDocument()
86
- docsearch.add_documents(temp)
87
  # return embeddings.embed_documents(texts = [d.page_content for d in temp])
88
 
89
 
 
1
  from langchain.embeddings import HuggingFaceEmbeddings
2
  from langchain.docstore.document import Document
3
+ from langchain_pinecone import PineconeVectorStore
4
  from pinecone import ServerlessSpec
5
  import pinecone
6
  import os
 
18
  model_name = "thenlper/gte-base"
19
  embeddings = HuggingFaceEmbeddings(model_name=model_name)
20
 
21
+ pc = pinecone.Pinecone(api_key=PINECONE_API_KEY, environment=PINECONE_ENV)
22
+
23
  try:
24
+ index = pc.Index(index_name)
25
+ except pinecone.core.client.exceptions.NotFoundException:
26
+ pc.create_index(
27
+ name=index_name,
28
+ dimension=768,
29
+ metric="cosine",
30
+ spec=ServerlessSpec(cloud="aws", region="us-east-1")
31
  )
32
+ index = pc.Index(index_name)
 
 
33
 
34
+ vector_store = PineconeVectorStore(index=index, embedding=embeddings)
35
+ return vector_store
36
 
37
 
38
 
39
 
40
  async def delete_documents(task_id):
41
+ vector_store = initDocument()
42
 
43
+ vector_store.delete(
44
  filter={
45
  "task_id": {"$eq": task_id},
46
  }
 
69
 
70
 
71
  def search(query: str, task_id: str):
72
+ vector_store = initDocument()
73
 
74
  filtering_conditions = {
75
  "task_id": {"$eq": task_id},
76
  }
77
+ data =vector_store.similarity_search(query, k=3, filter=filtering_conditions)
78
  return [
79
  {"text": d.page_content, "start": d.metadata["start"], "end": d.metadata["end"]}
80
  for d in data
 
83
 
84
 
85
  def encode(temp: list[Document]):
86
+ vector_store = initDocument()
87
+ vector_store.add_documents(temp)
88
  # return embeddings.embed_documents(texts = [d.page_content for d in temp])
89
 
90
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  asyncpg==0.27.0
2
  databases==0.7.0
3
- fastapi==0.92.0
4
  orm==0.3.
5
  transformers
6
  faster-whisper
@@ -13,7 +13,7 @@ typesystem==0.3.1
13
  Werkzeug==2.2.2
14
  passlib # for password hashing
15
  pydantic[email]
16
- uvicorn==0.21.1
17
  ujson
18
  yt-dlp
19
  psutil
@@ -27,12 +27,12 @@ telethon
27
  fastapi-jwt-auth
28
  bcrypt
29
  aiomysql
30
-
31
  asyncpg
32
  sentence_transformers
33
  google-generativeai
34
  openai
35
  tiktoken
36
- langchain==0.0.336
37
  mysqlclient
38
- pinecone-client[grpc]==3.1.0
 
1
  asyncpg==0.27.0
2
  databases==0.7.0
3
+ fastapi
4
  orm==0.3.
5
  transformers
6
  faster-whisper
 
13
  Werkzeug==2.2.2
14
  passlib # for password hashing
15
  pydantic[email]
16
+ uvicorn
17
  ujson
18
  yt-dlp
19
  psutil
 
27
  fastapi-jwt-auth
28
  bcrypt
29
  aiomysql
30
+ langchain-pinecone
31
  asyncpg
32
  sentence_transformers
33
  google-generativeai
34
  openai
35
  tiktoken
36
+ langchain
37
  mysqlclient
38
+ pinecone-client[grpc]