ppsingh committed on
Commit
4f4f5f3
·
verified ·
1 Parent(s): 4bc18e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -10
app.py CHANGED
@@ -66,6 +66,8 @@ def get_chunks():
66
  'language', 'start_year', 'end_year','poli_trager'], inplace=True)
67
  giz_df.rename(columns = {'project_name':'title_main','countries':'country_name',
68
  'client':'org','project_description':'description_main'}, inplace=True)
 
 
69
  df = pd.concat([projects_df,giz_df],ignore_index=True)
70
  print(df.columns)
71
 
@@ -75,12 +77,9 @@ def get_chunks():
75
  placeholder= []
76
  for i in range(len(giz_df)):
77
  placeholder.append(Document(page_content= giz_df.loc[i,'chunks'],
78
- metadata={"iati_id": giz_df.loc[i,'iati_id'],
79
- "iati_orga_id":giz_df.loc[i,'iati_orga_id'],
80
  "country_name":str(giz_df.loc[i,'country_name']),
81
- "crs_5_name": giz_df.loc[i,'crs_5_name'],
82
- "crs_3_name": giz_df.loc[i,'crs_3_name'],
83
- "sgd_pred_str":giz_df.loc[i,'sgd_pred_str'],
84
  "status":giz_df.loc[i,'status'],
85
  "title_main":giz_df.loc[i,'title_main'],}))
86
  return placeholder
@@ -112,17 +111,15 @@ def embed_chunks(chunks):
112
  encode_kwargs = {'normalize_embeddings': True},
113
  model_name='BAAI/bge-m3'
114
  )
115
- sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
116
  # placeholder for collection
117
  print("starting embedding")
118
  qdrant_collections = {}
119
- qdrant_collections['iati'] = Qdrant.from_documents(
120
  chunks,
121
  embeddings,
122
- sparse_embeddings = sparse_embeddings,
123
  path="/data/local_qdrant",
124
- collection_name='iati',
125
- retrieval_mode=RetrievalMode.HYBRID,
126
  )
127
 
128
  print(qdrant_collections)
 
66
  'language', 'start_year', 'end_year','poli_trager'], inplace=True)
67
  giz_df.rename(columns = {'project_name':'title_main','countries':'country_name',
68
  'client':'org','project_description':'description_main'}, inplace=True)
69
+ giz_df['source'] = 'GIZ_WORLDWIDE'
70
+ giz_df['status'] = "None"
71
  df = pd.concat([projects_df,giz_df],ignore_index=True)
72
  print(df.columns)
73
 
 
77
  placeholder= []
78
  for i in range(len(giz_df)):
79
  placeholder.append(Document(page_content= giz_df.loc[i,'chunks'],
80
+ metadata={"id": giz_df.loc[i,'id'],
81
+ "org":giz_df.loc[i,'org'],
82
  "country_name":str(giz_df.loc[i,'country_name']),
 
 
 
83
  "status":giz_df.loc[i,'status'],
84
  "title_main":giz_df.loc[i,'title_main'],}))
85
  return placeholder
 
111
  encode_kwargs = {'normalize_embeddings': True},
112
  model_name='BAAI/bge-m3'
113
  )
114
+ #sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
115
  # placeholder for collection
116
  print("starting embedding")
117
  qdrant_collections = {}
118
+ qdrant_collections['all'] = Qdrant.from_documents(
119
  chunks,
120
  embeddings,
 
121
  path="/data/local_qdrant",
122
+ collection_name='all',
 
123
  )
124
 
125
  print(qdrant_collections)