Spaces:
GIZ
/
Running on CPU Upgrade

prashant commited on
Commit
9dca2b8
1 Parent(s): 183851c

changing coherence model

Browse files
Files changed (2) hide show
  1. paramconfig.cfg +3 -3
  2. utils/ndc_explorer.py +63 -28
paramconfig.cfg CHANGED
@@ -36,11 +36,11 @@ TOP_N = 20
36
 
37
  [coherence]
38
  RETRIEVER_TOP_K = 10
39
- MAX_SEQ_LENGTH = 256
40
- RETRIEVER = all-MiniLM-L6-v2
41
  RETRIEVER_FORMAT = sentence_transformers
42
  RETRIEVER_EMB_LAYER = -1
43
- EMBEDDING_DIM = 384
44
  THRESHOLD = 0.55
45
  SPLIT_BY = word
46
  SPLIT_LENGTH = 120
 
36
 
37
  [coherence]
38
  RETRIEVER_TOP_K = 10
39
+ MAX_SEQ_LENGTH = 512
40
+ RETRIEVER = msmarco-distilbert-dot-v5
41
  RETRIEVER_FORMAT = sentence_transformers
42
  RETRIEVER_EMB_LAYER = -1
43
+ EMBEDDING_DIM = 768
44
  THRESHOLD = 0.55
45
  SPLIT_BY = word
46
  SPLIT_LENGTH = 120
utils/ndc_explorer.py CHANGED
@@ -3,38 +3,67 @@ import urllib.request
3
  import json
4
 
5
  link = "https://klimalog.die-gdi.de/ndc/open-data/dataset.json"
6
- def get_document(countryCode: str):
7
- with urllib.request.urlopen(link) as urlfile:
8
- data = json.loads(urlfile.read())
9
- categoriesData = {}
10
- categoriesData['categories']= data['categories']
11
- categoriesData['subcategories']= data['subcategories']
12
- keys_sub = categoriesData['subcategories'].keys()
13
- documentType= 'NDCs'
14
- if documentType in data.keys():
15
- if countryCode in data[documentType].keys():
16
- get_dict = {}
17
- for key, value in data[documentType][countryCode].items():
18
- if key not in ['country_name','region_id', 'region_name']:
19
- get_dict[key] = value['classification']
20
- else:
21
- get_dict[key] = value
22
- else:
23
- return None
 
 
 
 
24
  else:
25
- return None
 
 
 
 
26
 
27
- country = {}
28
- for key in categoriesData['categories']:
29
- country[key]= {}
30
- for key,value in categoriesData['subcategories'].items():
31
- country[value['category']][key] = get_dict[key]
32
-
33
- return country
34
 
35
- # country_ndc = get_document('NDCs', countryList[option])
36
 
37
- def countrySpecificCCA(cca_sent, threshold, countryCode):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  temp = {}
39
  doc = get_document(countryCode)
40
  for key,value in cca_sent.items():
@@ -45,6 +74,12 @@ def countrySpecificCCA(cca_sent, threshold, countryCode):
45
 
46
 
47
  def countrySpecificCCM(ccm_sent, threshold, countryCode):
 
 
 
 
 
 
48
  temp = {}
49
  doc = get_document(countryCode)
50
  for key,value in ccm_sent.items():
 
3
  import json
4
 
5
  link = "https://klimalog.die-gdi.de/ndc/open-data/dataset.json"
6
+ def get_document(country_code: str):
7
+ """
8
+ read the country NDC data from
9
+ https://klimalog.die-gdi.de/ndc/open-data/dataset.json
10
+ using the country code.
11
+
12
+ Params
13
+ -------
14
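+ country_code: key of the country to look up under 'NDCs' in the dataset; None is returned when it is not present."""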
15
+ with urllib.request.urlopen(link) as urlfile:
16
+ data = json.loads(urlfile.read())
17
+ categoriesData = {}
18
+ categoriesData['categories']= data['categories']
19
+ categoriesData['subcategories']= data['subcategories']
20
+ keys_sub = categoriesData['subcategories'].keys()
21
+ documentType= 'NDCs'
22
+ if documentType in data.keys():
23
+ if country_code in data[documentType].keys():
24
+ get_dict = {}
25
+ for key, value in data[documentType][country_code].items():
26
+ if key not in ['country_name','region_id', 'region_name']:
27
+ get_dict[key] = value['classification']
28
  else:
29
+ get_dict[key] = value
30
+ else:
31
+ return None
32
+ else:
33
+ return None
34
 
35
+ country = {}
36
+ for key in categoriesData['categories']:
37
+ country[key]= {}
38
+ for key,value in categoriesData['subcategories'].items():
39
+ country[value['category']][key] = get_dict[key]
40
+
41
+ return country
42
 
 
43
 
44
+ def countrySpecificCCA(cca_sent:dict, threshold:int, countryCode:str):
45
+ """
46
+ based on the countrycode, reads the country data from
47
+ https://klimalog.die-gdi.de/ndc/open-data/dataset.json
48
+ using get_document from utils/ndc_explorer.py
49
+ then based on the threshold value filters the Climate Change Adaptation
50
+ targets assigned by NDC explorer team to that country. Using the sentences
51
+ created by the Data Services team of GIZ for each target level, tries to find the
52
+ relevant passages from the document by doing the semantic search.
53
+
54
+ Params
55
+ -------
56
+ cca_sent: dictionary with key as 'target labels' and manufactured sentences
57
+ reflecting the target level. Please see the docStore/ndcs/cca.txt
58
+
59
+ threshold: NDC targets have many categories ranging from [0-5], with 0
60
+ reflecting the most relaxed attitude and 5 being the most aggressive towards Climate
61
+ change. We select the threshold value beyond which we need to focus on.
62
+
63
+ countryCode: standard country code to allow us to fetch the country specific
64
+ data.
65
+
66
+ """
67
  temp = {}
68
  doc = get_document(countryCode)
69
  for key,value in cca_sent.items():
 
74
 
75
 
76
  def countrySpecificCCM(ccm_sent, threshold, countryCode):
77
+ """
78
+ see the documentation of countrySpecificCCA. This is the same, except that
79
+ this gets the data pertaining to Mitigation
80
+
81
+ """
82
+
83
  temp = {}
84
  doc = get_document(countryCode)
85
  for key,value in ccm_sent.items():