prashant
committed on
Commit
•
9dca2b8
1
Parent(s):
183851c
changing coherence model
Browse files- paramconfig.cfg +3 -3
- utils/ndc_explorer.py +63 -28
paramconfig.cfg
CHANGED
@@ -36,11 +36,11 @@ TOP_N = 20
|
|
36 |
|
37 |
[coherence]
|
38 |
RETRIEVER_TOP_K = 10
|
39 |
-
MAX_SEQ_LENGTH =
|
40 |
-
RETRIEVER =
|
41 |
RETRIEVER_FORMAT = sentence_transformers
|
42 |
RETRIEVER_EMB_LAYER = -1
|
43 |
-
EMBEDDING_DIM =
|
44 |
THRESHOLD = 0.55
|
45 |
SPLIT_BY = word
|
46 |
SPLIT_LENGTH = 120
|
|
|
36 |
|
37 |
[coherence]
|
38 |
RETRIEVER_TOP_K = 10
|
39 |
+
MAX_SEQ_LENGTH = 512
|
40 |
+
RETRIEVER = msmarco-distilbert-dot-v5
|
41 |
RETRIEVER_FORMAT = sentence_transformers
|
42 |
RETRIEVER_EMB_LAYER = -1
|
43 |
+
EMBEDDING_DIM = 768
|
44 |
THRESHOLD = 0.55
|
45 |
SPLIT_BY = word
|
46 |
SPLIT_LENGTH = 120
|
utils/ndc_explorer.py
CHANGED
@@ -3,38 +3,67 @@ import urllib.request
|
|
3 |
import json
|
4 |
|
5 |
link = "https://klimalog.die-gdi.de/ndc/open-data/dataset.json"
|
6 |
-
def get_document(
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
24 |
else:
|
25 |
-
|
|
|
|
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
# country_ndc = get_document('NDCs', countryList[option])
|
36 |
|
37 |
-
def countrySpecificCCA(cca_sent, threshold, countryCode):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
temp = {}
|
39 |
doc = get_document(countryCode)
|
40 |
for key,value in cca_sent.items():
|
@@ -45,6 +74,12 @@ def countrySpecificCCA(cca_sent, threshold, countryCode):
|
|
45 |
|
46 |
|
47 |
def countrySpecificCCM(ccm_sent, threshold, countryCode):
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
temp = {}
|
49 |
doc = get_document(countryCode)
|
50 |
for key,value in ccm_sent.items():
|
|
|
3 |
import json
|
4 |
|
5 |
link = "https://klimalog.die-gdi.de/ndc/open-data/dataset.json"
|
6 |
+
def get_document(country_code: str):
|
7 |
+
"""
|
8 |
+
read the country NDC data from
|
9 |
+
https://klimalog.die-gdi.de/ndc/open-data/dataset.json
|
10 |
+
using the country code.
|
11 |
+
|
12 |
+
Params
|
13 |
+
-------
|
14 |
+
country_code: country code used as the key into the 'NDCs' section of the dataset."""
|
15 |
+
with urllib.request.urlopen(link) as urlfile:
|
16 |
+
data = json.loads(urlfile.read())
|
17 |
+
categoriesData = {}
|
18 |
+
categoriesData['categories']= data['categories']
|
19 |
+
categoriesData['subcategories']= data['subcategories']
|
20 |
+
keys_sub = categoriesData['subcategories'].keys()
|
21 |
+
documentType= 'NDCs'
|
22 |
+
if documentType in data.keys():
|
23 |
+
if country_code in data[documentType].keys():
|
24 |
+
get_dict = {}
|
25 |
+
for key, value in data[documentType][country_code].items():
|
26 |
+
if key not in ['country_name','region_id', 'region_name']:
|
27 |
+
get_dict[key] = value['classification']
|
28 |
else:
|
29 |
+
get_dict[key] = value
|
30 |
+
else:
|
31 |
+
return None
|
32 |
+
else:
|
33 |
+
return None
|
34 |
|
35 |
+
country = {}
|
36 |
+
for key in categoriesData['categories']:
|
37 |
+
country[key]= {}
|
38 |
+
for key,value in categoriesData['subcategories'].items():
|
39 |
+
country[value['category']][key] = get_dict[key]
|
40 |
+
|
41 |
+
return country
|
42 |
|
|
|
43 |
|
44 |
+
def countrySpecificCCA(cca_sent:dict, threshold:int, countryCode:str):
|
45 |
+
"""
|
46 |
+
based on the countrycode, reads the country data from
|
47 |
+
https://klimalog.die-gdi.de/ndc/open-data/dataset.json
|
48 |
+
using get_document from utils/ndc_explorer.py
|
49 |
+
then, based on the threshold value, filters the Climate Change Adaptation
|
50 |
+
targets assigned by NDC explorer team to that country. Using the sentences
|
51 |
+
created by the Data Services team of GIZ for each target level, tries to find the
|
52 |
+
relevant passages in the document by doing a semantic search.
|
53 |
+
|
54 |
+
Params
|
55 |
+
-------
|
56 |
+
cca_sent: dictionary with 'target labels' as keys and, as values, manufactured sentences
|
57 |
+
reflecting the target level. Please see the docStore/ndcs/cca.txt
|
58 |
+
|
59 |
+
threshold: NDC targets have many categories ranging from [0-5], with 0
|
60 |
+
reflecting the most relaxed attitude and 5 being the most aggressive towards Climate
|
61 |
+
change. We select the threshold value above which targets need our focus.
|
62 |
+
|
63 |
+
countryCode: standard country code to allow us to fetch the country specific
|
64 |
+
data.
|
65 |
+
|
66 |
+
"""
|
67 |
temp = {}
|
68 |
doc = get_document(countryCode)
|
69 |
for key,value in cca_sent.items():
|
|
|
74 |
|
75 |
|
76 |
def countrySpecificCCM(ccm_sent, threshold, countryCode):
|
77 |
+
"""
|
78 |
+
See the documentation of countrySpecificCCA. This is the same, except that
|
79 |
+
this gets the data pertaining to Mitigation (presumably CCM = Climate Change Mitigation).
|
80 |
+
|
81 |
+
"""
|
82 |
+
|
83 |
temp = {}
|
84 |
doc = get_document(countryCode)
|
85 |
for key,value in ccm_sent.items():
|