CarlosMalaga committed on
Commit
e8cd6b0
1 Parent(s): fd0f289

Upload 80 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. models/retriever/intervention/gpt+llama/db/document_index/config.yaml +5 -0
  2. models/retriever/intervention/gpt+llama/db/document_index/documents.jsonl +55 -0
  3. models/retriever/intervention/gpt+llama/db/document_index/embeddings.pt +3 -0
  4. models/retriever/intervention/gpt+llama/db/question_encoder/config.json +29 -0
  5. models/retriever/intervention/gpt+llama/db/question_encoder/hf.py +99 -0
  6. models/retriever/intervention/gpt+llama/db/question_encoder/model.safetensors +3 -0
  7. models/retriever/intervention/gpt+llama/db/question_encoder/special_tokens_map.json +37 -0
  8. models/retriever/intervention/gpt+llama/db/question_encoder/tokenizer.json +0 -0
  9. models/retriever/intervention/gpt+llama/db/question_encoder/tokenizer_config.json +59 -0
  10. models/retriever/intervention/gpt+llama/db/question_encoder/vocab.txt +0 -0
  11. models/retriever/intervention/gpt+llama/taxonomy/document_index/config.yaml +5 -0
  12. models/retriever/intervention/gpt+llama/taxonomy/document_index/documents.jsonl +0 -0
  13. models/retriever/intervention/gpt+llama/taxonomy/document_index/embeddings.pt +3 -0
  14. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/config.json +29 -0
  15. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/hf.py +99 -0
  16. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/model.safetensors +3 -0
  17. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/special_tokens_map.json +37 -0
  18. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/tokenizer.json +0 -0
  19. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/tokenizer_config.json +59 -0
  20. models/retriever/intervention/gpt+llama/taxonomy/question_encoder/vocab.txt +0 -0
  21. models/retriever/intervention/gpt/db/document_index/config.yaml +5 -0
  22. models/retriever/intervention/gpt/db/document_index/documents.jsonl +55 -0
  23. models/retriever/intervention/gpt/db/document_index/embeddings.pt +3 -0
  24. models/retriever/intervention/gpt/db/question_encoder/config.json +29 -0
  25. models/retriever/intervention/gpt/db/question_encoder/hf.py +99 -0
  26. models/retriever/intervention/gpt/db/question_encoder/model.safetensors +3 -0
  27. models/retriever/intervention/gpt/db/question_encoder/special_tokens_map.json +37 -0
  28. models/retriever/intervention/gpt/db/question_encoder/tokenizer.json +0 -0
  29. models/retriever/intervention/gpt/db/question_encoder/tokenizer_config.json +59 -0
  30. models/retriever/intervention/gpt/db/question_encoder/vocab.txt +0 -0
  31. models/retriever/intervention/gpt/taxonomy/document_index/config.yaml +5 -0
  32. models/retriever/intervention/gpt/taxonomy/document_index/documents.jsonl +0 -0
  33. models/retriever/intervention/gpt/taxonomy/document_index/embeddings.pt +3 -0
  34. models/retriever/intervention/gpt/taxonomy/question_encoder/config.json +29 -0
  35. models/retriever/intervention/gpt/taxonomy/question_encoder/hf.py +99 -0
  36. models/retriever/intervention/gpt/taxonomy/question_encoder/model.safetensors +3 -0
  37. models/retriever/intervention/gpt/taxonomy/question_encoder/special_tokens_map.json +37 -0
  38. models/retriever/intervention/gpt/taxonomy/question_encoder/tokenizer.json +0 -0
  39. models/retriever/intervention/gpt/taxonomy/question_encoder/tokenizer_config.json +59 -0
  40. models/retriever/intervention/gpt/taxonomy/question_encoder/vocab.txt +0 -0
  41. models/retriever/outcome/gpt+llama/db/document_index/config.yaml +5 -0
  42. models/retriever/outcome/gpt+llama/db/document_index/documents.jsonl +113 -0
  43. models/retriever/outcome/gpt+llama/db/document_index/embeddings.pt +3 -0
  44. models/retriever/outcome/gpt+llama/db/question_encoder/config.json +29 -0
  45. models/retriever/outcome/gpt+llama/db/question_encoder/hf.py +99 -0
  46. models/retriever/outcome/gpt+llama/db/question_encoder/model.safetensors +3 -0
  47. models/retriever/outcome/gpt+llama/db/question_encoder/special_tokens_map.json +37 -0
  48. models/retriever/outcome/gpt+llama/db/question_encoder/tokenizer.json +0 -0
  49. models/retriever/outcome/gpt+llama/db/question_encoder/tokenizer_config.json +59 -0
  50. models/retriever/outcome/gpt+llama/db/question_encoder/vocab.txt +0 -0
models/retriever/intervention/gpt+llama/db/document_index/config.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _target_: goldenretriever.indexers.inmemory.InMemoryDocumentIndex
2
+ metadata_fields:
3
+ - definition
4
+ separator: ' <def> '
5
+ name_or_path: null
models/retriever/intervention/gpt+llama/db/document_index/documents.jsonl ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "flocculation/disinfection", "id": 0, "metadata": {"definition": "intervention. these interventions provide, or promote the uptake of, flocculation/disinfection for use at home. these combined coagulant-chlorine disinfection systems are commercial kits that combine dry coagulant/flocculent and chlorine as tablets or sachets.", "level": 4, "type": "intervention"}}
2
+ {"text": "entrepreneurship training", "id": 1, "metadata": {"definition": "intervention. provision of entrepreneurship training focused on how to start up a business from scratch, which can include for example developing a business plan, day-to-day management of small enterprise, including bookkeeping, financial planning, etc.", "level": 4, "type": "intervention"}}
3
+ {"text": "general health counselling", "id": 2, "metadata": {"definition": "intervention. group or individual counselling to promote best practices and transmit knowledge.", "level": 4, "type": "intervention"}}
4
+ {"text": "apprenticeship programmes", "id": 3, "metadata": {"definition": "intervention. work experience programmes, often formal, to train a worker to become skilled in a particular trade.", "level": 4, "type": "intervention"}}
5
+ {"text": "technical and vocational education and training (tvet)", "id": 4, "metadata": {"definition": "intervention. education and training which provides knowledge and skills for employment. tvet uses formal, non-formal, and informal learning.", "level": 4, "type": "intervention"}}
6
+ {"text": "capacity building for conflict transformation", "id": 5, "metadata": {"definition": "intervention. interventions that build community and key actors' capacity to participate in or establish opportunities or platforms for community and subnational conflict transformation processes. these may also be referred to as building skills for mediation, negotiation, conflict resolution, dispute resolution or conflict prevention. this may include interventions involving political and military leaders unofficial dialogue and problem-solving activities with different civil society actors; grassroots level and activities including joint meetings to advocacy for marginalized groups.", "level": 4, "type": "intervention"}}
7
+ {"text": "food subsidies", "id": 6, "metadata": {"definition": "intervention. food subsidies in the form of entitled reduced price.", "level": 4, "type": "intervention"}}
8
+ {"text": "citizen feedback mechanisms", "id": 7, "metadata": {"definition": "intervention. interventions facilitating meetings between service users and providers, setting up grievance redress mechanisms and scorecards allowing users to rate providers.", "level": 4, "type": "intervention"}}
9
+ {"text": "subsidy for latrine", "id": 8, "metadata": {"definition": "intervention. these are interventions where a subsidy has been provided on cost of the materials to construct a latrine.", "level": 4, "type": "intervention"}}
10
+ {"text": "secondary water treatments", "id": 9, "metadata": {"definition": "intervention. interventions that provide, or promote the uptake of, secondary forms of water treatment or supply.", "level": 4, "type": "intervention"}}
11
+ {"text": "maize subsidies", "id": 10, "metadata": {"definition": "intervention. programmes that provide price subsidies for maize", "level": 4, "type": "intervention"}}
12
+ {"text": "community-driven development & reconstruction (cdd & cdr)", "id": 11, "metadata": {"definition": "intervention. development initiative that provides control of the development process, resources and decision making authority directly to groups in the community. this includes both community-driven development (cdd) and community driven reconstruction (cdr).", "level": 4, "type": "intervention"}}
13
+ {"text": "behavioural sanitation promotion", "id": 12, "metadata": {"definition": "intervention. these are interventions that encourage the practice of proper sanitation practices at the household or community level through behavioural promotion, such as using social pressure, environmental cues, nudges, or triggers of disgust and shame.", "level": 4, "type": "intervention"}}
14
+ {"text": "agricultural marketing training", "id": 13, "metadata": {"definition": "intervention. training in how to bring products to market, including price-setting and negotiation", "level": 4, "type": "intervention"}}
15
+ {"text": "life skills and employment training for conflict affected population", "id": 14, "metadata": {"definition": "intervention. this category provides training programmes primarily for jobs and livelihoods for conflict affected populations, but can also include programmes such as music instruction for youth, which may have less obvious monetary reward but aims to provide skills and an alternative to violence. this category does not include the development of peace skills (such as conflict management).", "level": 4, "type": "intervention"}}
16
+ {"text": "representation of women & minorities", "id": 15, "metadata": {"definition": "intervention. introduction of quotas, reserved seats or similar within political institutions - such as parliament, state legislatures, and village councils - for women, ethnic minorities, and castes.", "level": 4, "type": "intervention"}}
17
+ {"text": "civil society capacity building", "id": 16, "metadata": {"definition": "intervention. this encompasses a fairly broad range of interventions that work with civil society organisations (csos) to develop their capacity as a force of change (i.e. capacity building of csos to advocate for and engage with citizens and the government). this can include the establishment of community interest groups, such as women and youth committees.", "level": 4, "type": "intervention"}}
18
+ {"text": "communication between health care providers", "id": 17, "metadata": {"definition": "intervention. systems or strategies for improving the communication between health care providers, especially between regular healthcare providers/family doctors and other providers and specialised care.", "level": 4, "type": "intervention"}}
19
+ {"text": "providing care to groups versus individual patients", "id": 18, "metadata": {"definition": "intervention. comparisons of providing care to groups versus individual patients, for example intensive group therapy, group vs individual antenatal care.", "level": 4, "type": "intervention"}}
20
+ {"text": "other life skills", "id": 19, "metadata": {"definition": "intervention. programmes that train and enhance other life skills.", "level": 4, "type": "intervention"}}
21
+ {"text": "ecosystem conservation", "id": 20, "metadata": {"definition": "intervention. preservation of ecosystems, including through awareness campaigns.", "level": 4, "type": "intervention"}}
22
+ {"text": "labour market services and intermediation", "id": 21, "metadata": {"definition": "intervention. provision and promotion of employment services to jobseekers, such as job-matching, support, and counselling and advice.", "level": 4, "type": "intervention"}}
23
+ {"text": "peer support groups for menstrual hygiene and sensitisation", "id": 22, "metadata": {"definition": "intervention. interventions that encourage the formation of peer support groups for menstrual hygiene and sensitisation.", "level": 4, "type": "intervention"}}
24
+ {"text": "rotating/accumulated savings and credit associations", "id": 23, "metadata": {"definition": "intervention. a group of individuals acting as an informal financial institution in the form of an alternative financial vehicle. a rosca happens via set contributions and withdrawals to and from a common fund. an ascra happens via set contributions with the common fund used for extending credit within or outside of the group. these include savings and internal lending communities (silcs) and self-help groups (shg).", "level": 4, "type": "intervention"}}
25
+ {"text": "countering violent extremism (cve)", "id": 24, "metadata": {"definition": "intervention. cve is a security-focused approach that intends to address the conditions and reduce the factors that most likely contribute to violent extremism or terrorism. interventions within this group may try to counteract radicalization by providing information and resources to the general public, as well as training to local leaders to engage with different stakeholders to reduce the recruitment and radicalization by violent extremists.", "level": 4, "type": "intervention"}}
26
+ {"text": "microenterprise grant", "id": 25, "metadata": {"definition": "intervention. provision of a small unconditional grant intended specifically at supporting a microenterprise or starting up a microenterprise from scratch. this program can be implemented on its own, but is usually combined with other components of microenterprise support such as a business training, access to credit etc.", "level": 4, "type": "intervention"}}
27
+ {"text": "skills for conflict management", "id": 26, "metadata": {"definition": "intervention. interventions that aim at preventing disputes from scaling up at the community or village-level. this can include interventions promoting dialogue and negotiation skills or training on dispute resolution mechanisms to avoid the use of violence.", "level": 4, "type": "intervention"}}
28
+ {"text": "access and participation", "id": 27, "metadata": {"definition": "intervention. provision of support and services to increase access to services and participation (for example in education, employment, public life). this can be done for example through quotas in universities, public services and political institutions (affirmative positive actions) or through educational campaign and targeted encouragement (positive action) or other social inclusion interventions. for communication or behavioural change interventions relating to gender issues, please use the gender equality code.", "level": 4, "type": "intervention"}}
29
+ {"text": "unconditional cash transfers (ucts)", "id": 28, "metadata": {"definition": "intervention. cash transfer to families or individuals without any condition from the beneficiaries. this can be a regular cash transfer or a one-time cash transfer.", "level": 4, "type": "intervention"}}
30
+ {"text": "total sanitation campaign", "id": 29, "metadata": {"definition": "intervention. this is a specific intervention designed by the indian government. it involves an education component to drive-up the demand for sanitation services, and then in some areas also a subsidy for those below the poverty line.", "level": 4, "type": "intervention"}}
31
+ {"text": "peer-to-peer learning", "id": 30, "metadata": {"definition": "intervention. schemes to allow students to learn together, and from each other, such as study groups.", "level": 4, "type": "intervention"}}
32
+ {"text": "community latrine and toilet provision", "id": 31, "metadata": {"definition": "intervention. these interventions provide new or improved hardware for latrines, or other means of excreta disposal, for the community. a shared facility provided for a defined group of residents or an entire settlement / community. it is normally located in or near the community area and used by almost all community members.", "level": 4, "type": "intervention"}}
33
+ {"text": "peace messaging and media", "id": 32, "metadata": {"definition": "intervention. these interventions focus on the capacity building of media organisations and supporting them to provide peace messaging to their local community, among other content.", "level": 4, "type": "intervention"}}
34
+ {"text": "job placement schemes", "id": 33, "metadata": {"definition": "intervention. work placement programmes that seek to put job-seekers into positions within business or trade. these include internship or apprenticeship programmes.", "level": 4, "type": "intervention"}}
35
+ {"text": "behavioural hand hygiene promotion", "id": 34, "metadata": {"definition": "intervention. these are interventions that encourage the practice of proper handwashing at the household or community level through behavioural promotion, such as using social pressure, environmental cues, nudges, or triggers of disgust and shame.", "level": 4, "type": "intervention"}}
36
+ {"text": "peace education", "id": 35, "metadata": {"definition": "intervention. peace education interventions promote the knowledge, skills and attitudes that shape the social environment to both prevent conflict from occurring and help people to resolve it peacefully. these interventions can be run at many scales (i.e. local versus national) and often involve promoting a community dialogue. they usually cover a range of topics including non-violent conflict resolution techniques, human rights, democracy, disarmament, gender equality, tolerance and communication skills.", "level": 4, "type": "intervention"}}
37
+ {"text": "public legal education campaigns", "id": 36, "metadata": {"definition": "intervention. using different forms of media to share information and increase awareness about laws, rights and services to the general population.", "level": 4, "type": "intervention"}}
38
+ {"text": "employment incentives and wage subsidies", "id": 37, "metadata": {"definition": "intervention. provision of employment incentives and wage subsidies for private sector work with the aim to create jobs.", "level": 4, "type": "intervention"}}
39
+ {"text": "handwashing stations", "id": 38, "metadata": {"definition": "intervention. provision or improving of handwashing stations.", "level": 4, "type": "intervention"}}
40
+ {"text": "educational sanitation promotion", "id": 39, "metadata": {"definition": "intervention. these are interventions that encourage the practice of proper sanitation practices at the household or community level through educational promotion, such as sanitation management education, teaching best practices, and information on sanitation and waste management.", "level": 4, "type": "intervention"}}
41
+ {"text": "public release of health provider performance data", "id": 40, "metadata": {"definition": "intervention. informing the public about healthcare providers by the release of performance data in written or electronic form.", "level": 4, "type": "intervention"}}
42
+ {"text": "conditional cash transfers (ccts)", "id": 41, "metadata": {"definition": "intervention. cash transfer to families or individuals provided on a specific condition, such as school enrolment or child vaccination. the transfer is made when and only if the condition is met, which is verified by more or less intense monitoring.", "level": 4, "type": "intervention"}}
43
+ {"text": "community-led total sanitation (clts)", "id": 42, "metadata": {"definition": "intervention. clts is a participatory mechanism that mobilises communities by facilitating their own appraisal of, and action plan to address, open defecation. it often includes walks of shame, and the use of social pressures to elicit behaviour change.", "level": 4, "type": "intervention"}}
44
+ {"text": "financial literacy", "id": 43, "metadata": {"definition": "intervention. skills development in financial topics and products.", "level": 4, "type": "intervention"}}
45
+ {"text": "institutional capacity building of road agency", "id": 44, "metadata": {"definition": "intervention. these interventions work with the government department that oversees roads to strengthen their capacity. this can be in terms of planning, technical skills in construction, and knowledge on management.", "level": 4, "type": "intervention"}}
46
+ {"text": "sexual and gender-based violence (sgbv) - prevention", "id": 45, "metadata": {"definition": "intervention. this group of interventions aim to prevent sexual and gender-based violence (sgbv) through a combination of methods such as media campaigns, behaviour-change communication, and education. this also includes the establishment of legal protections, such as new laws to criminalise sgbv, training for the police and courts on how to investigate and handle sgbv.", "level": 4, "type": "intervention"}}
47
+ {"text": "general maintenance of latrines", "id": 46, "metadata": {"definition": "intervention. maintaining cleanliness and ensuring good level of hygiene for facilities", "level": 4, "type": "intervention"}}
48
+ {"text": "rice subsidies", "id": 47, "metadata": {"definition": "intervention. programmes that provide price subsidies for rice", "level": 4, "type": "intervention"}}
49
+ {"text": "health facility latrine and toilet provision", "id": 48, "metadata": {"definition": "intervention. these interventions provide new or improved hardware for latrines, or other means of excreta disposal, at a health facility.", "level": 4, "type": "intervention"}}
50
+ {"text": "intergroup dialogues", "id": 49, "metadata": {"definition": "intervention. these interventions aim to increase dialogue and social interaction between different groups, including different ethnic groups, displaced and host community groups, and people of different faiths. they are not part of formal peace processes, but rather processes that use engagement with key community leaders to bring different groups together. they may include purely dialogue-focused interventions or bring groups together through activities, such as arts or sports.", "level": 4, "type": "intervention"}}
51
+ {"text": "asset transfers", "id": 50, "metadata": {"definition": "intervention. provision of productive assets as in-kind support (non-cash goods or services)", "level": 4, "type": "intervention"}}
52
+ {"text": "simplified service delivery", "id": 51, "metadata": {"definition": "intervention. these are 'one stop shops' where governments provide access to a number of different administrative services, such as identity cards, marriage registration, licenses, etc. they enable better access to services by decreasing the time required to access a variety of basic services. also called 'service centres' or 'one stop centres'.", "level": 4, "type": "intervention"}}
53
+ {"text": "community observers", "id": 52, "metadata": {"definition": "intervention. members of the community charged with observing legal processes and conditions.", "level": 4, "type": "intervention"}}
54
+ {"text": "transparency and accountability initiatives", "id": 53, "metadata": {"definition": "intervention. promotion of transparency and accountability through citizens' participation and monitoring initiatives.", "level": 4, "type": "intervention"}}
55
+ {"text": "care environment", "id": 54, "metadata": {"definition": "intervention. changes to the physical or sensory healthcare environment, by adding or altering equipment or layout, providing music, art.", "level": 4, "type": "intervention"}}
models/retriever/intervention/gpt+llama/db/document_index/embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9159217c0d31c68b01512a0849c6fb2a668a989c8138ea6950001035e3afa3a
3
+ size 85675
models/retriever/intervention/gpt+llama/db/question_encoder/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/carlos/amr-parsing-master/sentence-similarity/retriever/wandb/wandb/retriever-small-intervention-gpt-llama/files/retriever/question_encoder",
3
+ "architectures": [
4
+ "GoldenRetrieverModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoModel": "hf.GoldenRetrieverModel"
9
+ },
10
+ "classifier_dropout": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 384,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "pad_token_id": 0,
22
+ "position_embedding_type": "absolute",
23
+ "projection_dim": null,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.37.2",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 30522
29
+ }
models/retriever/intervention/gpt+llama/db/question_encoder/hf.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ from transformers import PretrainedConfig
5
+ from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
6
+ from transformers.models.bert.modeling_bert import BertModel
7
+
8
+
9
class GoldenRetrieverConfig(PretrainedConfig):
    """Configuration object for the GoldenRetriever encoder.

    Every keyword argument except ``pad_token_id`` is stored unchanged as an
    attribute of the same name. ``pad_token_id`` and any extra ``**kwargs``
    are forwarded to ``PretrainedConfig.__init__``.

    ``projection_dim`` defaults to ``None``; all other defaults are the
    literal values in the signature below.
    """

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        projection_dim=None,
        **kwargs,
    ):
        # The base class owns pad_token_id and every option not named above.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Vocabulary and position settings.
        self.vocab_size = vocab_size
        self.type_vocab_size = type_vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.position_embedding_type = position_embedding_type

        # Layer sizes and activation.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act

        # Dropout, initialisation and normalisation.
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.classifier_dropout = classifier_dropout
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps

        # Remaining options.
        self.use_cache = use_cache
        self.projection_dim = projection_dim
51
+
52
+
53
class GoldenRetrieverModel(BertModel):
    """``BertModel`` subclass that replaces the pooled output.

    When an ``attention_mask`` is passed, ``pooler_output`` is the
    mask-weighted mean of the last hidden states; otherwise the parent
    model's own ``pooler_output`` is used. In both cases the result goes
    through a ``LayerNorm`` and, if ``config.projection_dim`` is set, through
    a ``Linear`` + ``LayerNorm`` projection.
    """

    config_class = GoldenRetrieverConfig

    def __init__(self, config, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not forwarded.
        super().__init__(config)
        self.layer_norm_layer = torch.nn.LayerNorm(
            config.hidden_size, eps=config.layer_norm_eps
        )
        # Optional projection head, built only when a target size is configured.
        self.projection: torch.nn.Module | None = None
        if config.projection_dim is not None:
            self.projection = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, config.projection_dim),
                torch.nn.LayerNorm(config.projection_dim),
            )

    def forward(
        self, **kwargs
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        """Run the encoder and return outputs with the custom pooled vector.

        Args:
            **kwargs: Keyword arguments forwarded to ``BertModel.forward``.
                ``attention_mask`` (if present) selects mean pooling.
                ``return_dict`` selects the output container; ``None`` or
                absent falls back to ``self.config.use_return_dict``.

        Returns:
            A ``BaseModelOutputWithPoolingAndCrossAttentions`` when
            ``return_dict`` resolves to true, otherwise a tuple of
            ``(last_hidden_state, pooler_output, *remaining outputs)``.
        """
        attention_mask = kwargs.get("attention_mask", None)

        # Take return_dict out of the forwarded kwargs: the code below reads
        # attributes of the parent's output, so the parent must always return
        # a ModelOutput. Previously return_dict=False reached the parent and
        # the attribute access then failed on the returned tuple.
        return_dict = kwargs.pop("return_dict", None)
        if return_dict is None:
            return_dict = self.config.use_return_dict

        model_outputs = super().forward(**kwargs, return_dict=True)

        if attention_mask is None:
            pooler_output = model_outputs.pooler_output
        else:
            token_embeddings = model_outputs.last_hidden_state
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            # Masked mean over dim 1; the clamp avoids dividing by zero when a
            # row of the mask is all zeros.
            pooler_output = torch.sum(
                token_embeddings * input_mask_expanded, 1
            ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

        pooler_output = self.layer_norm_layer(pooler_output)

        if self.projection is not None:
            pooler_output = self.projection(pooler_output)

        if not return_dict:
            # Swap in the custom pooled vector at position 1 and keep every
            # other populated output after it.
            return (model_outputs[0], pooler_output) + model_outputs[2:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=model_outputs.last_hidden_state,
            pooler_output=pooler_output,
            past_key_values=model_outputs.past_key_values,
            hidden_states=model_outputs.hidden_states,
            attentions=model_outputs.attentions,
            cross_attentions=model_outputs.cross_attentions,
        )
models/retriever/intervention/gpt+llama/db/question_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c04e66982d149e647c69e5d5ab522ed52e9ac75d36fd3f5a501639489cd6b00
3
+ size 133465384
models/retriever/intervention/gpt+llama/db/question_encoder/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
models/retriever/intervention/gpt+llama/db/question_encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt+llama/db/question_encoder/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 64,
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "stride": 0,
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "truncation_side": "right",
57
+ "truncation_strategy": "longest_first",
58
+ "unk_token": "[UNK]"
59
+ }
models/retriever/intervention/gpt+llama/db/question_encoder/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt+llama/taxonomy/document_index/config.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _target_: goldenretriever.indexers.inmemory.InMemoryDocumentIndex
2
+ metadata_fields:
3
+ - definition
4
+ separator: ' <def> '
5
+ name_or_path: null
models/retriever/intervention/gpt+llama/taxonomy/document_index/documents.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt+llama/taxonomy/document_index/embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c5f32aa1f5c81633cd484458b7bdb5b913b5baee05b42417bc1efc3f9fa9e7
3
+ size 1855147
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/carlos/amr-parsing-master/sentence-similarity/retriever/wandb/wandb/retriever-small-intervention-gpt-llama/files/retriever/question_encoder",
3
+ "architectures": [
4
+ "GoldenRetrieverModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoModel": "hf.GoldenRetrieverModel"
9
+ },
10
+ "classifier_dropout": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 384,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "pad_token_id": 0,
22
+ "position_embedding_type": "absolute",
23
+ "projection_dim": null,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.37.2",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 30522
29
+ }
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/hf.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ from transformers import PretrainedConfig
5
+ from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
6
+ from transformers.models.bert.modeling_bert import BertModel
7
+
8
+
9
class GoldenRetrieverConfig(PretrainedConfig):
    """Configuration for ``GoldenRetrieverModel``.

    Carries the usual BERT hyperparameters plus ``projection_dim``, which
    selects the width of the optional projection head applied to the pooled
    sentence embedding (``None`` means no projection head is built).
    """

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        projection_dim=None,
        **kwargs,
    ):
        # pad_token_id and any remaining keyword arguments are handled by the
        # base configuration class.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Store every hyperparameter on the instance under its own name.
        # The pairs are listed in the order the attributes are assigned.
        hyperparameters = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("hidden_act", hidden_act),
            ("intermediate_size", intermediate_size),
            ("hidden_dropout_prob", hidden_dropout_prob),
            ("attention_probs_dropout_prob", attention_probs_dropout_prob),
            ("max_position_embeddings", max_position_embeddings),
            ("type_vocab_size", type_vocab_size),
            ("initializer_range", initializer_range),
            ("layer_norm_eps", layer_norm_eps),
            ("position_embedding_type", position_embedding_type),
            ("use_cache", use_cache),
            ("classifier_dropout", classifier_dropout),
            ("projection_dim", projection_dim),
        )
        for attr_name, attr_value in hyperparameters:
            setattr(self, attr_name, attr_value)
51
+
52
+
53
class GoldenRetrieverModel(BertModel):
    """BERT encoder that exposes a sentence embedding as ``pooler_output``.

    The embedding is the attention-mask-weighted mean of the last hidden
    states (or the parent model's own pooled output when no attention mask is
    supplied), passed through a LayerNorm and, when
    ``config.projection_dim`` is set, through a Linear + LayerNorm
    projection head.
    """

    config_class = GoldenRetrieverConfig

    def __init__(self, config, *args, **kwargs):
        """Build the BERT backbone and the pooling head.

        Args:
            config: configuration providing ``hidden_size``,
                ``layer_norm_eps`` and ``projection_dim``.
            *args, **kwargs: accepted for call compatibility; they are not
                forwarded to the parent constructor.
        """
        super().__init__(config)
        self.layer_norm_layer = torch.nn.LayerNorm(
            config.hidden_size, eps=config.layer_norm_eps
        )
        self.projection: torch.nn.Module | None = None
        if config.projection_dim is not None:
            self.projection = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, config.projection_dim),
                torch.nn.LayerNorm(config.projection_dim),
            )

    def forward(
        self, **kwargs
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        """Encode the inputs and compute the pooled sentence embedding.

        Args:
            **kwargs: keyword arguments forwarded to the parent ``forward``.
                ``attention_mask`` (optional) selects mean pooling;
                ``return_dict`` (default ``True``) selects the output type.

        Returns:
            A ``BaseModelOutputWithPoolingAndCrossAttentions`` whose
            ``pooler_output`` is the normalised (and optionally projected)
            embedding, or the equivalent tuple when ``return_dict`` is falsy.
        """
        attention_mask = kwargs.get("attention_mask", None)
        return_dict = kwargs.get("return_dict", True)

        # Always request a ModelOutput from the backbone: the pooling below
        # reads fields by attribute, which raised AttributeError when the
        # parent returned a plain tuple (return_dict=False). The caller's
        # preference is honoured when the result is assembled at the end.
        backbone_kwargs = dict(kwargs, return_dict=True)
        model_outputs = super().forward(**backbone_kwargs)

        if attention_mask is None:
            pooler_output = model_outputs.pooler_output
        else:
            # Mean pooling: average the token embeddings over the positions
            # the attention mask marks as real tokens.
            token_embeddings = model_outputs.last_hidden_state
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            # The clamp guards against division by zero for an all-zero mask.
            pooler_output = torch.sum(
                token_embeddings * input_mask_expanded, 1
            ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

        pooler_output = self.layer_norm_layer(pooler_output)

        if self.projection is not None:
            pooler_output = self.projection(pooler_output)

        if not return_dict:
            # to_tuple() omits fields that are None; positions 0 and 1 are
            # the hidden states and the backbone's pooled output, which is
            # replaced here by the embedding computed above.
            # NOTE(review): assumes the backbone has its pooling layer, as
            # built by __init__ above - confirm if that ever changes.
            return (
                model_outputs.last_hidden_state,
                pooler_output,
            ) + model_outputs.to_tuple()[2:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=model_outputs.last_hidden_state,
            pooler_output=pooler_output,
            past_key_values=model_outputs.past_key_values,
            hidden_states=model_outputs.hidden_states,
            attentions=model_outputs.attentions,
            cross_attentions=model_outputs.cross_attentions,
        )
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c04e66982d149e647c69e5d5ab522ed52e9ac75d36fd3f5a501639489cd6b00
3
+ size 133465384
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 64,
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "stride": 0,
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "truncation_side": "right",
57
+ "truncation_strategy": "longest_first",
58
+ "unk_token": "[UNK]"
59
+ }
models/retriever/intervention/gpt+llama/taxonomy/question_encoder/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt/db/document_index/config.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _target_: goldenretriever.indexers.inmemory.InMemoryDocumentIndex
2
+ metadata_fields:
3
+ - definition
4
+ separator: ' <def> '
5
+ name_or_path: null
models/retriever/intervention/gpt/db/document_index/documents.jsonl ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "flocculation/disinfection", "id": 0, "metadata": {"definition": "intervention. these interventions provide, or promote the uptake of, flocculation/disinfection for use at home. these combined coagulant-chlorine disinfection systems are commercial kits that combine dry coagulant/flocculent and chlorine as tablets or sachets.", "level": 4, "type": "intervention"}}
2
+ {"text": "entrepreneurship training", "id": 1, "metadata": {"definition": "intervention. provision of entrepreneurship training focused on how to start up a business from scratch, which can include for example developing a business plan, day-to-day management of small enterprise, including bookkeeping, financial planning, etc.", "level": 4, "type": "intervention"}}
3
+ {"text": "general health counselling", "id": 2, "metadata": {"definition": "intervention. group or individual counselling to promote best practices and transmit knowledge.", "level": 4, "type": "intervention"}}
4
+ {"text": "apprenticeship programmes", "id": 3, "metadata": {"definition": "intervention. work experience programmes, often formal, to train a worker to become skilled in a particular trade.", "level": 4, "type": "intervention"}}
5
+ {"text": "technical and vocational education and training (tvet)", "id": 4, "metadata": {"definition": "intervention. education and training which provides knowledge and skills for employment. tvet uses formal, non-formal, and informal learning.", "level": 4, "type": "intervention"}}
6
+ {"text": "capacity building for conflict transformation", "id": 5, "metadata": {"definition": "intervention. interventions that build community and key actors' capacity to participate in or establish opportunities or platforms for community and subnational conflict transformation processes. these may also be referred to as building skills for mediation, negotiation, conflict resolution, dispute resolution or conflict prevention. this may include interventions involving political and military leaders unofficial dialogue and problem-solving activities with different civil society actors; grassroots level and activities including joint meetings to advocacy for marginalized groups.", "level": 4, "type": "intervention"}}
7
+ {"text": "food subsidies", "id": 6, "metadata": {"definition": "intervention. food subsidies in the form of entitled reduced price.", "level": 4, "type": "intervention"}}
8
+ {"text": "citizen feedback mechanisms", "id": 7, "metadata": {"definition": "intervention. interventions facilitating meetings between service users and providers, setting up grievance redress mechanisms and scorecards allowing users to rate providers.", "level": 4, "type": "intervention"}}
9
+ {"text": "subsidy for latrine", "id": 8, "metadata": {"definition": "intervention. these are interventions where a subsidy has been provided on cost of the materials to construct a latrine.", "level": 4, "type": "intervention"}}
10
+ {"text": "secondary water treatments", "id": 9, "metadata": {"definition": "intervention. interventions that provide, or promote the uptake of, secondary forms of water treatment or supply.", "level": 4, "type": "intervention"}}
11
+ {"text": "maize subsidies", "id": 10, "metadata": {"definition": "intervention. programmes that provide price subsidies for maize", "level": 4, "type": "intervention"}}
12
+ {"text": "community-driven development & reconstruction (cdd & cdr)", "id": 11, "metadata": {"definition": "intervention. development initiative that provides control of the development process, resources and decision making authority directly to groups in the community. this includes both community-driven development (cdd) and community driven reconstruction (cdr).", "level": 4, "type": "intervention"}}
13
+ {"text": "behavioural sanitation promotion", "id": 12, "metadata": {"definition": "intervention. these are interventions that encourage the practice of proper sanitation practices at the household or community level through behavioural promotion, such as using social pressure, environmental cues, nudges, or triggers of disgust and shame.", "level": 4, "type": "intervention"}}
14
+ {"text": "agricultural marketing training", "id": 13, "metadata": {"definition": "intervention. training in how to bring products to market, including price-setting and negotiation", "level": 4, "type": "intervention"}}
15
+ {"text": "life skills and employment training for conflict affected population", "id": 14, "metadata": {"definition": "intervention. this category provides training programmes primarily for jobs and livelihoods for conflict affected populations, but can also include programmes such as music instruction for youth, which may have less obvious monetary reward but aims to provide skills and an alternative to violence. this category does not include the development of peace skills (such as conflict management).", "level": 4, "type": "intervention"}}
16
+ {"text": "representation of women & minorities", "id": 15, "metadata": {"definition": "intervention. introduction of quotas, reserved seats or similar within political institutions - such as parliament, state legislatures, and village councils - for women, ethnic minorities, and castes.", "level": 4, "type": "intervention"}}
17
+ {"text": "civil society capacity building", "id": 16, "metadata": {"definition": "intervention. this encompasses a fairly broad range of interventions that work with civil society organisations (csos) to develop their capacity as a force of change (i.e. capacity building of csos to advocate for and engage with citizens and the government). this can include the establishment of community interest groups, such as women and youth committees.", "level": 4, "type": "intervention"}}
18
+ {"text": "communication between health care providers", "id": 17, "metadata": {"definition": "intervention. systems or strategies for improving the communication between health care providers, especially between regular healthcare providers/family doctors and other providers and specialised care.", "level": 4, "type": "intervention"}}
19
+ {"text": "providing care to groups versus individual patients", "id": 18, "metadata": {"definition": "intervention. comparisons of providing care to groups versus individual patients, for example intensive group therapy, group vs individual antenatal care.", "level": 4, "type": "intervention"}}
20
+ {"text": "other life skills", "id": 19, "metadata": {"definition": "intervention. programmes that train and enhance other life skills.", "level": 4, "type": "intervention"}}
21
+ {"text": "ecosystem conservation", "id": 20, "metadata": {"definition": "intervention. preservation of ecosystems, including through awareness campaigns.", "level": 4, "type": "intervention"}}
22
+ {"text": "labour market services and intermediation", "id": 21, "metadata": {"definition": "intervention. provision and promotion of employment services to jobseekers, such as job-matching, support, and counselling and advice.", "level": 4, "type": "intervention"}}
23
+ {"text": "peer support groups for menstrual hygiene and sensitisation", "id": 22, "metadata": {"definition": "intervention. interventions that encourage the formation of peer support groups for menstrual hygiene and sensitisation.", "level": 4, "type": "intervention"}}
24
+ {"text": "rotating/accumulated savings and credit associations", "id": 23, "metadata": {"definition": "intervention. a group of individuals acting as an informal financial institution in the form of an alternative financial vehicle. a rosca happens via set contributions and withdrawals to and from a common fund. an ascra happens via set contributions with the common fund used for extending credit within or outside of the group. these include savings and internal lending communities (silcs) and self-help groups (shg).", "level": 4, "type": "intervention"}}
25
+ {"text": "countering violent extremism (cve)", "id": 24, "metadata": {"definition": "intervention. cve is a security-focused approach that intends to address the conditions and reduce the factors that most likely contribute to violent extremism or terrorism. interventions within this group may try to counteract radicalization by providing information and resources to the general public, as well as training to local leaders to engage with different stakeholders to reduce the recruitment and radicalization by violent extremists.", "level": 4, "type": "intervention"}}
26
+ {"text": "microenterprise grant", "id": 25, "metadata": {"definition": "intervention. provision of a small unconditional grant intended specifically at supporting a microenterprise or starting up a microenterprise from scratch. this program can be implemented on its own, but is usually combined with other components of microenterprise support such as a business training, access to credit etc.", "level": 4, "type": "intervention"}}
27
+ {"text": "skills for conflict management", "id": 26, "metadata": {"definition": "intervention. interventions that aim at preventing disputes of scaling up at the community or village-level. this can include interventions promoting dialogue and negotiation skills or training on dispute resolution mechanisms to avoid the use of violence.", "level": 4, "type": "intervention"}}
28
+ {"text": "access and participation", "id": 27, "metadata": {"definition": "intervention. provision of support and services to increase access to services and participation (for example in education, employment, public life). this can be done for example through quotas in universities, public services and political institutions (affirmative positive actions) or through educational campaign and targeted encouragement (positive action) or other social inclusion interventions. for communication or behavioural change interventions relating to gender issues, please use the gender equality code.", "level": 4, "type": "intervention"}}
29
+ {"text": "unconditional cash transfers (ucts)", "id": 28, "metadata": {"definition": "intervention. cash transfer to families or individuals without any condition from the beneficiaries. this can be a regular cash transfer or a one-time cash transfer.", "level": 4, "type": "intervention"}}
30
+ {"text": "total sanitation campaign", "id": 29, "metadata": {"definition": "intervention. this is a specific intervention designed by the indian government. it involves an education component to drive-up the demand for sanitation services, and then in some areas also a subsidy for those below the poverty line.", "level": 4, "type": "intervention"}}
31
+ {"text": "peer-to-peer learning", "id": 30, "metadata": {"definition": "intervention. schemes to allow students to learn together, and from each other, such as study groups.", "level": 4, "type": "intervention"}}
32
+ {"text": "community latrine and toilet provision", "id": 31, "metadata": {"definition": "intervention. these interventions provide new or improved hardware for latrines, or other means of excreta disposal, for the community. a shared facility provided for a defined group of residents or an entire settlement / community. it is normally located in or near the community area and used by almost community members.", "level": 4, "type": "intervention"}}
33
+ {"text": "peace messaging and media", "id": 32, "metadata": {"definition": "intervention. these interventions focus on the capacity building of media organisations and supporting them to provide peace messaging to their local community, among other content.", "level": 4, "type": "intervention"}}
34
+ {"text": "job placement schemes", "id": 33, "metadata": {"definition": "intervention. work placement programmes that seek to put job-seekers into positions within business or trade. these include internship or apprenticeship programmes.", "level": 4, "type": "intervention"}}
35
+ {"text": "behavioural hand hygiene promotion", "id": 34, "metadata": {"definition": "intervention. these are interventions that encourage the practice of proper handwashing at the household or community level through behavioural promotion, such as using social pressure, environmental cues, nudges, or triggers of disgust and shame.", "level": 4, "type": "intervention"}}
36
+ {"text": "peace education", "id": 35, "metadata": {"definition": "intervention. peace education interventions promote the knowledge, skills and attitudes that shape the social environment to both prevent conflict from occurring and help people to resolve it peacefully. these interventions can be run at many scales (i.e. local versus national) and often involve promoting a community dialogue. they usually cover a range of topics including non-violent conflict resolution techniques, human rights, democracy, disarmament, gender equality, tolerance and communication skills.", "level": 4, "type": "intervention"}}
37
+ {"text": "public legal education campaigns", "id": 36, "metadata": {"definition": "intervention. using different forms of media to share information and increase awareness about laws, rights and services to the general population.", "level": 4, "type": "intervention"}}
38
+ {"text": "employment incentives and wage subsidies", "id": 37, "metadata": {"definition": "intervention. provision of employment incentives and wage subsidies for private sector work with the aim to create jobs.", "level": 4, "type": "intervention"}}
39
+ {"text": "handwashing stations", "id": 38, "metadata": {"definition": "intervention. provision or improving of handwashing stations.", "level": 4, "type": "intervention"}}
40
+ {"text": "educational sanitation promotion", "id": 39, "metadata": {"definition": "intervention. these are interventions that encourage the practice of proper sanitation practices at the household or community level through educational promotion, such as sanitation management education, teaching best practices, and information on sanitation and waste management.", "level": 4, "type": "intervention"}}
41
+ {"text": "public release of health provider performance data", "id": 40, "metadata": {"definition": "intervention. informing the public about healthcare providers by the release of performance data in written or electronic form.", "level": 4, "type": "intervention"}}
42
+ {"text": "conditional cash transfers (ccts)", "id": 41, "metadata": {"definition": "intervention. cash transfer to families or individuals provided on a specific condition, such as school enrolment or child vaccination. the transfer is made when and only if the condition is met, which is verify by a more or less intense monitoring.", "level": 4, "type": "intervention"}}
43
+ {"text": "community-led total sanitation (clts)", "id": 42, "metadata": {"definition": "intervention. clts is a participatory mechanism that mobilises communities by facilitating their own appraisal of, and action plan to address, open defecation. it often includes walks of shame, and the use of social pressures to elicit behaviour change.", "level": 4, "type": "intervention"}}
44
+ {"text": "financial literacy", "id": 43, "metadata": {"definition": "intervention. skills development in financial topics and products.", "level": 4, "type": "intervention"}}
45
+ {"text": "institutional capacity building of road agency", "id": 44, "metadata": {"definition": "intervention. these interventions work with the government department that oversees roads to strengthen their capacity. this can be in terms or planning, technical skills in construction, and knowledge on management.", "level": 4, "type": "intervention"}}
46
+ {"text": "sexual and gender-based violence (sgbv) - prevention", "id": 45, "metadata": {"definition": "intervention. this group of interventions aim to prevent sexual and gender-based violence (sgbv) through a combination of methods such as media campaigns, behaviour-change communication, and education. this also includes the establishment of legal protections, such as new laws to criminalise sgbv, training for the police and courts on how to investigate and handle sgbv.", "level": 4, "type": "intervention"}}
47
+ {"text": "general maintenance of latrines", "id": 46, "metadata": {"definition": "intervention. maintaining cleanliness and ensuring good level of hygiene for facilities", "level": 4, "type": "intervention"}}
48
+ {"text": "rice subsidies", "id": 47, "metadata": {"definition": "intervention. programmes that provide price subsidies for rice", "level": 4, "type": "intervention"}}
49
+ {"text": "health facility latrine and toilet provision", "id": 48, "metadata": {"definition": "intervention. these interventions provide new or improved hardware for latrines, or other means of excreta disposal, at a health facility.", "level": 4, "type": "intervention"}}
50
+ {"text": "intergroup dialogues", "id": 49, "metadata": {"definition": "intervention. these interventions aim to increase dialogue and social interaction between different groups, including different ethnic groups, displaced and host community groups, and people of different faiths. they are not part of formal peace processes, but rather processes that use engagement with key community leaders to bring different groups together. they may include purely dialogue-focused interventions or bring groups together through activities, such as arts or sports.", "level": 4, "type": "intervention"}}
51
+ {"text": "asset transfers", "id": 50, "metadata": {"definition": "intervention. provision of productive assets as in-kind support (non-cash goods or services)", "level": 4, "type": "intervention"}}
52
+ {"text": "simplified service delivery", "id": 51, "metadata": {"definition": "intervention. these are 'one stop shops' where governments provide access to a number of different administrative services, such as identity cards, marriage registration, licenses, etc. they enable better access to services by decreasing the time required to access a variety of basic services. also called 'service centres' or 'one stop centres'.", "level": 4, "type": "intervention"}}
53
+ {"text": "community observers", "id": 52, "metadata": {"definition": "intervention. members of the community charged with observing legal processes and conditions.", "level": 4, "type": "intervention"}}
54
+ {"text": "transparency and accountability initiatives", "id": 53, "metadata": {"definition": "intervention. promotion of transparency and accountability through citizens' participation and monitoring initiatives.", "level": 4, "type": "intervention"}}
55
+ {"text": "care environment", "id": 54, "metadata": {"definition": "intervention. changes to the physical or sensory healthcare environment, by adding or altering equipment or layout, providing music, art.", "level": 4, "type": "intervention"}}
models/retriever/intervention/gpt/db/document_index/embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd650e701ee68809d3f1743dbfb1202ada00def909f9608731984644236f1a29
3
+ size 85675
models/retriever/intervention/gpt/db/question_encoder/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/carlos/amr-parsing-master/sentence-similarity/retriever/wandb/wandb/retriever-small-intervention-gpt/files/retriever/question_encoder",
3
+ "architectures": [
4
+ "GoldenRetrieverModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoModel": "hf.GoldenRetrieverModel"
9
+ },
10
+ "classifier_dropout": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 384,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "pad_token_id": 0,
22
+ "position_embedding_type": "absolute",
23
+ "projection_dim": null,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.37.2",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 30522
29
+ }
models/retriever/intervention/gpt/db/question_encoder/hf.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ from transformers import PretrainedConfig
5
+ from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
6
+ from transformers.models.bert.modeling_bert import BertModel
7
+
8
+
9
class GoldenRetrieverConfig(PretrainedConfig):
    """Configuration for ``GoldenRetrieverModel``.

    Carries the usual BERT hyperparameters plus ``projection_dim``, which
    selects the width of the optional projection head applied to the pooled
    sentence embedding (``None`` means no projection head is built).
    """

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        projection_dim=None,
        **kwargs,
    ):
        # pad_token_id and any remaining keyword arguments are handled by the
        # base configuration class.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Store every hyperparameter on the instance under its own name.
        # The pairs are listed in the order the attributes are assigned.
        hyperparameters = (
            ("vocab_size", vocab_size),
            ("hidden_size", hidden_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("hidden_act", hidden_act),
            ("intermediate_size", intermediate_size),
            ("hidden_dropout_prob", hidden_dropout_prob),
            ("attention_probs_dropout_prob", attention_probs_dropout_prob),
            ("max_position_embeddings", max_position_embeddings),
            ("type_vocab_size", type_vocab_size),
            ("initializer_range", initializer_range),
            ("layer_norm_eps", layer_norm_eps),
            ("position_embedding_type", position_embedding_type),
            ("use_cache", use_cache),
            ("classifier_dropout", classifier_dropout),
            ("projection_dim", projection_dim),
        )
        for attr_name, attr_value in hyperparameters:
            setattr(self, attr_name, attr_value)
51
+
52
+
53
class GoldenRetrieverModel(BertModel):
    """BERT encoder that exposes a sentence embedding as ``pooler_output``.

    The embedding is the attention-mask-weighted mean of the last hidden
    states (or the parent model's own pooled output when no attention mask is
    supplied), passed through a LayerNorm and, when
    ``config.projection_dim`` is set, through a Linear + LayerNorm
    projection head.
    """

    config_class = GoldenRetrieverConfig

    def __init__(self, config, *args, **kwargs):
        """Build the BERT backbone and the pooling head.

        Args:
            config: configuration providing ``hidden_size``,
                ``layer_norm_eps`` and ``projection_dim``.
            *args, **kwargs: accepted for call compatibility; they are not
                forwarded to the parent constructor.
        """
        super().__init__(config)
        self.layer_norm_layer = torch.nn.LayerNorm(
            config.hidden_size, eps=config.layer_norm_eps
        )
        self.projection: torch.nn.Module | None = None
        if config.projection_dim is not None:
            self.projection = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, config.projection_dim),
                torch.nn.LayerNorm(config.projection_dim),
            )

    def forward(
        self, **kwargs
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        """Encode the inputs and compute the pooled sentence embedding.

        Args:
            **kwargs: keyword arguments forwarded to the parent ``forward``.
                ``attention_mask`` (optional) selects mean pooling;
                ``return_dict`` (default ``True``) selects the output type.

        Returns:
            A ``BaseModelOutputWithPoolingAndCrossAttentions`` whose
            ``pooler_output`` is the normalised (and optionally projected)
            embedding, or the equivalent tuple when ``return_dict`` is falsy.
        """
        attention_mask = kwargs.get("attention_mask", None)
        return_dict = kwargs.get("return_dict", True)

        # Always request a ModelOutput from the backbone: the pooling below
        # reads fields by attribute, which raised AttributeError when the
        # parent returned a plain tuple (return_dict=False). The caller's
        # preference is honoured when the result is assembled at the end.
        backbone_kwargs = dict(kwargs, return_dict=True)
        model_outputs = super().forward(**backbone_kwargs)

        if attention_mask is None:
            pooler_output = model_outputs.pooler_output
        else:
            # Mean pooling: average the token embeddings over the positions
            # the attention mask marks as real tokens.
            token_embeddings = model_outputs.last_hidden_state
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            # The clamp guards against division by zero for an all-zero mask.
            pooler_output = torch.sum(
                token_embeddings * input_mask_expanded, 1
            ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

        pooler_output = self.layer_norm_layer(pooler_output)

        if self.projection is not None:
            pooler_output = self.projection(pooler_output)

        if not return_dict:
            # to_tuple() omits fields that are None; positions 0 and 1 are
            # the hidden states and the backbone's pooled output, which is
            # replaced here by the embedding computed above.
            # NOTE(review): assumes the backbone has its pooling layer, as
            # built by __init__ above - confirm if that ever changes.
            return (
                model_outputs.last_hidden_state,
                pooler_output,
            ) + model_outputs.to_tuple()[2:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=model_outputs.last_hidden_state,
            pooler_output=pooler_output,
            past_key_values=model_outputs.past_key_values,
            hidden_states=model_outputs.hidden_states,
            attentions=model_outputs.attentions,
            cross_attentions=model_outputs.cross_attentions,
        )
models/retriever/intervention/gpt/db/question_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c3e7630eebc67435972e67905e3c462fb3c0c7315f3ce943356956e6e2dcc53
3
+ size 133465384
models/retriever/intervention/gpt/db/question_encoder/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
models/retriever/intervention/gpt/db/question_encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt/db/question_encoder/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 64,
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "stride": 0,
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "truncation_side": "right",
57
+ "truncation_strategy": "longest_first",
58
+ "unk_token": "[UNK]"
59
+ }
models/retriever/intervention/gpt/db/question_encoder/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt/taxonomy/document_index/config.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _target_: goldenretriever.indexers.inmemory.InMemoryDocumentIndex
2
+ metadata_fields:
3
+ - definition
4
+ separator: ' <def> '
5
+ name_or_path: null
models/retriever/intervention/gpt/taxonomy/document_index/documents.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt/taxonomy/document_index/embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1b3bf62fdbbad4718983754313a7415cd76cb01b9ead85fa5e8fe697bbc8a9
3
+ size 1855147
models/retriever/intervention/gpt/taxonomy/question_encoder/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/carlos/amr-parsing-master/sentence-similarity/retriever/wandb/wandb/retriever-small-intervention-gpt/files/retriever/question_encoder",
3
+ "architectures": [
4
+ "GoldenRetrieverModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoModel": "hf.GoldenRetrieverModel"
9
+ },
10
+ "classifier_dropout": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 384,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "pad_token_id": 0,
22
+ "position_embedding_type": "absolute",
23
+ "projection_dim": null,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.37.2",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 30522
29
+ }
models/retriever/intervention/gpt/taxonomy/question_encoder/hf.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ from transformers import PretrainedConfig
5
+ from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
6
+ from transformers.models.bert.modeling_bert import BertModel
7
+
8
+
9
class GoldenRetrieverConfig(PretrainedConfig):
    """Configuration holding BERT-style hyperparameters plus ``projection_dim``.

    Every constructor argument except ``pad_token_id`` is stored as an
    attribute of the same name. ``pad_token_id`` and any extra keyword
    arguments are handed to ``PretrainedConfig.__init__``.
    """

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        projection_dim=None,
        **kwargs,
    ):
        # The base class consumes pad_token_id together with the extra kwargs.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Attributes are set one by one in this fixed order.
        hyperparameters = {
            "vocab_size": vocab_size,
            "hidden_size": hidden_size,
            "num_hidden_layers": num_hidden_layers,
            "num_attention_heads": num_attention_heads,
            "hidden_act": hidden_act,
            "intermediate_size": intermediate_size,
            "hidden_dropout_prob": hidden_dropout_prob,
            "attention_probs_dropout_prob": attention_probs_dropout_prob,
            "max_position_embeddings": max_position_embeddings,
            "type_vocab_size": type_vocab_size,
            "initializer_range": initializer_range,
            "layer_norm_eps": layer_norm_eps,
            "position_embedding_type": position_embedding_type,
            "use_cache": use_cache,
            "classifier_dropout": classifier_dropout,
            "projection_dim": projection_dim,
        }
        for attribute, value in hyperparameters.items():
            setattr(self, attribute, value)
51
+
52
+
53
class GoldenRetrieverModel(BertModel):
    """BERT encoder that replaces the pooled output with a sentence embedding.

    When an ``attention_mask`` is supplied, the pooled output is the
    mask-weighted mean of the last hidden states; otherwise the parent
    model's own ``pooler_output`` is used. In both cases the result goes
    through a LayerNorm and, if ``config.projection_dim`` is set, through a
    Linear + LayerNorm projection head.
    """

    config_class = GoldenRetrieverConfig

    def __init__(self, config, *args, **kwargs):
        # Extra positional/keyword arguments are accepted but not forwarded.
        super().__init__(config)
        self.layer_norm_layer = torch.nn.LayerNorm(
            config.hidden_size, eps=config.layer_norm_eps
        )
        self.projection: torch.nn.Module | None = None
        if config.projection_dim is not None:
            self.projection = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, config.projection_dim),
                torch.nn.LayerNorm(config.projection_dim),
            )

    def forward(
        self, **kwargs
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        """Encode the inputs and compute the retriever's pooled embedding.

        Args:
            **kwargs: Keyword arguments forwarded to ``BertModel.forward``
                (inputs must be passed by keyword). ``attention_mask`` and
                ``return_dict`` are also inspected here.

        Returns:
            A ``BaseModelOutputWithPoolingAndCrossAttentions`` whose
            ``pooler_output`` is the embedding described on the class, or the
            equivalent plain tuple when ``return_dict`` resolves to False.
        """
        attention_mask = kwargs.get("attention_mask", None)

        # Follow the usual Transformers convention: an absent or None
        # ``return_dict`` defers to the config.
        return_dict = kwargs.get("return_dict", None)
        if return_dict is None:
            return_dict = self.config.use_return_dict

        # Always ask the encoder for a ModelOutput: the pooling below reads
        # named fields, which a tuple result would not provide.
        encoder_kwargs = dict(kwargs, return_dict=True)
        model_outputs = super().forward(**encoder_kwargs)

        if attention_mask is None:
            pooler_output = model_outputs.pooler_output
        else:
            token_embeddings = model_outputs.last_hidden_state
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            # Masked mean over dim 1; the clamp avoids dividing by zero when a
            # row of the mask is entirely zero.
            pooler_output = torch.sum(
                token_embeddings * input_mask_expanded, 1
            ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

        pooler_output = self.layer_norm_layer(pooler_output)

        if self.projection is not None:
            pooler_output = self.projection(pooler_output)

        if not return_dict:
            # Swap the encoder's own pooled output (index 1) for ours and keep
            # every other non-None entry in its original position.
            outputs = model_outputs.to_tuple()
            return (outputs[0], pooler_output) + outputs[2:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=model_outputs.last_hidden_state,
            pooler_output=pooler_output,
            past_key_values=model_outputs.past_key_values,
            hidden_states=model_outputs.hidden_states,
            attentions=model_outputs.attentions,
            cross_attentions=model_outputs.cross_attentions,
        )
models/retriever/intervention/gpt/taxonomy/question_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c3e7630eebc67435972e67905e3c462fb3c0c7315f3ce943356956e6e2dcc53
3
+ size 133465384
models/retriever/intervention/gpt/taxonomy/question_encoder/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
models/retriever/intervention/gpt/taxonomy/question_encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/intervention/gpt/taxonomy/question_encoder/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 64,
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "stride": 0,
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "truncation_side": "right",
57
+ "truncation_strategy": "longest_first",
58
+ "unk_token": "[UNK]"
59
+ }
models/retriever/intervention/gpt/taxonomy/question_encoder/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/outcome/gpt+llama/db/document_index/config.yaml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _target_: goldenretriever.indexers.inmemory.InMemoryDocumentIndex
2
+ metadata_fields:
3
+ - definition
4
+ separator: ' <def> '
5
+ name_or_path: null
models/retriever/outcome/gpt+llama/db/document_index/documents.jsonl ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"text": "overall household expenditure", "id": 0, "metadata": {"definition": "outcome. total amount spent by a household on consumption", "level": 4, "type": "outcome"}}
2
+ {"text": "marriage and teen child bearing as a barrier to education participation", "id": 1, "metadata": {"definition": "outcome. measures of marriage and teenage child bearing in the local context, which may act as a barrier to participation in education.", "level": 4, "type": "outcome"}}
3
+ {"text": "business knowledge", "id": 2, "metadata": {"definition": "outcome. any measure of business practices such as sharing business experiences, asking for business advice and interest in training. this may also include the understanding and ability to apply business concepts in real-life situation such as creating a business plan, understanding how to calculate profit etc.", "level": 4, "type": "outcome"}}
4
+ {"text": "wage levels", "id": 3, "metadata": {"definition": "outcome. measures for wage levels or wage level satisfaction", "level": 4, "type": "outcome"}}
5
+ {"text": "livelihoods and employment behaviours", "id": 4, "metadata": {"definition": "outcome. livelihoods and employment behaviours.", "level": 4, "type": "outcome"}}
6
+ {"text": "investment in productive assets", "id": 5, "metadata": {"definition": "outcome. total amount of investments made on equipment for productive purposes.", "level": 4, "type": "outcome"}}
7
+ {"text": "food security index", "id": 6, "metadata": {"definition": "outcome. composite/index measures of the extent to which households have adequate and reliable access to enough nutritious food to meet basic dietary needs.", "level": 4, "type": "outcome"}}
8
+ {"text": "sense of belonging", "id": 7, "metadata": {"definition": "outcome. a sense of belonging refers to individuals or groups sense of shared national, religious, ethnic or political identity. this may include measure of a sense of divisions or tensions between groups.", "level": 4, "type": "outcome"}}
9
+ {"text": "index of illness related to wash", "id": 8, "metadata": {"definition": "outcome. composite/index measures of other health indicators not otherwise captured related to wash topics.", "level": 4, "type": "outcome"}}
10
+ {"text": "deforestation", "id": 9, "metadata": {"definition": "outcome. rates of deforestation and loss of ecosystems.", "level": 4, "type": "outcome"}}
11
+ {"text": "other crop yield", "id": 10, "metadata": {"definition": "outcome. measure of other crop yield, typically measured as production per a given area (e.g. tonnes per hectare).", "level": 4, "type": "outcome"}}
12
+ {"text": "individual economic well-being", "id": 11, "metadata": {"definition": "outcome. metrics of economic well-being applied to individuals.", "level": 4, "type": "outcome"}}
13
+ {"text": "index of healthcare professional outcomes", "id": 12, "metadata": {"definition": "outcome. composite/index measures of health care professional staff availability, quality, and supervision.", "level": 4, "type": "outcome"}}
14
+ {"text": "profits and revenues", "id": 13, "metadata": {"definition": "outcome. amount of profit or revenues generated by a firm or business over a given period.", "level": 4, "type": "outcome"}}
15
+ {"text": "gendered access to credit", "id": 14, "metadata": {"definition": "outcome. women's access to credit through formal institutions or informal lenders.", "level": 4, "type": "outcome"}}
16
+ {"text": "job seeking", "id": 15, "metadata": {"definition": "outcome. measures of job seeking behaviour", "level": 4, "type": "outcome"}}
17
+ {"text": "attitudes about intimate partner violence", "id": 16, "metadata": {"definition": "outcome. measures of attitudes with respect to intimate partner violence, including identity formation, perception of gender roles, acceptability of sexist attitudes, acceptability of ipv, intimacy and self-efficacy. this indicator is applicable at an individual, family, or community level.", "level": 4, "type": "outcome"}}
18
+ {"text": "index of economic returns due to higher education", "id": 17, "metadata": {"definition": "outcome. composite/index measures of economic returns after higher education.", "level": 4, "type": "outcome"}}
19
+ {"text": "total savings", "id": 18, "metadata": {"definition": "outcome. total savings across all forms of value holdings.", "level": 4, "type": "outcome"}}
20
+ {"text": "index of health service quality & standards", "id": 19, "metadata": {"definition": "outcome. composite/index measures of health service quality and standards.", "level": 4, "type": "outcome"}}
21
+ {"text": "community participation in crime reporting/police oversight", "id": 20, "metadata": {"definition": "outcome. measures of community involvement in crime reporting and police oversight.", "level": 4, "type": "outcome"}}
22
+ {"text": "index of mortality", "id": 21, "metadata": {"definition": "outcome. composite/index measure of mortality rates", "level": 4, "type": "outcome"}}
23
+ {"text": "gender representation", "id": 22, "metadata": {"definition": "outcome. measures of representation by gender to improve access to services and increase social mobility, including reservations.", "level": 4, "type": "outcome"}}
24
+ {"text": "index of sanitation attitudes and knowledge", "id": 23, "metadata": {"definition": "outcome. composite/index measures of attitudes towards sanitation practices, and all measures of knowledge about sanitation practices and related topics (such as disease transmission).", "level": 4, "type": "outcome"}}
25
+ {"text": "cattle herd size", "id": 24, "metadata": {"definition": "outcome. overall cattle holdings for a farm/ranch.", "level": 4, "type": "outcome"}}
26
+ {"text": "empowerment", "id": 25, "metadata": {"definition": "outcome. process through which someone acquires the ability to access certain resources, make independent choices based on his/ her own values, and achieve things that correspond to his/her own values and objectives. this can be measured either through the observation of a set of behaviours that demonstrate an increased access to resources, agency and achievements, or through self-reported perception of someone's own empowerment.", "level": 4, "type": "outcome"}}
27
+ {"text": "willingness to help", "id": 26, "metadata": {"definition": "outcome. willingness to help refers to individuals willingness to engage in actions that benefit others based on altruistic motives. such actions might include community service, volunteering or donating to causes that benefit others. this may include specific measures of openness to help other, anxiety about helping others or the refusal to do so.", "level": 4, "type": "outcome"}}
28
+ {"text": "social participation and interaction", "id": 27, "metadata": {"definition": "outcome. socialisation and interaction measures.", "level": 4, "type": "outcome"}}
29
+ {"text": "index of hygiene attitudes and knowledge", "id": 28, "metadata": {"definition": "outcome. composite/index measures of attitudes towards hygiene practices, and all measures of knowledge about hygiene practices and related topics.", "level": 4, "type": "outcome"}}
30
+ {"text": "forest coverage", "id": 29, "metadata": {"definition": "outcome. measures of forested land, including rates of deforestation and decay, forest quality, damage and degradation.", "level": 4, "type": "outcome"}}
31
+ {"text": "sexual and intimate partner violence", "id": 30, "metadata": {"definition": "outcome. measures of sexual and intimate partner violence incidence", "level": 4, "type": "outcome"}}
32
+ {"text": "index of corruption", "id": 31, "metadata": {"definition": "outcome. composite/index measures of corruption.", "level": 4, "type": "outcome"}}
33
+ {"text": "vocational skills", "id": 32, "metadata": {"definition": "outcome. practical skills which allow a person to do tasks specific to a job and master all the aspects of a job.", "level": 4, "type": "outcome"}}
34
+ {"text": "handwashing behaviour - before handling food", "id": 33, "metadata": {"definition": "outcome. all measures of how consistently participants practice the handwashing behaviours that they are taught. only in relation to food preparation activities.", "level": 4, "type": "outcome"}}
35
+ {"text": "index of business knowledge and skills", "id": 34, "metadata": {"definition": "outcome. composite/index measures of business knowledge and skills.", "level": 4, "type": "outcome"}}
36
+ {"text": "tree replacement", "id": 35, "metadata": {"definition": "outcome. measures of amount and quality of tree replacement programs, and recovered timberland.", "level": 4, "type": "outcome"}}
37
+ {"text": "awareness of rights", "id": 36, "metadata": {"definition": "outcome. knowledge and understanding of one's political, civil, and human rights.", "level": 4, "type": "outcome"}}
38
+ {"text": "perceptions of personal and community safety", "id": 37, "metadata": {"definition": "outcome. this includes measures of perceived safety in the home, safety in the community and freedom of movement.", "level": 4, "type": "outcome"}}
39
+ {"text": "tax compliance", "id": 38, "metadata": {"definition": "outcome. measures of tax compliance. these include measures of tax evasion and the amount of taxes paid.", "level": 4, "type": "outcome"}}
40
+ {"text": "diarrhoea-related mortality", "id": 39, "metadata": {"definition": "outcome. any measure of the number of deaths from diarrhoeal diseases.", "level": 4, "type": "outcome"}}
41
+ {"text": "human rights", "id": 40, "metadata": {"definition": "outcome. extent to which a firm respects or violates human rights through its activity. this can be measured through an impact assessment of the effect of a firm on populations affected by its activity.", "level": 4, "type": "outcome"}}
42
+ {"text": "quality of life/life satisfaction", "id": 41, "metadata": {"definition": "outcome. extent to which an individual is satisfied with his/ her life, based on a complex assessment of different life factors such as education, career, income, relationships etc. there are different ways of measuring life satisfaction, such as for example an out of 10 scale.", "level": 4, "type": "outcome"}}
43
+ {"text": "police visibility", "id": 42, "metadata": {"definition": "outcome. measures of police involvement in the community.", "level": 4, "type": "outcome"}}
44
+ {"text": "access to public education", "id": 43, "metadata": {"definition": "outcome. any measure of the availability and capacity of public education services, including opening hours, waiting times and equitable access to services and resources made available by service providers.", "level": 4, "type": "outcome"}}
45
+ {"text": "acceptance of diversity", "id": 44, "metadata": {"definition": "outcome. acceptance of diversity refers to the extent to which individuals or groups recognise others rights to belong, be trusted, and/or be helped, even where there are differences in values, identities or lifestyles. acceptance of diversity may refer to people within ones own group as well as across groups. this may include measurements on attitudes towards diversity, pluralism or social and cultural tolerance.", "level": 4, "type": "outcome"}}
46
+ {"text": "index of social cohesion", "id": 45, "metadata": {"definition": "outcome. composite/index of measures of social cohesion within a community.", "level": 4, "type": "outcome"}}
47
+ {"text": "migration", "id": 46, "metadata": {"definition": "outcome. any measure of the amount of migration in or out of the area (usually a headcount).", "level": 4, "type": "outcome"}}
48
+ {"text": "sales", "id": 47, "metadata": {"definition": "outcome. volume of goods or services sold by a firm.", "level": 4, "type": "outcome"}}
49
+ {"text": "starting a business", "id": 48, "metadata": {"definition": "outcome. descriptive metrics for the process of starting a business; e.g. procedures (number), time (days), cost (% of income), minimum capital (% of income), etc.", "level": 4, "type": "outcome"}}
50
+ {"text": "index of latrine or toilet use", "id": 49, "metadata": {"definition": "outcome. composite/index measures of the latrine or toilet use.", "level": 4, "type": "outcome"}}
51
+ {"text": "government expenditure", "id": 50, "metadata": {"definition": "outcome. total amount spent by the government", "level": 4, "type": "outcome"}}
52
+ {"text": "index of faecal waste disposal", "id": 51, "metadata": {"definition": "outcome. composite/index measures of waste disposal methods.", "level": 4, "type": "outcome"}}
53
+ {"text": "individual knowledge", "id": 52, "metadata": {"definition": "outcome. individual knowledge.", "level": 4, "type": "outcome"}}
54
+ {"text": "regular latrine or toilet use", "id": 53, "metadata": {"definition": "outcome. all measures of how regularly participants use latrine or toilets.", "level": 4, "type": "outcome"}}
55
+ {"text": "sanitation-related mortality", "id": 54, "metadata": {"definition": "outcome. any measure of the number of deaths from sanitation-related diseases.", "level": 4, "type": "outcome"}}
56
+ {"text": "formal employment due to higher education", "id": 55, "metadata": {"definition": "outcome. attainment of formal employment due to higher education.", "level": 4, "type": "outcome"}}
57
+ {"text": "open defecation", "id": 56, "metadata": {"definition": "outcome. all measures of how consistently participants openly defecate, or what proportion of participants who no longer openly defecate at all.", "level": 4, "type": "outcome"}}
58
+ {"text": "willingness to participate (social)", "id": 57, "metadata": {"definition": "outcome. this concept refers to individuals willingness to participate in civil society. this includes measures of individuals', or perceptions of others' willingness to participate within their communities, such as in community decision-making or conflict transformation procedures, or in civil society more broadly such as through membership with various social, religious, or economic groups / cooperatives / associations. this may include specific measures of openness to participate, anxiety about participation or the refusal to participate. to differentiate from the corresponding outcome within civic engagement, it does not include active measures of participation in community groups.", "level": 4, "type": "outcome"}}
59
+ {"text": "safe disposal of waste", "id": 58, "metadata": {"definition": "outcome. all measures of the number, or proportion, of participants who consistently practice the safe disposal of faeces.", "level": 4, "type": "outcome"}}
60
+ {"text": "new latrine or toilet construction", "id": 59, "metadata": {"definition": "outcome. number of new latrine or toilets constructed over a given period.", "level": 4, "type": "outcome"}}
61
+ {"text": "household expenditure on health", "id": 60, "metadata": {"definition": "outcome. amount or portion spent by the household on costs related to health services, including catastrophic care", "level": 4, "type": "outcome"}}
62
+ {"text": "adoption of good business practices from higher education", "id": 61, "metadata": {"definition": "outcome. measures of the adoption of business practices associated with good business outcomes. these practices may include regular bookkeeping, clear recordkeeping, good planning, and other practices that are taught during business trainings. we included studies that measured the adoption of these and other good business practices.", "level": 4, "type": "outcome"}}
63
+ {"text": "income from agriculture", "id": 62, "metadata": {"definition": "outcome. income stemming from all forms of agricultural production.", "level": 4, "type": "outcome"}}
64
+ {"text": "healthcare staff absenteeism", "id": 63, "metadata": {"definition": "outcome. any measure of health staff absenteeism.", "level": 4, "type": "outcome"}}
65
+ {"text": "social norms around sanitation", "id": 64, "metadata": {"definition": "outcome. all measures of the perceived social norms around sanitation practices.", "level": 4, "type": "outcome"}}
66
+ {"text": "household economic well-being", "id": 65, "metadata": {"definition": "outcome. metrics of economic well-being applied to household.", "level": 4, "type": "outcome"}}
67
+ {"text": "informal employment", "id": 66, "metadata": {"definition": "outcome. share of the working force that participates in the informal sector.", "level": 4, "type": "outcome"}}
68
+ {"text": "food consumption", "id": 67, "metadata": {"definition": "outcome. any measure of nutritional intake including caloric intake, meal frequency, minimum meal intake/acceptable diet, consumption of iron-rich or iron-fortified foods, vitamin supplementation, fruit and vegetable intake, micronutrient intake", "level": 4, "type": "outcome"}}
69
+ {"text": "community monitoring", "id": 68, "metadata": {"definition": "outcome. participation in community monitoring of public resources and projects.", "level": 4, "type": "outcome"}}
70
+ {"text": "index of social attitudes, behaviours, or norms.", "id": 69, "metadata": {"definition": "outcome. composite/index of measures of social attitudes, behaviours, or norms.", "level": 4, "type": "outcome"}}
71
+ {"text": "women's employment after higher education", "id": 70, "metadata": {"definition": "outcome. womens employment status and type of occupation, including whether women were employed, the status of their employment (full-time/part-time, permanent/temporary, formal/informal), and the type of occupation in which they were employed.", "level": 4, "type": "outcome"}}
72
+ {"text": "access to healthcare", "id": 71, "metadata": {"definition": "outcome. measures of access, physical and financial, to healthcare services.", "level": 4, "type": "outcome"}}
73
+ {"text": "index of sexual & reproductive health", "id": 72, "metadata": {"definition": "outcome. composite/index measures of sexual and reproductive health outcomes and services.", "level": 4, "type": "outcome"}}
74
+ {"text": "wealth and assets", "id": 73, "metadata": {"definition": "outcome. total amount of wealth and asset ownership.", "level": 4, "type": "outcome"}}
75
+ {"text": "customer satisfaction", "id": 74, "metadata": {"definition": "outcome. customer satisfaction measures for a product or service.", "level": 4, "type": "outcome"}}
76
+ {"text": "time collecting firewood", "id": 75, "metadata": {"definition": "outcome. overall or share of time allocated to collecting firewood", "level": 4, "type": "outcome"}}
77
+ {"text": "psychological empowerment", "id": 76, "metadata": {"definition": "outcome. psychological process contributing to increasing someone's ability to access resources, make independent choices based on his/her own values, and achieve things that correspond to his/her own values and objectives. this can be measured through an indicator combining a set of psychological outcomes such as self-confidence, self-worth, belief about the ability and right to influence decisions and have control over his/her own life etc.", "level": 4, "type": "outcome"}}
78
+ {"text": "perception of politicians' responsiveness", "id": 77, "metadata": {"definition": "outcome. public perception of politicians' responsiveness to constituents concerns and accessibility. this can include constituencies' feelings of being heard by leaders.", "level": 4, "type": "outcome"}}
79
+ {"text": "attitudes toward open defecation", "id": 78, "metadata": {"definition": "outcome. attitudes toward open defecation, including levels of acceptance.", "level": 4, "type": "outcome"}}
80
+ {"text": "customer retention", "id": 79, "metadata": {"definition": "outcome. measures of customer retention, including returning or continuous customers.", "level": 4, "type": "outcome"}}
81
+ {"text": "drop-out rate from educational establishments", "id": 80, "metadata": {"definition": "outcome. drop-out rate.", "level": 4, "type": "outcome"}}
82
+ {"text": "household expenditure on food", "id": 81, "metadata": {"definition": "outcome. amount or portion of expenditure on food by the household.", "level": 4, "type": "outcome"}}
83
+ {"text": "perception of public service delivery", "id": 82, "metadata": {"definition": "outcome. public perception of service quality and accessibility.", "level": 4, "type": "outcome"}}
84
+ {"text": "index of healthcare utilisation", "id": 83, "metadata": {"definition": "outcome. composite/index measures of health system/service utilisation.", "level": 4, "type": "outcome"}}
85
+ {"text": "handwashing behaviour", "id": 84, "metadata": {"definition": "outcome. all measures of how consistently participants practice the handwashing behaviours that they are taught.", "level": 4, "type": "outcome"}}
86
+ {"text": "social norms regarding violence", "id": 85, "metadata": {"definition": "outcome. this includes norms and behaviours surrounding violence, including support for political violence or armed groups, and attitudes towards the use of violence", "level": 4, "type": "outcome"}}
87
+ {"text": "women's earning/income after higher education", "id": 86, "metadata": {"definition": "outcome. measures of income women earned through paid work, defined as earnings, wages, or salaries. measures of earnings or wages through administrative data, as well as self-reported data where women had been asked about their earnings, wages, or salaries.", "level": 4, "type": "outcome"}}
88
+ {"text": "knowledge of handwashing practices", "id": 87, "metadata": {"definition": "outcome. knowledge of handwashing practices.", "level": 4, "type": "outcome"}}
89
+ {"text": "social trust", "id": 88, "metadata": {"definition": "outcome. any measure of trust between communities, their members, and across different social groups. it may include specific measures of respondents' perception of others', trust towards each other and tolerance.", "level": 4, "type": "outcome"}}
90
+ {"text": "acute respiratory infections (aris)", "id": 89, "metadata": {"definition": "outcome. any measure of acute illness in the upper respiratory tract, trachea, bronchi, bronchioles, alveoli, pleura, pleural cavity, and the muscles associated with breathing", "level": 4, "type": "outcome"}}
91
+ {"text": "risk of violence", "id": 90, "metadata": {"definition": "outcome. risk perceived or objectively measured through a collection of facilitating/protecting factors, of experiencing any kind of violence.", "level": 4, "type": "outcome"}}
92
+ {"text": "wash all-cause mortality", "id": 91, "metadata": {"definition": "outcome. any measure of the number of deaths from any cause.", "level": 4, "type": "outcome"}}
93
+ {"text": "civic knowledge and understanding", "id": 92, "metadata": {"definition": "outcome. any measure of citizens' understanding and knowledge of political processes, public service availability and rights.", "level": 4, "type": "outcome"}}
94
+ {"text": "food insecurity prevalence", "id": 93, "metadata": {"definition": "outcome. measures of food insecurity in surrounding communities and within the household.", "level": 4, "type": "outcome"}}
95
+ {"text": "economic empowerment", "id": 94, "metadata": {"definition": "outcome. process through which someone acquires the ability to access economic resources, make independent choices based on his/her own values, and achieve things that correspond to his/her own values and objectives. this can be measured through an indicator combining a set of economic outcomes such as the person's control over his/her own income, decision making power over family finances etc.", "level": 4, "type": "outcome"}}
96
+ {"text": "adoption of recommended organic fertiliser practices", "id": 95, "metadata": {"definition": "outcome. adoption of recommended practices for using organic fertilisers like manure or urea", "level": 4, "type": "outcome"}}
97
+ {"text": "diarrhoeal disease", "id": 96, "metadata": {"definition": "outcome. all measures of diarrhoeal disease. all reasonable definitions of diarrhoea accepted (frequent, loose-watery stool). this is typically presented as number of incidences over a given period.", "level": 4, "type": "outcome"}}
98
+ {"text": "employee motivation", "id": 97, "metadata": {"definition": "outcome. any measure of employee motivation or the firm's ability to motivate employees.", "level": 4, "type": "outcome"}}
99
+ {"text": "employment share", "id": 98, "metadata": {"definition": "outcome. portion of the labour force in a particular sector.", "level": 4, "type": "outcome"}}
100
+ {"text": "index of empowerment", "id": 99, "metadata": {"definition": "outcome. composite/index measures of social or economic empowerment", "level": 4, "type": "outcome"}}
101
+ {"text": "participation and inclusion", "id": 100, "metadata": {"definition": "outcome. measures of participation and inclusion in social, business, and political activities.", "level": 4, "type": "outcome"}}
102
+ {"text": "resilience", "id": 101, "metadata": {"definition": "outcome. resilience is the ability to manage risks and recover from shocks, economically and psychologically, and can be measured either through observing a set of behaviours which demonstrate resilience or it can be a self-reported perception of resilience. the set of behaviours which demonstrates resilience are for example the diversification of business activity to manage risk, the strategies adopted to cope with unpredictable events such as a drought or an increase in input prices, or even mental health state after a loss.", "level": 4, "type": "outcome"}}
103
+ {"text": "self-employment earnings due to higher education", "id": 102, "metadata": {"definition": "outcome. self-employment earnings due to higher education.", "level": 4, "type": "outcome"}}
104
+ {"text": "business formalisation", "id": 103, "metadata": {"definition": "outcome. rate or number of businesses formally registered and tracked by government.", "level": 4, "type": "outcome"}}
105
+ {"text": "labour force participation", "id": 104, "metadata": {"definition": "outcome. portion of the population participating in the labour force.", "level": 4, "type": "outcome"}}
106
+ {"text": "index of forestry production", "id": 105, "metadata": {"definition": "outcome. composite/index measures of forest production.", "level": 4, "type": "outcome"}}
107
+ {"text": "regular cleaning", "id": 106, "metadata": {"definition": "outcome. measures of regular cleaning and maintenance behaviour.", "level": 4, "type": "outcome"}}
108
+ {"text": "overall paid employment due to higher education", "id": 107, "metadata": {"definition": "outcome. overall paid employment after higher education", "level": 4, "type": "outcome"}}
109
+ {"text": "index of household wealth or assets", "id": 108, "metadata": {"definition": "outcome. composite/index measures of household saving and asset accumulation.", "level": 4, "type": "outcome"}}
110
+ {"text": "index of confidence in public institutions", "id": 109, "metadata": {"definition": "outcome. composite/index measures of public confidence in public institutions", "level": 4, "type": "outcome"}}
111
+ {"text": "perception of corruption", "id": 110, "metadata": {"definition": "outcome. public perception of corruption levels of public servants and elected representatives.", "level": 4, "type": "outcome"}}
112
+ {"text": "sociocultural knowledge and awareness", "id": 111, "metadata": {"definition": "outcome. this includes measures of knowledge about current or historical socio-political contexts or cultural awareness, particularly of other cultures. this also includes measures of knowledge of concepts such as violence, peace and mediation.", "level": 4, "type": "outcome"}}
113
+ {"text": "employment hours", "id": 112, "metadata": {"definition": "outcome. number of hours worked in specific time frame, usually either in a week or a month.", "level": 4, "type": "outcome"}}
models/retriever/outcome/gpt+llama/db/document_index/embeddings.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11009addd2ff8d82ab02e4af5128487e6caba65428769c75e3bd21b51df8eefb
3
+ size 174763
models/retriever/outcome/gpt+llama/db/question_encoder/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/carlos/amr-parsing-master/sentence-similarity/retriever/wandb/wandb/retriever-small-outcome-llama-gpt/files/retriever/question_encoder",
3
+ "architectures": [
4
+ "GoldenRetrieverModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "auto_map": {
8
+ "AutoModel": "hf.GoldenRetrieverModel"
9
+ },
10
+ "classifier_dropout": null,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 384,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 1536,
16
+ "layer_norm_eps": 1e-12,
17
+ "max_position_embeddings": 512,
18
+ "model_type": "bert",
19
+ "num_attention_heads": 12,
20
+ "num_hidden_layers": 12,
21
+ "pad_token_id": 0,
22
+ "position_embedding_type": "absolute",
23
+ "projection_dim": null,
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.37.2",
26
+ "type_vocab_size": 2,
27
+ "use_cache": true,
28
+ "vocab_size": 30522
29
+ }
models/retriever/outcome/gpt+llama/db/question_encoder/hf.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple, Union
2
+
3
+ import torch
4
+ from transformers import PretrainedConfig
5
+ from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
6
+ from transformers.models.bert.modeling_bert import BertModel
7
+
8
+
9
class GoldenRetrieverConfig(PretrainedConfig):
    """Hyper-parameter container for ``GoldenRetrieverModel``.

    Stores the BERT-style settings passed to the constructor as attributes
    of the same name, plus ``projection_dim``. ``pad_token_id`` and any
    extra keyword arguments are forwarded to ``PretrainedConfig``.
    """

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        projection_dim=None,
        **kwargs,
    ):
        # The base class owns ``pad_token_id`` and every unrecognised kwarg.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Embedding tables.
        self.vocab_size = vocab_size
        self.type_vocab_size = type_vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.position_embedding_type = position_embedding_type

        # Encoder geometry.
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act

        # Regularisation, initialisation and normalisation.
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.classifier_dropout = classifier_dropout
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps

        # Runtime behaviour and the optional projection head size
        # (``None`` means the model builds no projection head).
        self.use_cache = use_cache
        self.projection_dim = projection_dim
51
+
52
+
53
class GoldenRetrieverModel(BertModel):
    """BERT encoder whose ``pooler_output`` is a layer-normalised sentence vector.

    When an ``attention_mask`` is supplied, the sentence vector is the mean
    of the last hidden states over the attended positions; otherwise the
    parent model's own ``pooler_output`` is used. The vector is passed
    through a ``LayerNorm`` and, if ``config.projection_dim`` is set,
    through a ``Linear`` + ``LayerNorm`` projection head.
    """

    config_class = GoldenRetrieverConfig

    def __init__(self, config, *args, **kwargs):
        """Build the BERT backbone plus the pooling-side layers.

        Args:
            config: configuration providing ``hidden_size``,
                ``layer_norm_eps`` and ``projection_dim``.
            *args, **kwargs: accepted for call compatibility and ignored.
        """
        super().__init__(config)
        self.layer_norm_layer = torch.nn.LayerNorm(
            config.hidden_size, eps=config.layer_norm_eps
        )
        self.projection: torch.nn.Module | None = None
        if config.projection_dim is not None:
            self.projection = torch.nn.Sequential(
                torch.nn.Linear(config.hidden_size, config.projection_dim),
                torch.nn.LayerNorm(config.projection_dim),
            )

    def forward(
        self, **kwargs
    ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
        """Encode the inputs and replace ``pooler_output`` with the pooled vector.

        Args:
            **kwargs: keyword arguments forwarded to ``BertModel.forward``.
                ``attention_mask`` (if present) also drives mean pooling.
                ``return_dict`` (default ``True``) selects the output type;
                any falsy value yields a tuple.

        Returns:
            A ``BaseModelOutputWithPoolingAndCrossAttentions`` or, when
            ``return_dict`` is falsy, the tuple
            ``(last_hidden_state, pooler_output, *remaining_outputs)``.
        """
        attention_mask = kwargs.get("attention_mask", None)

        # Take ``return_dict`` out of the forwarded kwargs and always ask the
        # backbone for a ModelOutput: the pooling below reads fields by name,
        # which fails with AttributeError on the plain tuple the backbone
        # returns when ``return_dict`` is False.
        return_dict = kwargs.pop("return_dict", True)
        model_outputs = super().forward(return_dict=True, **kwargs)

        if attention_mask is None:
            pooler_output = model_outputs.pooler_output
        else:
            # Masked mean over the sequence dimension.
            token_embeddings = model_outputs.last_hidden_state
            input_mask_expanded = (
                attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
            )
            # The clamp avoids division by zero for rows with an all-zero mask.
            pooler_output = torch.sum(
                token_embeddings * input_mask_expanded, 1
            ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)

        pooler_output = self.layer_norm_layer(pooler_output)

        if self.projection is not None:
            pooler_output = self.projection(pooler_output)

        if not return_dict:
            # Integer/slice indexing on a ModelOutput goes through its tuple
            # form, so this keeps the original tuple layout.
            return (model_outputs[0], pooler_output) + model_outputs[2:]

        return BaseModelOutputWithPoolingAndCrossAttentions(
            last_hidden_state=model_outputs.last_hidden_state,
            pooler_output=pooler_output,
            past_key_values=model_outputs.past_key_values,
            hidden_states=model_outputs.hidden_states,
            attentions=model_outputs.attentions,
            cross_attentions=model_outputs.cross_attentions,
        )
models/retriever/outcome/gpt+llama/db/question_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac19984a673814bc31505bb2fffe861ce40b8203be1a8bf7b53a9fdbaba5676
3
+ size 133465384
models/retriever/outcome/gpt+llama/db/question_encoder/special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
models/retriever/outcome/gpt+llama/db/question_encoder/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
models/retriever/outcome/gpt+llama/db/question_encoder/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "max_length": 64,
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "stride": 0,
53
+ "strip_accents": null,
54
+ "tokenize_chinese_chars": true,
55
+ "tokenizer_class": "BertTokenizer",
56
+ "truncation_side": "right",
57
+ "truncation_strategy": "longest_first",
58
+ "unk_token": "[UNK]"
59
+ }
models/retriever/outcome/gpt+llama/db/question_encoder/vocab.txt ADDED
The diff for this file is too large to render. See raw diff