CarlosMalaga committed on
Commit
b5db46c
1 Parent(s): b2b62d0

Delete models/retriever/level-4-small-no-negative-outcomes

Browse files
models/retriever/level-4-small-no-negative-outcomes/document_index/config.yaml DELETED
@@ -1,5 +0,0 @@
1
- _target_: relik.retriever.indexers.inmemory.InMemoryDocumentIndex
2
- metadata_fields:
3
- - definition
4
- separator: ' <def> '
5
- name_or_path: null
 
 
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/document_index/documents.jsonl DELETED
The diff for this file is too large to render. See raw diff
 
models/retriever/level-4-small-no-negative-outcomes/document_index/embeddings.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f48d0753bec8ae343e33355d98538895384ee9115d421e625f22420164a7c5bf
3
- size 1352875
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/document_index_db/config.yaml DELETED
@@ -1,5 +0,0 @@
1
- _target_: relik.retriever.indexers.inmemory.InMemoryDocumentIndex
2
- metadata_fields:
3
- - definition
4
- separator: ' <def> '
5
- name_or_path: null
 
 
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/document_index_db/documents.jsonl DELETED
@@ -1,2 +0,0 @@
1
- {"text": "financial literacy", "id": 0, "metadata": {"definition": "outcome. measures of skills or knowledge related to finance and reporting.", "level": 4, "type": "outcome"}}
2
- {"text": "secondary water treatments", "id": 1, "metadata": {"definition": "outcome. use of secondary/back-up forms of water treatment and supply.", "level": 4, "type": "outcome"}}
 
 
 
models/retriever/level-4-small-no-negative-outcomes/document_index_db/embeddings.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f38d784feae82d94bd56010427349604de367cf1d7547a0d6720070878e9bec3
3
- size 4267
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/config.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "_name_or_path": "intfloat/e5-small-v2",
3
- "architectures": [
4
- "GoldenRetrieverModel"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "auto_map": {
8
- "AutoModel": "hf.GoldenRetrieverModel"
9
- },
10
- "classifier_dropout": null,
11
- "hidden_act": "gelu",
12
- "hidden_dropout_prob": 0.1,
13
- "hidden_size": 384,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 1536,
16
- "layer_norm_eps": 1e-12,
17
- "max_position_embeddings": 512,
18
- "model_type": "bert",
19
- "num_attention_heads": 12,
20
- "num_hidden_layers": 12,
21
- "pad_token_id": 0,
22
- "position_embedding_type": "absolute",
23
- "projection_dim": null,
24
- "torch_dtype": "float32",
25
- "transformers_version": "4.33.3",
26
- "type_vocab_size": 2,
27
- "use_cache": true,
28
- "vocab_size": 30522
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/hf.py DELETED
@@ -1,99 +0,0 @@
1
- from typing import Tuple, Union
2
-
3
- import torch
4
- from transformers import PretrainedConfig
5
- from transformers.modeling_outputs import BaseModelOutputWithPoolingAndCrossAttentions
6
- from transformers.models.bert.modeling_bert import BertModel
7
-
8
-
9
- class GoldenRetrieverConfig(PretrainedConfig):
10
- model_type = "bert"
11
-
12
- def __init__(
13
- self,
14
- vocab_size=30522,
15
- hidden_size=768,
16
- num_hidden_layers=12,
17
- num_attention_heads=12,
18
- intermediate_size=3072,
19
- hidden_act="gelu",
20
- hidden_dropout_prob=0.1,
21
- attention_probs_dropout_prob=0.1,
22
- max_position_embeddings=512,
23
- type_vocab_size=2,
24
- initializer_range=0.02,
25
- layer_norm_eps=1e-12,
26
- pad_token_id=0,
27
- position_embedding_type="absolute",
28
- use_cache=True,
29
- classifier_dropout=None,
30
- projection_dim=None,
31
- **kwargs,
32
- ):
33
- super().__init__(pad_token_id=pad_token_id, **kwargs)
34
-
35
- self.vocab_size = vocab_size
36
- self.hidden_size = hidden_size
37
- self.num_hidden_layers = num_hidden_layers
38
- self.num_attention_heads = num_attention_heads
39
- self.hidden_act = hidden_act
40
- self.intermediate_size = intermediate_size
41
- self.hidden_dropout_prob = hidden_dropout_prob
42
- self.attention_probs_dropout_prob = attention_probs_dropout_prob
43
- self.max_position_embeddings = max_position_embeddings
44
- self.type_vocab_size = type_vocab_size
45
- self.initializer_range = initializer_range
46
- self.layer_norm_eps = layer_norm_eps
47
- self.position_embedding_type = position_embedding_type
48
- self.use_cache = use_cache
49
- self.classifier_dropout = classifier_dropout
50
- self.projection_dim = projection_dim
51
-
52
-
53
- class GoldenRetrieverModel(BertModel):
54
- config_class = GoldenRetrieverConfig
55
-
56
- def __init__(self, config, *args, **kwargs):
57
- super().__init__(config)
58
- self.layer_norm_layer = torch.nn.LayerNorm(
59
- config.hidden_size, eps=config.layer_norm_eps
60
- )
61
- self.projection: torch.nn.Module | None = None
62
- if config.projection_dim is not None:
63
- self.projection = torch.nn.Sequential(
64
- torch.nn.Linear(config.hidden_size, config.projection_dim),
65
- torch.nn.LayerNorm(config.projection_dim),
66
- )
67
-
68
- def forward(
69
- self, **kwargs
70
- ) -> Union[Tuple[torch.Tensor], BaseModelOutputWithPoolingAndCrossAttentions]:
71
- attention_mask = kwargs.get("attention_mask", None)
72
- model_outputs = super().forward(**kwargs)
73
- if attention_mask is None:
74
- pooler_output = model_outputs.pooler_output
75
- else:
76
- token_embeddings = model_outputs.last_hidden_state
77
- input_mask_expanded = (
78
- attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
79
- )
80
- pooler_output = torch.sum(
81
- token_embeddings * input_mask_expanded, 1
82
- ) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
83
-
84
- pooler_output = self.layer_norm_layer(pooler_output)
85
-
86
- if self.projection is not None:
87
- pooler_output = self.projection(pooler_output)
88
-
89
- if not kwargs.get("return_dict", True):
90
- return (model_outputs[0], pooler_output) + model_outputs[2:]
91
-
92
- return BaseModelOutputWithPoolingAndCrossAttentions(
93
- last_hidden_state=model_outputs.last_hidden_state,
94
- pooler_output=pooler_output,
95
- past_key_values=model_outputs.past_key_values,
96
- hidden_states=model_outputs.hidden_states,
97
- attentions=model_outputs.attentions,
98
- cross_attentions=model_outputs.cross_attentions,
99
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2ff7f63276ef10722db26f66a406bc4aa6f5a345dd177d243cb826bb7db48b7
3
- size 133510894
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/special_tokens_map.json DELETED
@@ -1,7 +0,0 @@
1
- {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
7
- }
 
 
 
 
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/tokenizer_config.json DELETED
@@ -1,13 +0,0 @@
1
- {
2
- "clean_up_tokenization_spaces": true,
3
- "cls_token": "[CLS]",
4
- "do_lower_case": true,
5
- "mask_token": "[MASK]",
6
- "model_max_length": 512,
7
- "pad_token": "[PAD]",
8
- "sep_token": "[SEP]",
9
- "strip_accents": null,
10
- "tokenize_chinese_chars": true,
11
- "tokenizer_class": "BertTokenizer",
12
- "unk_token": "[UNK]"
13
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
models/retriever/level-4-small-no-negative-outcomes/question_encoder/vocab.txt DELETED
The diff for this file is too large to render. See raw diff