--- language: - ar - fr - es - de - el - bg - ru - tr - vi - th - zh - hi - sw - ur datasets: - xnli widget: - text: >- The Red Hot Chili Peppers were formed in Los Angeles by Kiedis, Flea, Hillel Slovak and Jack Irons. [SEP] Jack Irons place of birth Los Angeles --- # Model Card for mdeberta-v3-base-triplet-critic-xnli This is the Triplit Critic model presented in the ACL 2023 paper RED^{FM}: a Filtered and Multilingual Relation Extraction Dataset. It is based on mdeberta-v3-base and it was trained as a multitask system to filter triplets as well as on the XNLI dataset. The model weights contain the two classification heads, however loading it using the huggingface library will only load those for Triplet filtering (ie. a binary classification head), if one wants to use it for XNLI it needs a custom script. While it is defined and trained as a classification system, we use the positive score (ie. Label_1) as the confidence score for a triplet. For SREDFM the confidence score thresshold was set at 0.75. To load the multitask model: ```python from transformers import DebertaV2PreTrainedModel, DebertaV2Model from torch import nn from transformers.models.deberta_v2.modeling_deberta_v2 import * from transformers.file_utils import ModelOutput @dataclass class TXNLIClassifierOutput(ModelOutput): """ Base class for outputs of sentence classification models. Args: loss (:obj:`torch.FloatTensor` of shape :obj:`(1,)`, `optional`, returned when :obj:`labels` is provided): Classification (or regression if config.num_labels==1) loss. logits (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). logits_xnli (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, config.num_labels)`): Classification (or regression if config.num_labels==1) scores (before SoftMax). hidden_states (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_hidden_states=True`` is passed or when ``config.output_hidden_states=True``): Tuple of :obj:`torch.FloatTensor` (one for the output of the embeddings + one for the output of each layer) of shape :obj:`(batch_size, sequence_length, hidden_size)`. Hidden-states of the model at the output of each layer plus the initial embedding outputs. attentions (:obj:`tuple(torch.FloatTensor)`, `optional`, returned when ``output_attentions=True`` is passed or when ``config.output_attentions=True``): Tuple of :obj:`torch.FloatTensor` (one for each layer) of shape :obj:`(batch_size, num_heads, sequence_length, sequence_length)`. Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads. """ loss: Optional[torch.FloatTensor] = None logits: torch.FloatTensor = None logits_xnli: torch.FloatTensor = None hidden_states: Optional[Tuple[torch.FloatTensor]] = None attentions: Optional[Tuple[torch.FloatTensor]] = None class DebertaV2ForTripletClassification(DebertaV2PreTrainedModel): def __init__(self, config): super().__init__(config) num_labels = getattr(config, "num_labels", 2) self.num_labels = num_labels self.deberta = DebertaV2Model(config) self.pooler = ContextPooler(config) output_dim = self.pooler.output_dim self.classifier = nn.Linear(output_dim, num_labels) drop_out = getattr(config, "cls_dropout", None) drop_out = self.config.hidden_dropout_prob if drop_out is None else drop_out self.dropout = StableDropout(drop_out) self.classifier_xnli = nn.Linear(output_dim, 3) # Initialize weights and apply final processing self.post_init() def get_input_embeddings(self): return self.deberta.get_input_embeddings() def set_input_embeddings(self, new_embeddings): self.deberta.set_input_embeddings(new_embeddings) @add_start_docstrings_to_model_forward(DEBERTA_INPUTS_DOCSTRING.format("batch_size, sequence_length")) def forward( self, input_ids=None, attention_mask=None, token_type_ids=None, position_ids=None, inputs_embeds=None, labels=None, output_attentions=None, output_hidden_states=None, return_dict=None, ): r""" labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If `config.num_labels > 1` a classification loss is computed (Cross-Entropy). """ return_dict = return_dict if return_dict is not None else self.config.use_return_dict outputs = self.deberta( input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask, position_ids=position_ids, inputs_embeds=inputs_embeds, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict, ) encoder_layer = outputs[0] pooled_output = self.pooler(encoder_layer) pooled_output = self.dropout(pooled_output) logits = self.classifier(pooled_output) logits_xnli = self.classifier_xnli(pooled_output) loss = None if labels is not None: if labels.dtype != torch.bool: loss_fct = CrossEntropyLoss() loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) else: loss_fct = BCEWithLogitsLoss() loss = loss_fct(logits_xnli.view(-1, 3), labels.view(-1).long()) if not return_dict: output = (logits,) + outputs[1:] return ((loss,) + output) if loss is not None else output return TXNLIClassifierOutput( loss=loss, logits=logits, logits_xnli=logits_xnli, hidden_states=outputs.hidden_states, attentions=outputs.attentions ) ```