Source code for DeBERTa.deberta.pooling

#
# Author: penhe@microsoft.com
# Date: 01/25/2019
#
"""
Pooling functions
"""

from torch import nn
import copy
import json
from .bert import ACT2FN
from .ops import StableDropout
from .config import AbsModelConfig

__all__ = ['PoolConfig', 'ContextPooler']

class PoolConfig(AbsModelConfig):
    """Configuration class to store the configuration of the pooling layer.

    Parameters:
        config (:class:`~DeBERTa.deberta.ModelConfig`): The model config. The fields of the pool config will be initialized with the `pooling` field in the model config.

    Attributes:
        hidden_size (int): Size of the encoder layers and the pooler layer, default: `768`.

        dropout (float): The dropout rate applied on the output of the `[CLS]` token.

        hidden_act (:obj:`str`): The activation function of the projection layer, it can be one of ['gelu', 'tanh'].

    Example::

        # Here is the content of an example model config file in json format

        {
          "hidden_size": 768,
          "num_hidden_layers": 12,
          "num_attention_heads": 12,
          "intermediate_size": 3072,
          ...
          "pooling": {
            "hidden_size": 768,
            "hidden_act": "gelu",
            "dropout": 0.1
          }
        }

    """

    def __init__(self, config=None):
        """Constructs PoolConfig.

        Args:
            `config`: the config of the model. The fields of the pool config will be initialized with the 'pooling' field in the model config.
        """
        self.hidden_size = 768
        self.dropout = 0
        self.hidden_act = 'gelu'
        if config:
            pool_config = getattr(config, 'pooling', config)
            if isinstance(pool_config, dict):
                pool_config = AbsModelConfig.from_dict(pool_config)
            self.hidden_size = getattr(pool_config, 'hidden_size', config.hidden_size)
            self.dropout = getattr(pool_config, 'dropout', 0)
            self.hidden_act = getattr(pool_config, 'hidden_act', 'gelu')
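# A minimal construction sketch (illustrative only): PoolConfig reads its fields from
# the `pooling` section of a model config when one is given, and otherwise keeps the
# defaults set above. The dict below is a hypothetical example.
#
#   model_config = AbsModelConfig.from_dict({
#       "hidden_size": 768,
#       "pooling": {"hidden_size": 768, "hidden_act": "gelu", "dropout": 0.1}
#   })
#   pool_config = PoolConfig(model_config)   # picks up hidden_act='gelu', dropout=0.1
#   default_config = PoolConfig()            # no model config: hidden_size=768, dropout=0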
class ContextPooler(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = StableDropout(config.dropout)
        self.config = config

    def forward(self, hidden_states, mask=None):
        # We "pool" the model by simply taking the hidden state corresponding
        # to the first token.
        context_token = hidden_states[:, 0]
        context_token = self.dropout(context_token)
        pooled_output = self.dense(context_token)
        pooled_output = ACT2FN[self.config.hidden_act](pooled_output)
        return pooled_output

    def output_dim(self):
        return self.config.hidden_size
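# A minimal usage sketch (illustrative only; the batch and sequence sizes below are
# arbitrary assumptions, not values required by the pooler):
#
#   import torch
#   pool_config = PoolConfig()                  # defaults: hidden_size=768, 'gelu', dropout=0
#   pooler = ContextPooler(pool_config)
#   hidden_states = torch.randn(2, 16, pool_config.hidden_size)   # [batch, seq_len, hidden]
#   pooled = pooler(hidden_states)              # projects the first ([CLS]) token, shape [2, 768]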