Source code for DeBERTa.deberta.nnmodule

import os
import torch
import copy
from torch import nn
from .config import ModelConfig
from ..utils import xtqdm as tqdm
from .cache_utils import load_model_state

from ..utils import get_logger
logger = get_logger()

__all__ = ['NNModule']

class NNModule(nn.Module):
  """ An abstract class to handle weights initialization and \
    a simple interface for downloading and loading pretrained models.

  Args:

    config (:obj:`~DeBERTa.deberta.ModelConfig`): The model config for the module

  """

  def __init__(self, config, *inputs, **kwargs):
    super().__init__()
    self.config = config
  def init_weights(self, module):
    """ Apply Gaussian(mean=0, std=`config.initializer_range`) initialization to the module.

    Args:

      module (:obj:`torch.nn.Module`): The module to apply the initialization.

    Example::

      class MyModule(NNModule):
        def __init__(self, config):
          # Add construction instructions
          super().__init__(config)
          self.bert = DeBERTa(config)

          # Add other modules
          ...

          # Apply initialization
          self.apply(self.init_weights)

    """
    if isinstance(module, (nn.Linear, nn.Embedding)):
      module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
    if isinstance(module, nn.Linear) and module.bias is not None:
      module.bias.data.zero_()
  @classmethod
  def load_model(cls, model_path, model_config=None, tag=None, no_cache=False, cache_dir=None, *inputs, **kwargs):
    """ Instantiate a sub-class of NNModule from a pre-trained model file.

    Args:

      model_path (:obj:`str`): Path or name of the pre-trained model which can be either,

        - The path of a pre-trained model
        - The name of a pre-trained DeBERTa model in `DeBERTa GitHub releases <https://github.com/microsoft/DeBERTa/releases>`_, i.e. [**base, base_mnli, large, large_mnli**].

        If `model_path` is `None` or `-`, then the method will create a new sub-class without initializing from pre-trained models.

      model_config (:obj:`str`): The path of the model config file. If it's `None`, then the method will try to find the config in the following order:

        1. ['config'] in the model state dictionary.

        2. `model_config.json` alongside `model_path`.

        If no config can be found, the method will fail.

      tag (:obj:`str`, optional): The release tag of DeBERTa, default: `None`.

      no_cache (:obj:`bool`, optional): Disable local cache of downloaded models, default: `False`.

      cache_dir (:obj:`str`, optional): The cache directory used to save the downloaded models, default: `None`. If it's `None`, then the models will be saved at `$HOME/.~DeBERTa`.

    Return:

      :obj:`NNModule` : The sub-class object.

    """
    # Load the config override, if one was given explicitly
    if model_config:
      config = ModelConfig.from_json_file(model_config)
    else:
      config = None
    model_config = None
    model_state = None
    # Treat '-' and the empty string the same as "no pre-trained model"
    if model_path and (model_path.strip() == '-' or model_path.strip() == ''):
      model_path = None
    try:
      model_state, model_config = load_model_state(model_path, tag=tag, no_cache=no_cache, cache_dir=cache_dir)
    except Exception as exp:
      raise Exception(f'Failed to get model {model_path}. Exception: {exp}')

    if config is not None and model_config is not None:
      # The explicit config overrides everything except the fields that
      # define the pre-trained architecture
      for k in config.__dict__:
        if k not in ['hidden_size', 'intermediate_size', 'num_attention_heads', 'num_hidden_layers', 'vocab_size', 'max_position_embeddings']:
          model_config.__dict__[k] = config.__dict__[k]
    if model_config is not None:
      config = copy.copy(model_config)
    vocab_size = config.vocab_size

    # Instantiate model.
    model = cls(config, *inputs, **kwargs)
    if not model_state:
      return model
    # copy state_dict so _load_from_state_dict can modify it
    state_dict = model_state.copy()

    missing_keys = []
    unexpected_keys = []
    error_msgs = []
    metadata = getattr(state_dict, '_metadata', None)

    def load(module, prefix=''):
      local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
      module._load_from_state_dict(
          state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
      # Recurse into sub-modules, extending the state_dict key prefix
      for name, child in module._modules.items():
        if child is not None:
          load(child, prefix + name + '.')

    load(model)
    return model
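
# A minimal usage sketch (not part of the original module): a hypothetical
# sub-class showing the intended pattern of building sub-modules in __init__
# and applying `init_weights` through `self.apply`. The stand-in encoder and
# the `num_labels` parameter are illustrative assumptions; a real model would
# use e.g. `DeBERTa(config)` as in the docstring example above.
class MyClassifier(NNModule):
  def __init__(self, config, num_labels=2, *inputs, **kwargs):
    super().__init__(config)
    # Stand-in encoder; in practice this would be a DeBERTa backbone
    self.encoder = nn.Embedding(config.vocab_size, config.hidden_size)
    self.classifier = nn.Linear(config.hidden_size, num_labels)
    # Gaussian(0, config.initializer_range) init for Linear/Embedding weights
    self.apply(self.init_weights)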
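
# A sketch of loading the hypothetical sub-class above from a released
# checkpoint: 'base' is one of the release names listed in the docstring,
# and `num_labels` is forwarded to MyClassifier.__init__ via **kwargs.
# Guarded so that importing this module stays free of download side effects.
if __name__ == '__main__':
  model = MyClassifier.load_model('base', num_labels=2)
  print(model.config.hidden_size)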