File size: 888 Bytes
d5175d3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#
import torch
class ScalarBias(torch.autograd.Function):
"""
Adds a vector of scalars, used in self-attention mechanism to allow
the model to optionally attend to this vector instead of the past
"""
@staticmethod
def forward(ctx, input, dim, bias_init):
size = list(input.size())
size[dim] += 1
output = input.new(*size).fill_(bias_init)
output.narrow(dim, 1, size[dim] - 1).copy_(input)
ctx.dim = dim
return output
@staticmethod
def backward(ctx, grad):
return grad.narrow(ctx.dim, 1, grad.size(ctx.dim) - 1), None, None
def scalar_bias(input, dim, bias_init=0):
return ScalarBias.apply(input, dim, bias_init)
|