```python
from fastai.text.all import *                 # brings in torch, nn, tensor and the text model classes used below
from fastai.text.models.awdlstm import *
from fastcore.test import *                   # test_eq, test_close, test_stdout helpers used in the examples
```
Core text modules
Language models
LinearDecoder
LinearDecoder (n_out:int, n_hid:int, output_p:float=0.1, tie_encoder:nn.Module=None, bias:bool=True)
To go on top of an RNNCore module and create a Language Model.
| | Type | Default | Details |
|---|---|---|---|
| n_out | int | | Number of output channels |
| n_hid | int | | Number of features in encoder last layer output |
| output_p | float | 0.1 | Input dropout probability |
| tie_encoder | Module | None | If module is supplied will tie decoder weight to tie_encoder.weight |
| bias | bool | True | If False the layer will not learn additive bias |
```python
enc = AWD_LSTM(100, 20, 10, 2)
x = torch.randint(0, 100, (10,5))
r = enc(x)

tst = LinearDecoder(100, 20, 0.1)
y = tst(r)
test_eq(y[1], r)
test_eq(y[2].shape, r.shape)
test_eq(y[0].shape, [10, 5, 100])

tst = LinearDecoder(100, 20, 0.1, tie_encoder=enc.encoder)
test_eq(tst.decoder.weight, enc.encoder.weight)
```
SequentialRNN
SequentialRNN (*args)
A sequential module that passes the reset call to its children.
```python
class _TstMod(Module):
    def reset(self): print('reset')

tst = SequentialRNN(_TstMod(), _TstMod())
test_stdout(tst.reset, 'reset\nreset')
```
get_language_model
get_language_model (arch, vocab_sz:int, config:dict=None, drop_mult:float=1.0)
Create a language model from arch and its config.
| | Type | Default | Details |
|---|---|---|---|
| arch | | | Function or class that can generate a language model architecture |
| vocab_sz | int | | Size of the vocabulary |
| config | dict | None | Model configuration dictionary |
| drop_mult | float | 1.0 | Multiplicative factor to scale all dropout probabilities in config |
| Returns | SequentialRNN | | Language model with arch encoder and linear decoder |
The default config used can be found in _model_meta[arch]['config_lm']. drop_mult is applied to all the dropout probabilities in that config.
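A quick way to see what gets scaled is to inspect the default config directly. The sketch below is illustrative only: exact default values may differ between fastai versions, and for AWD_LSTM the dict stored as _model_meta[AWD_LSTM]['config_lm'] is awd_lstm_lm_config, the same dict copied in the example that follows.

```python
# Illustrative sketch: inspect the default language-model config for AWD_LSTM.
# Exact default values may vary across fastai versions.
cfg = awd_lstm_lm_config
print(cfg['emb_sz'], cfg['n_hid'])                          # embedding and hidden sizes
print({k: v for k, v in cfg.items() if k.endswith('_p')})   # all dropout probabilities
# get_language_model(AWD_LSTM, vocab_sz, drop_mult=0.5) scales each of these *_p values by 0.5.
```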
```python
config = awd_lstm_lm_config.copy()
config.update({'n_hid':10, 'emb_sz':20})

tst = get_language_model(AWD_LSTM, 100, config=config)
x = torch.randint(0, 100, (10,5))
y = tst(x)
test_eq(y[0].shape, [10, 5, 100])
test_eq(y[1].shape, [10, 5, 20])
test_eq(y[2].shape, [10, 5, 20])
test_eq(tst[1].decoder.weight, tst[0].encoder.weight)
```
```python
#test drop_mult
tst = get_language_model(AWD_LSTM, 100, config=config, drop_mult=0.5)
test_eq(tst[1].output_dp.p, config['output_p']*0.5)
for rnn in tst[0].rnns: test_eq(rnn.weight_p, config['weight_p']*0.5)
for dp in tst[0].hidden_dps: test_eq(dp.p, config['hidden_p']*0.5)
test_eq(tst[0].encoder_dp.embed_p, config['embed_p']*0.5)
test_eq(tst[0].input_dp.p, config['input_p']*0.5)
```
Classification models
SentenceEncoder
SentenceEncoder (bptt:int, module:nn.Module, pad_idx:int=1, max_len:int=None)
Create an encoder over module that can process a full sentence.
| | Type | Default | Details |
|---|---|---|---|
| bptt | int | | Backpropagation through time |
| module | Module | | A module that can process up to [bs, bptt] tokens |
| pad_idx | int | 1 | Padding token id |
| max_len | int | None | Maximal output length |
This module expects the inputs padded with most of the padding first, with the sequence beginning at a round multiple of bptt (and the rest of the padding at the end). Use pad_input_chunk to get your data in a suitable format.
```python
mod = nn.Embedding(5, 10)
tst = SentenceEncoder(5, mod, pad_idx=0)
x = torch.randint(1, 5, (3, 15))
x[2,:5] = 0
out,mask = tst(x)

test_eq(out[:1], mod(x)[:1])
test_eq(out[2,5:], mod(x)[2,5:])
test_eq(mask, x==0)
```
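To make the "padding first, starting at a round multiple of bptt" requirement concrete, here is a minimal sketch built only from the pieces shown above; the sequence lengths and sizes are illustrative, and in practice pad_input_chunk produces this layout for you.

```python
# Minimal sketch of the expected input layout (illustrative values).
bptt, pad_idx = 5, 0
mod = nn.Embedding(5, 10)
enc = SentenceEncoder(bptt, mod, pad_idx=pad_idx)

long_seq  = torch.randint(1, 5, (15,))                           # 3 full bptt chunks
short_seq = torch.randint(1, 5, (10,))                           # 2 full bptt chunks
pad       = torch.full((5,), pad_idx, dtype=torch.long)          # padding goes in front,
padded    = torch.cat([pad, short_seq])                          # real tokens start at 1*bptt

x = torch.stack([long_seq, padded])                              # batch of shape [2, 15]
out, mask = enc(x)
print(out.shape, mask.shape)                                     # torch.Size([2, 15, 10]) torch.Size([2, 15])
```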
masked_concat_pool
masked_concat_pool (output:torch.Tensor, mask:torch.Tensor, bptt:int)
Pool MultiBatchEncoder outputs into one vector [last_hidden, max_pool, avg_pool]
| | Type | Details |
|---|---|---|
| output | Tensor | Output of sentence encoder |
| mask | Tensor | Boolean mask as returned by sentence encoder |
| bptt | int | Backpropagation through time |
| Returns | Tensor | Concatenation of [last_hidden, max_pool, avg_pool] |
```python
out = torch.randn(2,4,5)
mask = tensor([[True,True,False,False], [False,False,False,True]])
x = masked_concat_pool(out, mask, 2)

test_close(x[0,:5], out[0,-1])
test_close(x[1,:5], out[1,-2])
test_close(x[0,5:10], out[0,2:].max(dim=0)[0])
test_close(x[1,5:10], out[1,:3].max(dim=0)[0])
test_close(x[0,10:], out[0,2:].mean(dim=0))
test_close(x[1,10:], out[1,:3].mean(dim=0))

#Test the result is independent of padding by replacing the padded part by some random content
out1 = torch.randn(2,4,5)
out1[0,2:] = out[0,2:].clone()
out1[1,:3] = out[1,:3].clone()
x1 = masked_concat_pool(out1, mask, 2)
test_eq(x, x1)
```
PoolingLinearClassifier
PoolingLinearClassifier (dims:list, ps:list, bptt:int, y_range:tuple=None)
Create a linear classifier with pooling
| | Type | Default | Details |
|---|---|---|---|
| dims | list | | List of hidden sizes for MLP as ints |
| ps | list | | List of dropout probabilities as floats |
| bptt | int | | Backpropagation through time |
| y_range | tuple | None | Tuple of (low, high) output value bounds |
```python
mod = nn.Embedding(5, 10)
tst = SentenceEncoder(5, mod, pad_idx=0)
x = torch.randint(1, 5, (3, 15))
x[2,:5] = 0
out,mask = tst(x)

test_eq(out[:1], mod(x)[:1])
test_eq(out[2,5:], mod(x)[2,5:])
test_eq(mask, x==0)
```
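PoolingLinearClassifier consumes the (output, mask) tuple produced by SentenceEncoder. Below is a minimal sketch of chaining the two in a SequentialRNN, roughly what get_text_classifier assembles; the layer sizes are illustrative, and the first entry of dims must be three times the encoder output size because masked_concat_pool concatenates [last_hidden, max_pool, avg_pool].

```python
# Illustrative sketch: sentence encoder + pooling classifier head (sizes are arbitrary).
emb_sz, bptt, n_class = 10, 5, 2
mod   = nn.Embedding(5, emb_sz)
enc   = SentenceEncoder(bptt, mod, pad_idx=0)
head  = PoolingLinearClassifier(dims=[3*emb_sz, 8, n_class], ps=[0.1, 0.1], bptt=bptt)
model = SequentialRNN(enc, head)

x = torch.randint(1, 5, (3, 15))
preds = model(x)[0]            # first output element holds the class scores
print(preds.shape)             # torch.Size([3, 2])
```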
get_text_classifier
get_text_classifier (arch:callable, vocab_sz:int, n_class:int, seq_len:int=72, config:dict=None, drop_mult:float=1.0, lin_ftrs:list=None, ps:list=None, pad_idx:int=1, max_len:int=1440, y_range:tuple=None)
Create a text classifier from arch and its config, maybe pretrained.
| | Type | Default | Details |
|---|---|---|---|
| arch | callable | | Function or class that can generate a language model architecture |
| vocab_sz | int | | Size of the vocabulary |
| n_class | int | | Number of classes |
| seq_len | int | 72 | Backpropagation through time |
| config | dict | None | Encoder configuration dictionary |
| drop_mult | float | 1.0 | Multiplicative factor to scale all dropout probabilities in config |
| lin_ftrs | list | None | List of hidden sizes for classifier head as ints |
| ps | list | None | List of dropout probabilities for classifier head as floats |
| pad_idx | int | 1 | Padding token id |
| max_len | int | 1440 | Maximal output length for SentenceEncoder |
| y_range | tuple | None | Tuple of (low, high) output value bounds |
```python
config = awd_lstm_clas_config.copy()
config.update({'n_hid':10, 'emb_sz':20})

tst = get_text_classifier(AWD_LSTM, 100, 3, config=config)
x = torch.randint(2, 100, (10,5))
y = tst(x)
test_eq(y[0].shape, [10, 3])
test_eq(y[1].shape, [10, 5, 20])
test_eq(y[2].shape, [10, 5, 20])
```
```python
#test padding gives same results
tst.eval()
y = tst(x)
x1 = torch.cat([x, tensor([2,1,1,1,1,1,1,1,1,1])[:,None]], dim=1)
y1 = tst(x1)
test_close(y[0][1:], y1[0][1:])
```
```python
#test drop_mult
tst = get_text_classifier(AWD_LSTM, 100, 3, config=config, drop_mult=0.5)
test_eq(tst[1].layers[1][1].p, 0.1)
test_eq(tst[1].layers[0][1].p, config['output_p']*0.5)
for rnn in tst[0].module.rnns: test_eq(rnn.weight_p, config['weight_p']*0.5)
for dp in tst[0].module.hidden_dps: test_eq(dp.p, config['hidden_p']*0.5)
test_eq(tst[0].module.encoder_dp.embed_p, config['embed_p']*0.5)
test_eq(tst[0].module.input_dp.p, config['input_p']*0.5)
```