# Core text modules


<!-- WARNING: THIS FILE WAS AUTOGENERATED! DO NOT EDIT! -->

## Language models

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L26"
target="_blank" style="float:right; font-size:smaller">source</a>

### LinearDecoder

``` python

def LinearDecoder(
    n_out:int, # Number of output channels
    n_hid:int, # Number of features in encoder last layer output
    output_p:float=0.1, # Input dropout probability
    tie_encoder:Module=None, # If module is supplied will tie decoder weight to `tie_encoder.weight`
    bias:bool=True, # If `False` the layer will not learn additive bias
):

```

*To go on top of an RNN encoder module (e.g. `AWD_LSTM`) and create a language model.*

``` python
from fastai.text.models.awdlstm import *
```

``` python
enc = AWD_LSTM(100, 20, 10, 2)
x = torch.randint(0, 100, (10,5))
r = enc(x)

tst = LinearDecoder(100, 20, 0.1)
y = tst(r)
test_eq(y[1], r)
test_eq(y[2].shape, r.shape)
test_eq(y[0].shape, [10, 5, 100])

tst = LinearDecoder(100, 20, 0.1, tie_encoder=enc.encoder)
test_eq(tst.decoder.weight, enc.encoder.weight)
```

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L48"
target="_blank" style="float:right; font-size:smaller">source</a>

### SequentialRNN

``` python

def SequentialRNN(
    *args
):

```

*A sequential module that passes the reset call to its children.*

``` python
class _TstMod(Module):
    def reset(self): print('reset')

tst = SequentialRNN(_TstMod(), _TstMod())
test_stdout(tst.reset, 'reset\nreset')
```

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L54"
target="_blank" style="float:right; font-size:smaller">source</a>

### get_language_model

``` python

def get_language_model(
    arch, # Function or class that can generate a language model architecture
    vocab_sz:int, # Size of the vocabulary
    config:dict=None, # Model configuration dictionary
    drop_mult:float=1.0, # Multiplicative factor to scale all dropout probabilities in `config`
)->SequentialRNN: # Language model with `arch` encoder and linear decoder

```

*Create a language model from `arch` and its `config`.*

When `config` is `None`, the default configuration is taken from
`_model_meta[arch]['config_lm']`. `drop_mult` scales every dropout
probability in that config.

``` python
config = awd_lstm_lm_config.copy()
config.update({'n_hid':10, 'emb_sz':20})

tst = get_language_model(AWD_LSTM, 100, config=config)
x = torch.randint(0, 100, (10,5))
y = tst(x)
test_eq(y[0].shape, [10, 5, 100])
test_eq(y[1].shape, [10, 5, 20])
test_eq(y[2].shape, [10, 5, 20])
test_eq(tst[1].decoder.weight, tst[0].encoder.weight)
```

``` python
#test drop_mult
tst = get_language_model(AWD_LSTM, 100, config=config, drop_mult=0.5)
test_eq(tst[1].output_dp.p, config['output_p']*0.5)
for rnn in tst[0].rnns: test_eq(rnn.weight_p, config['weight_p']*0.5)
for dp in tst[0].hidden_dps: test_eq(dp.p, config['hidden_p']*0.5)
test_eq(tst[0].encoder_dp.embed_p, config['embed_p']*0.5)
test_eq(tst[0].input_dp.p, config['input_p']*0.5)
```

## Classification models

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L79"
target="_blank" style="float:right; font-size:smaller">source</a>

### SentenceEncoder

``` python

def SentenceEncoder(
    bptt:int, # Backpropagation through time
    module:Module, # A module that can process up to [`bs`, `bptt`] tokens
    pad_idx:int=1, # Padding token id
    max_len:int=None, # Maximal output length
):

```

*Create an encoder over `module` that can process a full sentence.*

<div>

> **Warning**
>
> This module expects the inputs padded with most of the padding first,
> with the sequence beginning at a round multiple of `bptt` (and the
> rest of the padding at the end). Use
> [`pad_input_chunk`](https://docs.fast.ai/text.data.html#pad_input_chunk)
> to get your data in a suitable format.

</div>

``` python
mod = nn.Embedding(5, 10)
tst = SentenceEncoder(5, mod, pad_idx=0)
x = torch.randint(1, 5, (3, 15))
x[2,:5]=0
out,mask = tst(x)

test_eq(out[:1], mod(x)[:1])
test_eq(out[2,5:], mod(x)[2,5:])
test_eq(mask, x==0)
```

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L108"
target="_blank" style="float:right; font-size:smaller">source</a>

### masked_concat_pool

``` python

def masked_concat_pool(
    output:Tensor, # Output of sentence encoder
    mask:Tensor, # Boolean mask as returned by sentence encoder
    bptt:int, # Backpropagation through time
)->Tensor: # Concatenation of [last_hidden, max_pool, avg_pool]

```

*Pool `SentenceEncoder` outputs into one vector \[last_hidden,
max_pool, avg_pool\]*

``` python
out = torch.randn(2,4,5)
mask = tensor([[True,True,False,False], [False,False,False,True]])
x = masked_concat_pool(out, mask, 2)

test_close(x[0,:5], out[0,-1])
test_close(x[1,:5], out[1,-2])
test_close(x[0,5:10], out[0,2:].max(dim=0)[0])
test_close(x[1,5:10], out[1,:3].max(dim=0)[0])
test_close(x[0,10:], out[0,2:].mean(dim=0))
test_close(x[1,10:], out[1,:3].mean(dim=0))
```

``` python
#Test the result is independent of padding by replacing the padded part by some random content
out1 = torch.randn(2,4,5)
out1[0,2:] = out[0,2:].clone()
out1[1,:3] = out[1,:3].clone()
x1 = masked_concat_pool(out1, mask, 2)
test_eq(x, x1)
```

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L123"
target="_blank" style="float:right; font-size:smaller">source</a>

### PoolingLinearClassifier

``` python

def PoolingLinearClassifier(
    dims:list, # List of hidden sizes for MLP as `int`s
    ps:list, # List of dropout probabilities as `float`s
    bptt:int, # Backpropagation through time
    y_range:tuple=None, # Tuple of (low, high) output value bounds
):

```

*Create a linear classifier with pooling*

``` python
mod = nn.Embedding(5, 10)
tst = SentenceEncoder(5, mod, pad_idx=0)
x = torch.randint(1, 5, (3, 15))
x[2,:5]=0
out,mask = tst(x)

test_eq(out[:1], mod(x)[:1])
test_eq(out[2,5:], mod(x)[2,5:])
test_eq(mask, x==0)
```

------------------------------------------------------------------------

<a
href="https://github.com/fastai/fastai/blob/main/fastai/text/models/core.py#L145"
target="_blank" style="float:right; font-size:smaller">source</a>

### get_text_classifier

``` python

def get_text_classifier(
    arch:Callable, # Function or class that can generate a language model architecture
    vocab_sz:int, # Size of the vocabulary
    n_class:int, # Number of classes
    seq_len:int=72, # Backpropagation through time
    config:dict=None, # Encoder configuration dictionary
    drop_mult:float=1.0, # Multiplicative factor to scale all dropout probabilities in `config`
    lin_ftrs:list=None, # List of hidden sizes for classifier head as `int`s
    ps:list=None, # List of dropout probabilities for classifier head as `float`s
    pad_idx:int=1, # Padding token id
    max_len:int=1440, # Maximal output length for [`SentenceEncoder`](https://docs.fast.ai/text.models.core.html#sentenceencoder)
    y_range:tuple=None, # Tuple of (low, high) output value bounds
):

```

*Create a text classifier from `arch` and its `config`.*

``` python
config = awd_lstm_clas_config.copy()
config.update({'n_hid':10, 'emb_sz':20})

tst = get_text_classifier(AWD_LSTM, 100, 3, config=config)
x = torch.randint(2, 100, (10,5))
y = tst(x)
test_eq(y[0].shape, [10, 3])
test_eq(y[1].shape, [10, 5, 20])
test_eq(y[2].shape, [10, 5, 20])
```

``` python
#test padding gives same results
tst.eval()
y = tst(x)
x1 = torch.cat([x, tensor([2,1,1,1,1,1,1,1,1,1])[:,None]], dim=1)
y1 = tst(x1)
test_close(y[0][1:],y1[0][1:])
```

``` python
#test drop_mult
tst = get_text_classifier(AWD_LSTM, 100, 3, config=config, drop_mult=0.5)
test_eq(tst[1].layers[1][1].p, 0.1)
test_eq(tst[1].layers[0][1].p, config['output_p']*0.5)
for rnn in tst[0].module.rnns: test_eq(rnn.weight_p, config['weight_p']*0.5)
for dp in tst[0].module.hidden_dps: test_eq(dp.p, config['hidden_p']*0.5)
test_eq(tst[0].module.encoder_dp.embed_p, config['embed_p']*0.5)
test_eq(tst[0].module.input_dp.p, config['input_p']*0.5)
```
