learn = synth_learner()
learn.fit(10, lr=100, cbs=TerminateOnNaNCallback())
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
def TerminateOnNaNCallback(
after_create:NoneType=None, before_fit:NoneType=None, before_epoch:NoneType=None, before_train:NoneType=None,
before_batch:NoneType=None, after_pred:NoneType=None, after_loss:NoneType=None, before_backward:NoneType=None,
after_cancel_backward:NoneType=None, after_backward:NoneType=None, before_step:NoneType=None,
after_cancel_step:NoneType=None, after_step:NoneType=None, after_cancel_batch:NoneType=None,
after_batch:NoneType=None, after_cancel_train:NoneType=None, after_train:NoneType=None,
before_validate:NoneType=None, after_cancel_validate:NoneType=None, after_validate:NoneType=None,
after_cancel_epoch:NoneType=None, after_epoch:NoneType=None, after_cancel_fit:NoneType=None,
after_fit:NoneType=None
):
A Callback that terminates training if loss is NaN.
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
def TrackerCallback(
monitor:str='valid_loss', # value (usually loss or metric) being monitored.
comp:NoneType=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
min_delta:float=0.0, # minimum delta between the last monitor value and the best monitor value.
reset_on_fit:bool=True, # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
):
A Callback that keeps track of the best value in monitor.
When implementing a Callback whose behavior depends on the best value of a metric or loss, subclass this Callback and use its best (the best value so far) and new_best (whether there was a new best value this epoch) attributes. If you want to maintain best over subsequent calls to fit (e.g., Learner.fit_one_cycle), set reset_on_fit = False; with the default reset_on_fit = True, the monitored value is reset before each fit.
comp is the comparison operator used to determine whether a value is better than another (defaults to np.less if ‘loss’ is in the name passed in monitor, np.greater otherwise), and min_delta is an optional float that requires a new value to go over the current best (in the direction given by comp) by at least that amount.
def EarlyStoppingCallback(
monitor:str='valid_loss', # value (usually loss or metric) being monitored.
comp:NoneType=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
min_delta:float=0.0, # minimum delta between the last monitor value and the best monitor value.
patience:int=1, # number of epochs to wait when training has not improved model.
reset_on_fit:bool=True, # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
):
A TrackerCallback that terminates training when monitored quantity stops improving.
comp is the comparison operator used to determine whether a value is better than another (defaults to np.less if ‘loss’ is in the name passed in monitor, np.greater otherwise), and min_delta is an optional float that requires a new value to go over the current best (in the direction given by comp) by at least that amount. patience is the number of epochs you’re willing to wait without improvement.
| epoch | train_loss | valid_loss | mse_loss | time |
|---|---|---|---|---|
| 0 | 20.437918 | 26.406773 | 26.406773 | 00:00 |
| 1 | 20.418514 | 26.406715 | 26.406715 | 00:00 |
| 2 | 20.410892 | 26.406639 | 26.406639 | 00:00 |
No improvement since epoch 0: early stopping
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 13.408870 | 19.617222 | 00:00 |
| 1 | 13.403553 | 19.617184 | 00:00 |
| 2 | 13.403143 | 19.617126 | 00:00 |
No improvement since epoch 0: early stopping
def SaveModelCallback(
monitor:str='valid_loss', # value (usually loss or metric) being monitored.
comp:NoneType=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
min_delta:float=0.0, # minimum delta between the last monitor value and the best monitor value.
fname:str='model', # model name to be used when saving model.
every_epoch:bool=False, # if true, save model after every epoch; else save only when model is better than existing best.
at_end:bool=False, # if true, save model when training ends; else load best model if there is only one saved model.
with_opt:bool=False, # if true, save optimizer state (if any available) when saving model.
reset_on_fit:bool=True, # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
):
A TrackerCallback that saves the model’s best during training and loads it at the end.
comp is the comparison operator used to determine whether a value is better than another (defaults to np.less if ‘loss’ is in the name passed in monitor, np.greater otherwise), and min_delta is an optional float that requires a new value to go over the current best (in the direction given by comp) by at least that amount. The model will be saved in learn.path/learn.model_dir/fname.pth: after every epoch if every_epoch is True, after every nth epoch if an integer n is passed to every_epoch, or otherwise at each improvement of the monitored quantity. When every_epoch is used, the epoch number is appended to the file name (e.g. model_0.pth, model_1.pth, as in the example below).
# Default settings: the best model is written to <path>/models/model.pth.
learn = synth_learner(n_trn=2, path=Path.cwd()/'tmp')
learn.fit(n_epoch=2, cbs=SaveModelCallback())
assert (Path.cwd()/'tmp/models/model.pth').exists()

# Custom fname with at_end=True: the model is saved as end.pth.
learn = synth_learner(n_trn=2, path=Path.cwd()/'tmp')
learn.fit(n_epoch=2, cbs=SaveModelCallback(fname='end',at_end=True))
assert (Path.cwd()/'tmp/models/end.pth').exists()

# every_epoch=True: one checkpoint per epoch, named model_{epoch}.pth.
learn.fit(n_epoch=2, cbs=SaveModelCallback(every_epoch=True))
for i in range(2): assert (Path.cwd()/f'tmp/models/model_{i}.pth').exists()
# Remove the checkpoints written so far; otherwise the leftover model_1.pth
# would make the "odd epochs are not saved" check below fail.
shutil.rmtree(Path.cwd()/'tmp')

# every_epoch=2: only every second epoch (0 and 2) is checkpointed.
learn.fit(n_epoch=4, cbs=SaveModelCallback(every_epoch=2))
for i in range(4):
    if not i%2: assert (Path.cwd()/f'tmp/models/model_{i}.pth').exists()
    else: assert not (Path.cwd()/f'tmp/models/model_{i}.pth').exists()
shutil.rmtree(Path.cwd()/'tmp')
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 19.453270 | 12.539286 | 00:00 |
| 1 | 19.248507 | 12.123456 | 00:00 |
Better model found at epoch 0 with valid_loss value: 12.539285659790039.
Better model found at epoch 1 with valid_loss value: 12.123456001281738.
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 5.197007 | 5.579152 | 00:00 |
| 1 | 5.154862 | 5.445522 | 00:00 |
Better model found at epoch 0 with valid_loss value: 5.5791521072387695.
Better model found at epoch 1 with valid_loss value: 5.445522308349609.
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 4.982775 | 5.264440 | 00:00 |
| 1 | 4.887252 | 5.038480 | 00:00 |
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 4.578584 | 4.781651 | 00:00 |
| 1 | 4.454868 | 4.507101 | 00:00 |
| 2 | 4.322047 | 4.232390 | 00:00 |
| 3 | 4.186467 | 3.957614 | 00:00 |
def ReduceLROnPlateau(
monitor:str='valid_loss', # value (usually loss or metric) being monitored.
comp:NoneType=None, # numpy comparison operator; np.less if monitor is loss, np.greater if monitor is metric.
min_delta:float=0.0, # minimum delta between the last monitor value and the best monitor value.
patience:int=1, # number of epochs to wait when training has not improved model.
factor:float=10.0, # the denominator to divide the learning rate by, when reducing the learning rate.
min_lr:int=0, # the minimum learning rate allowed; learning rate cannot be reduced below this minimum.
reset_on_fit:bool=True, # before model fitting, reset value being monitored to -infinity (if monitor is metric) or +infinity (if monitor is loss).
):
A TrackerCallback that reduces learning rate when a metric has stopped improving.
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 6.122743 | 7.348515 | 00:00 |
| 1 | 6.119377 | 7.348499 | 00:00 |
| 2 | 6.125790 | 7.348477 | 00:00 |
| 3 | 6.131386 | 7.348475 | 00:00 |
Epoch 2: reducing lr to 1e-08
| epoch | train_loss | valid_loss | time |
|---|---|---|---|
| 0 | 16.747515 | 15.265999 | 00:00 |
| 1 | 16.725756 | 15.265974 | 00:00 |
| 2 | 16.735016 | 15.265943 | 00:00 |
| 3 | 16.733360 | 15.265934 | 00:00 |
| 4 | 16.733513 | 15.265925 | 00:00 |
| 5 | 16.730352 | 15.265915 | 00:00 |
Epoch 2: reducing lr to 1e-08
The three derived TrackerCallbacks (SaveModelCallback, ReduceLROnPlateau, and EarlyStoppingCallback) each have an adjusted order so that they can run alongside one another without interference. That order is as follows:
(the actual Callback order number is given in parentheses)
- TrackerCallback (60)
- SaveModelCallback (61)
- ReduceLROnPlateau (62)
- EarlyStoppingCallback (63)