#For testing: a fake learner and a metric that isn't an average
@delegates()
class TstLearner(Learner):
    def __init__(self,dls=None,model=None,**kwargs): self.pred,self.xb,self.yb = None,None,None
Metrics
Core metric
This is where the function that converts scikit-learn metrics to fastai metrics is defined. You should skip this section unless you want to know all about the internals of fastai.
AccumMetric
AccumMetric (func, dim_argmax=None, activation='no', thresh=None, to_np=False, invert_arg=False, flatten=True, name=None, **kwargs)
Stores predictions and targets on CPU in accumulate to perform final calculations with func.

func is only applied to the accumulated predictions/targets when the value attribute is asked for (so at the end of a validation/training phase, in use with Learner and its Recorder). The signature of func should be inp,targ (where inp are the predictions of the model and targ the corresponding labels).
For classification problems with a single label, predictions need to be transformed with a softmax then an argmax before being compared to the targets. Since a softmax doesn't change the order of the numbers, we can just apply the argmax. Pass along dim_argmax to have this done by AccumMetric (usually -1 will work pretty well). If you need to pass the probabilities to your metric rather than the predictions, use activation=ActivationType.Softmax.

For classification problems with multiple labels, or if your targets are one-hot encoded, predictions may need to pass through a sigmoid (if it wasn't included in your model) then be compared to a given threshold (to decide between 0 and 1). This is done by AccumMetric if you pass activation=ActivationType.Sigmoid and/or a value for thresh.
If you want to use a metric function from sklearn.metrics, you will need to convert predictions and labels to numpy arrays with to_np=True. Also, scikit-learn metrics adopt the convention y_true, y_preds, which is the opposite of ours, so you will need to pass invert_arg=True to make AccumMetric do the inversion for you.
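For instance, here is a minimal sketch of that combination, wrapping sklearn's mean_absolute_error directly (skm_to_fastai, covered below, does this plumbing for you):

import sklearn.metrics as skm
#to_np converts the accumulated tensors to numpy arrays; invert_arg swaps
#(pred, targ) into sklearn's (y_true, y_pred) argument order
mae_sk = AccumMetric(skm.mean_absolute_error, to_np=True, invert_arg=True)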
def _l2_mean(x,y): return torch.sqrt((x.float()-y.float()).pow(2).mean())
#Go through a fake cycle with various batch sizes and compute the value of met
def compute_val(met, x1, x2):
    met.reset()
    vals = [0,6,15,20]
    learn = TstLearner()
    for i in range(3):
        learn.pred,learn.yb = x1[vals[i]:vals[i+1]],(x2[vals[i]:vals[i+1]],)
        met.accumulate(learn)
    return met.value

x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(_l2_mean)
test_close(compute_val(tst, x1, x2), _l2_mean(x1, x2))
test_eq(torch.cat(tst.preds), x1.view(-1))
test_eq(torch.cat(tst.targs), x2.view(-1))
#test argmax
x1,x2 = torch.randn(20,5),torch.randint(0, 5, (20,))
tst = AccumMetric(_l2_mean, dim_argmax=-1)
test_close(compute_val(tst, x1, x2), _l2_mean(x1.argmax(dim=-1), x2))
#test thresh
x1,x2 = torch.randn(20,5),torch.randint(0, 2, (20,5)).bool()
tst = AccumMetric(_l2_mean, thresh=0.5)
test_close(compute_val(tst, x1, x2), _l2_mean((x1 >= 0.5), x2))
#test sigmoid
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(_l2_mean, activation=ActivationType.Sigmoid)
test_close(compute_val(tst, x1, x2), _l2_mean(torch.sigmoid(x1), x2))
#test to_np
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(lambda x,y: isinstance(x, np.ndarray) and isinstance(y, np.ndarray), to_np=True)
assert compute_val(tst, x1, x2)
#test invert_arg
x1,x2 = torch.randn(20,5),torch.randn(20,5)
tst = AccumMetric(lambda x,y: torch.sqrt(x.pow(2).mean()))
test_close(compute_val(tst, x1, x2), torch.sqrt(x1.pow(2).mean()))
tst = AccumMetric(lambda x,y: torch.sqrt(x.pow(2).mean()), invert_arg=True)
test_close(compute_val(tst, x1, x2), torch.sqrt(x2.pow(2).mean()))
skm_to_fastai
skm_to_fastai (func, is_class=True, thresh=None, axis=-1, activation=None, **kwargs)
Convert func from sklearn.metrics to a fastai metric
This is the quickest way to use a scikit-learn metric in a fastai training loop. is_class indicates if you are in a classification problem or not. In this case:

- leaving thresh to None indicates it's a single-label classification problem and predictions will pass through an argmax over axis before being compared to the targets
- setting a value for thresh indicates it's a multi-label classification problem and predictions will pass through a sigmoid (which can be deactivated with activation=ActivationType.No) and be compared to thresh (to decide between 0 and 1) before being compared to the targets

If is_class=False, it indicates you are in a regression problem, and predictions are compared to the targets without being modified. In all cases, kwargs are extra keyword arguments passed to func.
tst_single = skm_to_fastai(skm.precision_score)
x1,x2 = torch.randn(20,2),torch.randint(0, 2, (20,))
test_close(compute_val(tst_single, x1, x2), skm.precision_score(x2, x1.argmax(dim=-1)))
tst_multi = skm_to_fastai(skm.precision_score, thresh=0.2)
x1,x2 = torch.randn(20),torch.randint(0, 2, (20,))
test_close(compute_val(tst_multi, x1, x2), skm.precision_score(x2, torch.sigmoid(x1) >= 0.2))
tst_multi = skm_to_fastai(skm.precision_score, thresh=0.2, activation=ActivationType.No)
x1,x2 = torch.randn(20),torch.randint(0, 2, (20,))
test_close(compute_val(tst_multi, x1, x2), skm.precision_score(x2, x1 >= 0.2))
tst_reg = skm_to_fastai(skm.r2_score, is_class=False)
x1,x2 = torch.randn(20,5),torch.randn(20,5)
test_close(compute_val(tst_reg, x1, x2), skm.r2_score(x2.view(-1).numpy(), x1.view(-1).numpy()))
test_close(tst_reg(x1, x2), skm.r2_score(x2.view(-1).numpy(), x1.view(-1).numpy()))
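As a usage sketch (assuming dls and model are already defined for a single-label classification task), a converted metric is passed to a Learner like any other fastai metric:

f1_macro = skm_to_fastai(skm.f1_score, average='macro')
learn = Learner(dls, model, metrics=[accuracy, f1_macro])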
optim_metric
optim_metric (f, argname, bounds, tol=0.01, do_neg=True, get_x=False)
Replace metric f with a version that optimizes argument argname
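As a minimal sketch (assuming a multi-label setup where accuracy_multi applies), optim_metric can wrap a metric so that each call scans bounds for the argname value that maximizes it; pass get_x=True to also get the best value found back:

opt_acc = optim_metric(accuracy_multi, 'thresh', (0.1, 0.9))
x1,x2 = torch.randn(20,5),torch.randint(0, 2, (20,5)).bool()
opt_acc(x1, x2)  #accuracy_multi at the best thresh found in (0.1, 0.9)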
Single-label classification
All functions defined in this section are intended for single-label classification and targets that are not one-hot encoded. For multi-label problems or one-hot encoded targets, use the version suffixed with multi.
Many metrics in fastai are thin wrappers around sklearn functionality. However, sklearn metrics can handle Python lists of strings, amongst other things, whereas fastai metrics work with PyTorch, and thus require tensors. The arguments that are passed to metrics are the values after all transformations, such as categories being converted to indices, have occurred. This means that when you pass a label to a metric, for instance, you must pass indices, not strings. This conversion can be done with vocab.map_obj.
accuracy
accuracy (inp, targ, axis=-1)
Compute accuracy with targ when pred is bs * n_classes
#For testing
def change_targ(targ, n, c):
    idx = torch.randperm(len(targ))[:n]
    res = targ.clone()
    for i in idx: res[i] = (res[i]+random.randint(1,c-1))%c
    return res

x = torch.randn(4,5)
y = x.argmax(dim=1)
test_eq(accuracy(x,y), 1)
y1 = change_targ(y, 2, 5)
test_eq(accuracy(x,y1), 0.5)
test_eq(accuracy(x.unsqueeze(1).expand(4,2,5), torch.stack([y,y1], dim=1)), 0.75)
error_rate
error_rate (inp, targ, axis=-1)
1 - accuracy
x = torch.randn(4,5)
y = x.argmax(dim=1)
test_eq(error_rate(x,y), 0)
y1 = change_targ(y, 2, 5)
test_eq(error_rate(x,y1), 0.5)
test_eq(error_rate(x.unsqueeze(1).expand(4,2,5), torch.stack([y,y1], dim=1)), 0.25)
top_k_accuracy
top_k_accuracy (inp, targ, k=5, axis=-1)
Computes the Top-k accuracy (targ is in the top k predictions of inp)
x = torch.randn(6,5)
y = torch.arange(0,6)
test_eq(top_k_accuracy(x[:5],y[:5]), 1)
test_eq(top_k_accuracy(x, y), 5/6)
APScoreBinary
APScoreBinary (axis=-1, average='macro', pos_label=1, sample_weight=None)
Average Precision for single-label binary classification problems
See the scikit-learn documentation for more details.
BalancedAccuracy
BalancedAccuracy (axis=-1, sample_weight=None, adjusted=False)
Balanced Accuracy for single-label binary classification problems
See the scikit-learn documentation for more details.
BrierScore
BrierScore (axis=-1, sample_weight=None, pos_label=None)
Brier score for single-label classification problems
See the scikit-learn documentation for more details.
CohenKappa
CohenKappa (axis=-1, labels=None, weights=None, sample_weight=None)
Cohen kappa for single-label classification problems
See the scikit-learn documentation for more details.
F1Score
F1Score (axis=-1, labels=None, pos_label=1, average='binary', sample_weight=None)
F1 score for single-label classification problems
See the scikit-learn documentation for more details.
FBeta
FBeta (beta, axis=-1, labels=None, pos_label=1, average='binary', sample_weight=None)
FBeta score with beta for single-label classification problems
See the scikit-learn documentation for more details.
HammingLoss
HammingLoss (axis=-1, sample_weight=None)
Hamming loss for single-label classification problems
See the scikit-learn documentation for more details.
Jaccard
Jaccard (axis=-1, labels=None, pos_label=1, average='binary', sample_weight=None)
Jaccard score for single-label classification problems
See the scikit-learn documentation for more details.
Precision
Precision (axis=-1, labels=None, pos_label=1, average='binary', sample_weight=None)
Precision for single-label classification problems
See the scikit-learn documentation for more details.
Recall
Recall (axis=-1, labels=None, pos_label=1, average='binary', sample_weight=None)
Recall for single-label classification problems
See the scikit-learn documentation for more details.
RocAuc
RocAuc (axis=-1, average='macro', sample_weight=None, max_fpr=None, multi_class='ovr')
Area Under the Receiver Operating Characteristic Curve for single-label multiclass classification problems
See the scikit-learn documentation for more details.
RocAucBinary
RocAucBinary (axis=-1, average='macro', sample_weight=None, max_fpr=None, multi_class='raise')
Area Under the Receiver Operating Characteristic Curve for single-label binary classification problems
See the scikit-learn documentation for more details.
MatthewsCorrCoef
MatthewsCorrCoef (sample_weight=None, **kwargs)
Matthews correlation coefficient for single-label classification problems
See the scikit-learn documentation for more details.
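All of the above can be passed to a Learner as metrics. A minimal sketch (assuming dls and model are defined for a single-label task): function metrics such as accuracy are passed as-is, while class-based metrics such as F1Score or RocAuc must be instantiated.

learn = Learner(dls, model, metrics=[accuracy, F1Score(average='macro'), RocAuc()])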
Multi-label classification
accuracy_multi
accuracy_multi (inp, targ, thresh=0.5, sigmoid=True)
Compute accuracy when inp and targ are the same size.
#For testing
def change_1h_targ(targ, n):
    idx = torch.randperm(targ.numel())[:n]
    res = targ.clone().view(-1)
    for i in idx: res[i] = 1-res[i]
    return res.view(targ.shape)

x = torch.randn(4,5)
y = (torch.sigmoid(x) >= 0.5).byte()
test_eq(accuracy_multi(x,y), 1)
test_eq(accuracy_multi(x,1-y), 0)
y1 = change_1h_targ(y, 5)
test_eq(accuracy_multi(x,y1), 0.75)
#Different thresh
y = (torch.sigmoid(x) >= 0.2).byte()
test_eq(accuracy_multi(x,y, thresh=0.2), 1)
test_eq(accuracy_multi(x,1-y, thresh=0.2), 0)
y1 = change_1h_targ(y, 5)
test_eq(accuracy_multi(x,y1, thresh=0.2), 0.75)
#No sigmoid
y = (x >= 0.5).byte()
test_eq(accuracy_multi(x,y, sigmoid=False), 1)
test_eq(accuracy_multi(x,1-y, sigmoid=False), 0)
y1 = change_1h_targ(y, 5)
test_eq(accuracy_multi(x,y1, sigmoid=False), 0.75)
APScoreMulti
APScoreMulti (sigmoid=True, average='macro', pos_label=1, sample_weight=None)
Average Precision for multi-label classification problems
See the scikit-learn documentation for more details.
BrierScoreMulti
BrierScoreMulti (thresh=0.5, sigmoid=True, sample_weight=None, pos_label=None)
Brier score for multi-label classification problems
See the scikit-learn documentation for more details.
F1ScoreMulti
F1ScoreMulti (thresh=0.5, sigmoid=True, labels=None, pos_label=1, average='macro', sample_weight=None)
F1 score for multi-label classification problems
See the scikit-learn documentation for more details.
FBetaMulti
FBetaMulti (beta, thresh=0.5, sigmoid=True, labels=None, pos_label=1, average='macro', sample_weight=None)
FBeta score with beta
for multi-label classification problems
See the scikit-learn documentation for more details.
HammingLossMulti
HammingLossMulti (thresh=0.5, sigmoid=True, labels=None, sample_weight=None)
Hamming loss for multi-label classification problems
See the scikit-learn documentation for more details.
JaccardMulti
JaccardMulti (thresh=0.5, sigmoid=True, labels=None, pos_label=1, average='macro', sample_weight=None)
Jaccard score for multi-label classification problems
See the scikit-learn documentation for more details.
MatthewsCorrCoefMulti
MatthewsCorrCoefMulti (thresh=0.5, sigmoid=True, sample_weight=None)
Matthews correlation coefficient for multi-label classification problems
See the scikit-learn documentation for more details.
PrecisionMulti
PrecisionMulti (thresh=0.5, sigmoid=True, labels=None, pos_label=1, average='macro', sample_weight=None)
Precision for multi-label classification problems
See the scikit-learn documentation for more details.
RecallMulti
RecallMulti (thresh=0.5, sigmoid=True, labels=None, pos_label=1, average='macro', sample_weight=None)
Recall for multi-label classification problems
See the scikit-learn documentation for more details.
RocAucMulti
RocAucMulti (sigmoid=True, average='macro', sample_weight=None, max_fpr=None)
Area Under the Receiver Operating Characteristic Curve for multi-label binary classification problems
roc_auc_metric = RocAucMulti(sigmoid=False)
x,y = torch.tensor([np.arange(start=0, stop=0.2, step=0.04)]*20), torch.tensor([0, 0, 1, 1]).repeat(5)
assert compute_val(roc_auc_metric, x, y) == 0.5
See the scikit-learn documentation for more details.
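The multi-label versions follow the same pattern as their single-label counterparts. A minimal sketch (assuming dls and model are defined with one-hot encoded targets); partial can be used to change the default thresh of accuracy_multi:

learn = Learner(dls, model, metrics=[partial(accuracy_multi, thresh=0.5), F1ScoreMulti(thresh=0.5, average='macro')])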
Regression
mse
mse (inp, targ)
Mean squared error between inp and targ.
x1,x2 = torch.randn(4,5),torch.randn(4,5)
test_close(mse(x1,x2), (x1-x2).pow(2).mean())
rmse
rmse (preds, targs)
Root mean squared error
x1,x2 = torch.randn(20,5),torch.randn(20,5)
test_eq(compute_val(rmse, x1, x2), torch.sqrt(F.mse_loss(x1,x2)))
mae
mae (inp, targ)
Mean absolute error between inp and targ.
x1,x2 = torch.randn(4,5),torch.randn(4,5)
test_eq(mae(x1,x2), torch.abs(x1-x2).mean())
msle
msle (inp, targ)
Mean squared logarithmic error between inp and targ.
x1,x2 = torch.randn(4,5),torch.randn(4,5)
x1,x2 = torch.relu(x1),torch.relu(x2)
test_close(msle(x1,x2), (torch.log(x1+1)-torch.log(x2+1)).pow(2).mean())
exp_rmspe
exp_rmspe (preds, targs)
Root mean square percentage error of the exponential of predictions and targets
x1,x2 = torch.randn(20,5),torch.randn(20,5)
test_eq(compute_val(exp_rmspe, x1, x2), torch.sqrt((((torch.exp(x2) - torch.exp(x1))/torch.exp(x2))**2).mean()))
ExplainedVariance
ExplainedVariance (sample_weight=None)
Explained variance between predictions and targets
See the scikit-learn documentation for more details.
R2Score
R2Score (sample_weight=None)
R2 score between predictions and targets
See the scikit-learn documentation for more details.
PearsonCorrCoef
PearsonCorrCoef (dim_argmax=None, activation='no', thresh=None, to_np=False, invert_arg=False, flatten=True, name=None)
Pearson correlation coefficient for regression problems
See the scipy documentation for more details.
x = torch.randint(-999, 999,(20,))
y = torch.randint(-999, 999,(20,))
test_eq(compute_val(PearsonCorrCoef(), x, y), scs.pearsonr(x.view(-1), y.view(-1))[0])
SpearmanCorrCoef
SpearmanCorrCoef (dim_argmax=None, axis=0, nan_policy='propagate', activation='no', thresh=None, to_np=False, invert_arg=False, flatten=True, name=None)
Spearman correlation coefficient for regression problems
See the scipy documentation for more details.
x = torch.randint(-999, 999,(20,))
y = torch.randint(-999, 999,(20,))
test_eq(compute_val(SpearmanCorrCoef(), x, y), scs.spearmanr(x.view(-1), y.view(-1))[0])
Segmentation
from fastai.vision.all import *
model = resnet34()
x = cast(torch.rand(1,3,128,128), TensorImage)
type(model(x))
fastai.torch_core.TensorImage
foreground_acc
foreground_acc (inp, targ, bkg_idx=0, axis=1)
Computes non-background accuracy for multiclass segmentation
x = cast(torch.randn(4,5,3,3), TensorImage)
y = cast(x, TensorMask).argmax(dim=1)[:,None]
test_eq(foreground_acc(x,y), 1)
y[0] = 0 #the 0s are ignored so we get the same value
test_eq(foreground_acc(x,y), 1)
Dice
Dice (axis=1)
Dice coefficient metric for binary target in segmentation
x1 = cast(torch.randn(20,2,3,3), TensorImage)
x2 = cast(torch.randint(0, 2, (20, 3, 3)), TensorMask)
pred = x1.argmax(1)
inter = (pred*x2).float().sum().item()
union = (pred+x2).float().sum().item()
test_eq(compute_val(Dice(), x1, x2), 2*inter/union)
DiceMulti
DiceMulti (axis=1)
Averaged Dice metric (Macro F1) for multiclass target in segmentation
The DiceMulti method implements the “Averaged F1: arithmetic mean over harmonic means” described in this publication: https://arxiv.org/pdf/1911.03347.pdf
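Concretely (using the TP/FP/FN notation from the comments in the test below), a binary Dice score is computed per class and the results are averaged arithmetically over the C classes:

$$\mathrm{Dice}_c = \frac{2\,\mathrm{TP}_c}{2\,\mathrm{TP}_c + \mathrm{FP}_c + \mathrm{FN}_c}, \qquad \mathrm{DiceMulti} = \frac{1}{C}\sum_{c=1}^{C}\mathrm{Dice}_c$$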
x1a = torch.ones(20,1,1,1)
x1b = torch.clone(x1a)*0.5
x1c = torch.clone(x1a)*0.3
x1 = torch.cat((x1a,x1b,x1c),dim=1) # Prediction: 20xClass0
x2 = torch.zeros(20,1,1) # Target: 20xClass0
test_eq(compute_val(DiceMulti(), x1, x2), 1.)

x2 = torch.ones(20,1,1) # Target: 20xClass1
test_eq(compute_val(DiceMulti(), x1, x2), 0.)

x2a = torch.zeros(10,1,1)
x2b = torch.ones(5,1,1)
x2c = torch.ones(5,1,1) * 2
x2 = torch.cat((x2a,x2b,x2c),dim=0) # Target: 10xClass0, 5xClass1, 5xClass2
dice1 = (2*10)/(2*10+10) # Dice: 2*TP/(2*TP+FP+FN)
dice2 = 0
dice3 = 0
test_eq(compute_val(DiceMulti(), x1, x2), (dice1+dice2+dice3)/3)
JaccardCoeff
JaccardCoeff (axis=1)
Implementation of the Jaccard coefficient that is lighter in RAM
x1 = cast(torch.randn(20,2,3,3), TensorImage)
x2 = cast(torch.randint(0, 2, (20, 3, 3)), TensorMask)
pred = x1.argmax(1)
inter = (pred*x2).float().sum().item()
union = (pred+x2).float().sum().item()
test_eq(compute_val(JaccardCoeff(), x1, x2), inter/(union-inter))
JaccardCoeffMulti
JaccardCoeffMulti (axis=1)
Averaged Jaccard coefficient metric (mIoU) for multiclass target in segmentation
x1a = torch.ones(20,1,1,1)
x1b = torch.clone(x1a)*0.5
x1c = torch.clone(x1a)*0.3
x1 = torch.cat((x1a,x1b,x1c), dim=1) # Prediction: 20xClass0
x2 = torch.zeros(20,1,1) # Target: 20xClass0
test_eq(compute_val(JaccardCoeffMulti(), x1, x2), 1.)

x2 = torch.ones(20,1,1) # Target: 20xClass1
test_eq(compute_val(JaccardCoeffMulti(), x1, x2), 0.)

x2a = torch.zeros(10,1,1)
x2b = torch.ones(5,1,1)
x2c = torch.ones(5,1,1) * 2
x2 = torch.cat((x2a,x2b,x2c), dim=0) # Target: 10xClass0, 5xClass1, 5xClass2
jcrd1 = 10/(10+10) # Jaccard: TP/(TP+FP+FN)
jcrd2 = 0
jcrd3 = 0
test_eq(compute_val(JaccardCoeffMulti(), x1, x2), (jcrd1+jcrd2+jcrd3)/3)
NLP
CorpusBLEUMetric
CorpusBLEUMetric (vocab_sz=5000, axis=-1)
Blueprint for defining a metric
def create_vcb_emb(pred, targ):
    # create vocab "embedding" for predictions
    vcb_sz = max(torch.unique(torch.cat([pred, targ])))+1
    pred_emb = torch.zeros(pred.size()[0], pred.size()[1], vcb_sz)
    for i,v in enumerate(pred):
        pred_emb[i].scatter_(1, v.view(len(v),1), 1)
    return pred_emb
def compute_bleu_val(met, x1, x2):
    met.reset()
    learn = TstLearner()
    learn.training = False
    for i in range(len(x1)):
        learn.pred,learn.yb = x1, (x2,)
        met.accumulate(learn)
    return met.value
targ = torch.tensor([[1,2,3,4,5,6,1,7,8]])
pred = torch.tensor([[1,9,3,4,5,6,1,10,8]])
pred_emb = create_vcb_emb(pred, targ)
test_close(compute_bleu_val(CorpusBLEUMetric(), pred_emb, targ), 0.48549)

targ = torch.tensor([[1,2,3,4,5,6,1,7,8],[1,2,3,4,5,6,1,7,8]])
pred = torch.tensor([[1,9,3,4,5,6,1,10,8],[1,9,3,4,5,6,1,10,8]])
pred_emb = create_vcb_emb(pred, targ)
test_close(compute_bleu_val(CorpusBLEUMetric(), pred_emb, targ), 0.48549)
The BLEU metric was introduced in this article as a way to evaluate the performance of translation models. It's based on the precision of n-grams in your prediction compared to your target. See the fastai NLP course BLEU notebook for a more detailed description of BLEU.
The smoothing used in the precision calculation is the same as in SacreBLEU, which in turn is “method 3” from the Chen & Cherry, 2014 paper.
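A hedged usage sketch (assuming dls with a vocab and a sequence-to-sequence model are defined); vocab_sz should match the size of your vocabulary:

learn = Learner(dls, model, loss_func=CrossEntropyLossFlat(), metrics=[CorpusBLEUMetric(vocab_sz=len(dls.vocab))])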
Perplexity
Perplexity ()
Perplexity (exponential of cross-entropy loss) for Language Models
x1,x2 = torch.randn(20,5),torch.randint(0, 5, (20,))
tst = perplexity
tst.reset()
vals = [0,6,15,20]
learn = TstLearner()
for i in range(3):
    learn.yb = (x2[vals[i]:vals[i+1]],)
    learn.loss = F.cross_entropy(x1[vals[i]:vals[i+1]],x2[vals[i]:vals[i+1]])
    tst.accumulate(learn)
test_close(tst.value, torch.exp(F.cross_entropy(x1,x2)))
LossMetric
LossMetric (attr, nm=None)
Create a metric from loss_func.attr named nm
LossMetrics
LossMetrics (attrs, nms=None)
List of LossMetric for each of attrs and nms
class CombineL1L2(Module):
    def forward(self, out, targ):
        self.l1 = F.l1_loss(out, targ)
        self.l2 = F.mse_loss(out, targ)
        return self.l1+self.l2

learn = synth_learner(metrics=LossMetrics('l1,l2'))
learn.loss_func = CombineL1L2()
learn.fit(2)
| epoch | train_loss | valid_loss | l1 | l2 | time |
|---|---|---|---|---|---|
| 0 | 15.296746 | 12.515826 | 3.019884 | 9.495943 | 00:00 |
| 1 | 13.290909 | 8.719325 | 2.454751 | 6.264574 | 00:00 |