Notebook Launcher examples

from fastai.vision.all import *
from fastai.text.all import *
from fastai.tabular.all import *
from fastai.collab import *

from accelerate import notebook_launcher
from fastai.distributed import *
Important

Before running, make sure Accelerate has been configured, either by running `accelerate config` on the command line or by calling `write_basic_config()` (from `accelerate.utils`) in Python.

# from accelerate.utils import write_basic_config
# write_basic_config()

Image Classification

# Fetch the PETS dataset with `untar_data` and point `path` at its `images` subfolder.
path = untar_data(URLs.PETS)/'images'

def is_cat(fname):
    """Return True when the file name `fname` starts with an uppercase letter.

    Used as the labelling function for the Pets images.
    NOTE(review): fastai tutorials rely on cat breeds being capitalised in
    this dataset -- confirm if the dataset changes.

    Defined at module level instead of as a lambda because
    `ImageDataLoaders.from_name_func` refuses lambdas on Windows and because
    lambdas cannot be pickled (e.g. by worker processes or `learn.export`).
    """
    return fname[0].isupper()


def train():
    """Fine-tune a resnet34 image classifier on the Pets images.

    Reads the module-level `path`. Builds the DataLoaders (20% of the files
    held out for validation, every image resized to 224), creates a
    mixed-precision `vision_learner`, and runs one `fine_tune` epoch inside
    fastai's distributed context.

    Meant to be passed to `notebook_launcher`, which runs it once per process.
    """
    dls = ImageDataLoaders.from_name_func(
        path, get_image_files(path), valid_pct=0.2,
        label_func=is_cat, item_tfms=Resize(224))
    learn = vision_learner(dls, resnet34, metrics=error_rate).to_fp16()
    # `in_notebook=True` because the processes are spawned by `notebook_launcher`;
    # `sync_bn=False` keeps the model's batch-norm layers as they are.
    with learn.distrib_ctx(in_notebook=True, sync_bn=False):
        learn.fine_tune(1)

# Run `train` in 2 processes, launched from inside the notebook.
notebook_launcher(train, num_processes=2)
Launching training on 2 GPUs.
Training Learner...
epoch train_loss valid_loss error_rate time
0 0.258557 0.024234 0.008119 00:13
epoch train_loss valid_loss error_rate time
0 0.039532 0.019273 0.005413 00:15

Image Segmentation

# Fetch the CAMVID_TINY dataset; `train` below reads `images/`, `labels/` and `codes.txt` under `path`.
path = untar_data(URLs.CAMVID_TINY)

def train():
    """Fine-tune a resnet34-based U-Net for segmentation on the dataset at `path`.

    Reads the module-level `path`: images come from `path/images`, class
    codes from `path/codes.txt`, and each image's mask from `path/labels`.
    Trains for 8 `fine_tune` epochs inside fastai's distributed context.
    """
    def mask_path(img_file):
        # Mask of `<stem><suffix>` is stored as `labels/<stem>_P<suffix>`.
        return path/'labels'/f'{img_file.stem}_P{img_file.suffix}'

    image_files = get_image_files(path/"images")
    class_codes = np.loadtxt(path/'codes.txt', dtype=str)
    dls = SegmentationDataLoaders.from_label_func(
        path,
        fnames=image_files,
        label_func=mask_path,
        codes=class_codes,
        bs=8,
    )

    segmenter = unet_learner(dls, resnet34)
    with segmenter.distrib_ctx(in_notebook=True, sync_bn=False):
        segmenter.fine_tune(8)
        
# Run the segmentation `train` in 2 processes, launched from inside the notebook.
notebook_launcher(train, num_processes=2)
Launching training on 2 GPUs.
Training Learner...
/opt/conda/lib/python3.7/site-packages/torch/_tensor.py:1142: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  ret = func(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/torch/_tensor.py:1142: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  ret = func(*args, **kwargs)
epoch train_loss valid_loss time
0 4.339367 2.756470 00:03
epoch train_loss valid_loss time
0 2.405208 2.095044 00:02
1 2.113619 1.692979 00:02
2 1.931254 1.333691 00:02
3 1.753757 1.187579 00:02
4 1.612463 1.097649 00:02
5 1.493950 0.992424 00:02
6 1.393139 0.949843 00:02
7 1.312021 0.942510 00:02

Text Classification

# Fetch the IMDB sample and load `texts.csv` into `df`, which `train` below reads.
path = untar_data(URLs.IMDB_SAMPLE)
df = pd.read_csv(path/'texts.csv')

def train():
    """Fine-tune an AWD_LSTM text classifier on the module-level DataFrame `df`.

    Inputs are read from the `text` column and targets from the `label`
    column; the train/validation split comes from `ColSplitter()`.
    The learner is created through `rank0_first`, then fine-tuned for
    4 epochs at a base learning rate of 1e-2 inside the distributed context.
    """
    text_block = TextBlock.from_df('text', seq_len=72)
    imdb_clas = DataBlock(
        blocks=(text_block, CategoryBlock),
        get_x=ColReader('text'),
        get_y=ColReader('label'),
        splitter=ColSplitter(),
    )
    dls = imdb_clas.dataloaders(df, bs=64)

    def build_learner():
        return text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)

    # Learner construction is wrapped in `rank0_first` so rank 0 runs it before the others.
    classifier = rank0_first(build_learner)
    with classifier.distrib_ctx(in_notebook=True):
        classifier.fine_tune(4, 1e-2)
        
# Run the text-classification `train` in 2 processes, launched from inside the notebook.
notebook_launcher(train, num_processes=2)
Launching training on 2 GPUs.
Training Learner...
epoch train_loss valid_loss accuracy time
0 0.683830 0.640674 0.710000 00:06
epoch train_loss valid_loss accuracy time
0 0.505055 0.618315 0.650000 00:10
1 0.433232 0.522627 0.785000 00:11
2 0.391711 0.460229 0.810000 00:11
3 0.347983 0.450882 0.805000 00:11

Tabular

# Fetch the ADULT_SAMPLE dataset.
path = untar_data(URLs.ADULT_SAMPLE)
# NOTE(review): `train` below loads `adult.csv` itself via `from_csv` and never reads `df`.
df = pd.read_csv(path/'adult.csv')


def train():
    """Train a tabular model that predicts `salary` from `adult.csv` under `path`.

    Reads the module-level `path`. Applies the Categorify, FillMissing and
    Normalize procs, then runs 3 epochs of `fit_one_cycle` inside fastai's
    distributed context.
    """
    categorical = ['workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race']
    continuous = ['age', 'fnlwgt', 'education-num']
    preprocessing = [Categorify, FillMissing, Normalize]

    dls = TabularDataLoaders.from_csv(
        path/'adult.csv',
        path=path,
        y_names="salary",
        cat_names=categorical,
        cont_names=continuous,
        procs=preprocessing,
    )

    model = tabular_learner(dls, metrics=accuracy)
    with model.distrib_ctx(in_notebook=True):
        model.fit_one_cycle(3)
        
# Run the tabular `train` in 2 processes, launched from inside the notebook.
notebook_launcher(train, num_processes=2)
Launching training on 2 GPUs.
Training Learner...
epoch train_loss valid_loss accuracy time
0 0.436493 0.383866 0.832463 00:03
1 0.359663 0.352825 0.837224 00:02
2 0.349231 0.350312 0.839988 00:02

Collaborative Filtering

# Fetch the ML_SAMPLE dataset and load `ratings.csv` into `df`, which `train` below reads.
path = untar_data(URLs.ML_SAMPLE)
df = pd.read_csv(path/'ratings.csv')

def train():
    """Train a collaborative-filtering learner on the module-level ratings `df`.

    Predictions are constrained to the range (0.5, 5.5) via `y_range`, and
    training runs `fine_tune(6)` inside fastai's distributed context.
    """
    rating_bounds = (0.5,5.5)
    ratings_dls = CollabDataLoaders.from_df(df)
    recommender = collab_learner(ratings_dls, y_range=rating_bounds)
    with recommender.distrib_ctx(in_notebook=True):
        recommender.fine_tune(6)
        
# Run the collaborative-filtering `train` in 2 processes, launched from inside the notebook.
notebook_launcher(train, num_processes=2)
Launching training on 2 GPUs.
Training Learner...
epoch train_loss valid_loss time
0 1.553747 1.430443 00:00
epoch train_loss valid_loss time
0 1.484851 1.394805 00:00
1 1.424410 1.255329 00:00
2 1.277911 1.028214 00:00
3 1.099660 0.882485 00:00
4 0.969005 0.835191 00:00
5 0.894699 0.828167 00:00

Keypoints

# Fetch the BIWI_HEAD_POSE dataset; `path` is its root folder.
path = untar_data(URLs.BIWI_HEAD_POSE)
def img2pose(x):
    """Map an image file path to the path of its pose file.

    Drops the last 7 characters of `str(x)` (e.g. a trailing `rgb.jpg`) and
    appends `pose.txt` in their place.
    """
    prefix = str(x)[:-7]
    return Path(f'{prefix}pose.txt')
def get_ctr(f):
    """Return a 2-element tensor computed from the pose file of image `f`.

    Loads the pose file located by `img2pose(f)`, skipping its first 3
    lines, and combines the first three loaded values with entries of the
    module-level `cal` array.
    NOTE(review): the formula has the shape of a pinhole-camera projection
    (scale by `cal[i][i]`, divide by the third value, add `cal[i][2]`), which
    would make the result the target point in image coordinates -- confirm.
    """
    pose = np.genfromtxt(img2pose(f), skip_header=3)
    px, py, pz = pose[0], pose[1], pose[2]
    u = px * cal[0][0]/pz + cal[0][2]
    v = py * cal[1][1]/pz + cal[1][2]
    return tensor([u,v])

# NOTE(review): `img_files` is not referenced by the rest of the code shown in this section.
img_files = get_image_files(path)
# Calibration values read by `get_ctr`; loaded from `01/rgb.cal`, ignoring its last 6 lines.
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6)


def train():
    """Fine-tune a resnet18 point-regression model on the images under `path`.

    Reads the module-level `path`. Targets come from `get_ctr`; the split is
    decided by whether an item's parent folder is named '13'. Batches are
    augmented at size (240, 320) and normalised with `imagenet_stats`.
    Outputs are constrained to (-1, 1) via `y_range`, and training runs one
    `fine_tune` epoch inside fastai's distributed context.
    """
    def in_folder_13(o):
        return o.parent.name=='13'

    # Items for which the function returns True are split off from the rest
    # (NOTE(review): per fastai's `FuncSplitter`, presumably the validation set).
    holdout_splitter = FuncSplitter(in_folder_13)
    augmentations = aug_transforms(size=(240,320))
    normalizer = Normalize.from_stats(*imagenet_stats)

    biwi = DataBlock(
        blocks=(ImageBlock, PointBlock),
        get_items=get_image_files,
        get_y=get_ctr,
        splitter=holdout_splitter,
        batch_tfms=[*augmentations, normalizer],
    )
    dls = biwi.dataloaders(path)

    regressor = vision_learner(dls, resnet18, y_range=(-1,1))
    with regressor.distrib_ctx(in_notebook=True, sync_bn=False):
        regressor.fine_tune(1)
        
# Run the keypoint-regression `train` in 2 processes, launched from inside the notebook.
notebook_launcher(train, num_processes=2)
Launching training on 2 GPUs.
Training Learner...
epoch train_loss valid_loss time
0 0.247702 0.066427 00:47
epoch train_loss valid_loss time
0 0.052143 0.007451 00:55