Notebook Launcher examples

from fastai.vision.all import *
from fastai.text.all import *
from fastai.tabular.all import *
from fastai.collab import *

from accelerate import notebook_launcher
from fastai.distributed import *

Important

Before running, make sure Accelerate has been configured, either by running `accelerate config` on the command line or by calling `write_basic_config()` from `accelerate.utils`.

# from accelerate.utils import write_basic_config
# write_basic_config()

Image Classification

# Fetch the PETS dataset and point at its 'images' sub-folder.
path = untar_data(URLs.PETS)/'images'


def starts_uppercase(fname):
    """Return True when the first character of `fname` is upper-case.

    Used as the labelling rule for the image classifier below. It is a named
    module-level function rather than a lambda because lambdas cannot be
    pickled, and `ImageDataLoaders.from_name_func` rejects lambda label
    functions on Windows.
    """
    return fname[0].isupper()


def train():
    """Build the image dataloaders and fine-tune a resnet34 learner for one epoch.

    Takes no arguments and returns nothing; it is handed to
    `notebook_launcher` below, which is asked for two processes.
    """
    dls = ImageDataLoaders.from_name_func(
        path, get_image_files(path), valid_pct=0.2,
        label_func=starts_uppercase, item_tfms=Resize(224))
    # Mixed-precision learner scored with error_rate.
    learn = vision_learner(dls, resnet34, metrics=error_rate).to_fp16()
    # Training runs inside fastai's distributed context; in_notebook=True
    # because the function is launched from a notebook cell.
    with learn.distrib_ctx(in_notebook=True, sync_bn=False):
        learn.fine_tune(1)


notebook_launcher(train, num_processes=2)

Launching training on 2 GPUs.
Training Learner...

epoch	train_loss	valid_loss	error_rate	time
0	0.258557	0.024234	0.008119	00:13

epoch	train_loss	valid_loss	error_rate	time
0	0.039532	0.019273	0.005413	00:15

Image Segmentation

# Fetch the CAMVID_TINY dataset; `train` reads images, labels and codes from it.
path = untar_data(URLs.CAMVID_TINY)


def train():
    """Fine-tune a resnet34-based U-Net learner on CAMVID_TINY for eight epochs.

    Takes no arguments and returns nothing; it is handed to
    `notebook_launcher` below, which is asked for two processes.
    """
    def mask_path(img):
        # The label file lives under 'labels' and reuses the image's stem
        # and suffix with '_P' inserted between them.
        return path/'labels'/f'{img.stem}_P{img.suffix}'

    image_files = get_image_files(path/"images")
    # One code string per line of codes.txt.
    class_codes = np.loadtxt(path/'codes.txt', dtype=str)

    dls = SegmentationDataLoaders.from_label_func(
        path,
        bs=8,
        fnames=image_files,
        label_func=mask_path,
        codes=class_codes,
    )
    segmenter = unet_learner(dls, resnet34)
    # in_notebook=True because the function is launched from a notebook cell.
    with segmenter.distrib_ctx(in_notebook=True, sync_bn=False):
        segmenter.fine_tune(8)


notebook_launcher(train, num_processes=2)

Launching training on 2 GPUs.

/opt/conda/lib/python3.7/site-packages/torch/_tensor.py:1142: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  ret = func(*args, **kwargs)
/opt/conda/lib/python3.7/site-packages/torch/_tensor.py:1142: UserWarning: __floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').
  ret = func(*args, **kwargs)

Training Learner...

epoch	train_loss	valid_loss	time
0	4.339367	2.756470	00:03

epoch	train_loss	valid_loss	time
0	2.405208	2.095044	00:02
1	2.113619	1.692979	00:02
2	1.931254	1.333691	00:02
3	1.753757	1.187579	00:02
4	1.612463	1.097649	00:02
5	1.493950	0.992424	00:02
6	1.393139	0.949843	00:02
7	1.312021	0.942510	00:02

Text Classification

# Fetch the IMDB sample and load its texts.csv; `train` reads the
# 'text' and 'label' columns from this frame.
path = untar_data(URLs.IMDB_SAMPLE)
df = pd.read_csv(path/'texts.csv')


def train():
    """Fine-tune an AWD_LSTM text classifier on the IMDB sample frame.

    Takes no arguments and returns nothing; it is handed to
    `notebook_launcher` below, which is asked for two processes.
    """
    text_block = TextBlock.from_df('text', seq_len=72)
    imdb_clas = DataBlock(
        blocks=(text_block, CategoryBlock),
        get_x=ColReader('text'),
        get_y=ColReader('label'),
        splitter=ColSplitter(),
    )
    dls = imdb_clas.dataloaders(df, bs=64)

    def build_learner():
        # Deferred so that rank0_first decides when each process builds it.
        return text_classifier_learner(dls, AWD_LSTM, drop_mult=0.5, metrics=accuracy)

    # NOTE(review): presumably rank0_first lets rank 0 build the learner
    # before the other ranks do (e.g. so shared setup happens once) —
    # confirm against the fastai.distributed documentation.
    learn = rank0_first(build_learner)
    with learn.distrib_ctx(in_notebook=True):
        learn.fine_tune(4, 1e-2)


notebook_launcher(train, num_processes=2)

Launching training on 2 GPUs.
Training Learner...

epoch	train_loss	valid_loss	accuracy	time
0	0.683830	0.640674	0.710000	00:06

epoch	train_loss	valid_loss	accuracy	time
0	0.505055	0.618315	0.650000	00:10
1	0.433232	0.522627	0.785000	00:11
2	0.391711	0.460229	0.810000	00:11
3	0.347983	0.450882	0.805000	00:11

Tabular

path = untar_data(URLs.ADULT_SAMPLE)
# Parse the CSV once here and share the frame with every training process,
# instead of re-reading the file inside `train`. skipinitialspace=True is the
# pandas option that TabularDataLoaders.from_csv applies by default, so the
# loaders see the same values as when `train` loaded the file itself.
df = pd.read_csv(path/'adult.csv', skipinitialspace=True)


def train():
    """Fit a tabular learner on the ADULT_SAMPLE frame for three one-cycle epochs.

    Predicts the "salary" column from the categorical and continuous columns
    listed below. Takes no arguments and returns nothing; it is handed to
    `notebook_launcher` below, which is asked for two processes.
    """
    dls = TabularDataLoaders.from_df(
        df, path=path, y_names="salary",
        cat_names=['workclass', 'education', 'marital-status', 'occupation',
                   'relationship', 'race'],
        cont_names=['age', 'fnlwgt', 'education-num'],
        procs=[Categorify, FillMissing, Normalize])

    learn = tabular_learner(dls, metrics=accuracy)
    # in_notebook=True because the function is launched from a notebook cell.
    with learn.distrib_ctx(in_notebook=True):
        learn.fit_one_cycle(3)


notebook_launcher(train, num_processes=2)

Launching training on 2 GPUs.
Training Learner...

epoch	train_loss	valid_loss	accuracy	time
0	0.436493	0.383866	0.832463	00:03
1	0.359663	0.352825	0.837224	00:02
2	0.349231	0.350312	0.839988	00:02

Collaborative Filtering

# Fetch the ML_SAMPLE dataset and load its ratings.csv into a frame.
path = untar_data(URLs.ML_SAMPLE)
df = pd.read_csv(path/'ratings.csv')


def train():
    """Fine-tune a collaborative-filtering learner on the ratings frame for six epochs.

    Takes no arguments and returns nothing; it is handed to
    `notebook_launcher` below, which is asked for two processes.
    """
    # Passed to collab_learner as y_range.
    output_range = (0.5, 5.5)

    dls = CollabDataLoaders.from_df(df)
    recommender = collab_learner(dls, y_range=output_range)
    # in_notebook=True because the function is launched from a notebook cell.
    with recommender.distrib_ctx(in_notebook=True):
        recommender.fine_tune(6)


notebook_launcher(train, num_processes=2)

Launching training on 2 GPUs.
Training Learner...

epoch	train_loss	valid_loss	time
0	1.553747	1.430443	00:00

epoch	train_loss	valid_loss	time
0	1.484851	1.394805	00:00
1	1.424410	1.255329	00:00
2	1.277911	1.028214	00:00
3	1.099660	0.882485	00:00
4	0.969005	0.835191	00:00
5	0.894699	0.828167	00:00

Keypoints

path = untar_data(URLs.BIWI_HEAD_POSE)

# NOTE(review): img_files is not referenced anywhere in the visible code
# (the DataBlock below collects files itself) — confirm whether it is needed.
img_files = get_image_files(path)
# Numeric table read from 01/rgb.cal with its last six lines skipped;
# get_ctr indexes it as cal[row][col]. Presumably camera calibration —
# TODO confirm.
cal = np.genfromtxt(path/'01'/'rgb.cal', skip_footer=6)


def img2pose(x):
    """Return the pose-file path for `x`: its last 7 characters replaced by 'pose.txt'."""
    prefix = str(x)[:-7]
    return Path(f'{prefix}pose.txt')


def get_ctr(f):
    """Return a 2-element tensor computed from the pose file belonging to image `f`.

    Reads the pose file (skipping its first three lines) and combines the
    first three values with entries of the module-level `cal` table.
    """
    pose = np.genfromtxt(img2pose(f), skip_header=3)
    depth = pose[2]
    first = pose[0] * cal[0][0]/depth + cal[0][2]
    second = pose[1] * cal[1][1]/depth + cal[1][2]
    return tensor([first, second])


def train():
    """Fine-tune a resnet18 point-regression learner on BIWI_HEAD_POSE for one epoch.

    Takes no arguments and returns nothing; it is handed to
    `notebook_launcher` below, which is asked for two processes.
    """
    def in_validation_set(item):
        # Files whose parent folder is named '13' form the validation split.
        return item.parent.name == '13'

    batch_transforms = list(aug_transforms(size=(240, 320)))
    batch_transforms.append(Normalize.from_stats(*imagenet_stats))

    biwi = DataBlock(
        blocks=(ImageBlock, PointBlock),
        get_items=get_image_files,
        get_y=get_ctr,
        splitter=FuncSplitter(in_validation_set),
        batch_tfms=batch_transforms,
    )
    dls = biwi.dataloaders(path)
    regressor = vision_learner(dls, resnet18, y_range=(-1, 1))
    # in_notebook=True because the function is launched from a notebook cell.
    with regressor.distrib_ctx(in_notebook=True, sync_bn=False):
        regressor.fine_tune(1)


notebook_launcher(train, num_processes=2)

Launching training on 2 GPUs.
Training Learner...

epoch	train_loss	valid_loss	time
0	0.247702	0.066427	00:47

epoch	train_loss	valid_loss	time
0	0.052143	0.007451	00:55