#For example, so not exported
from fastai.vision.core import *
from fastai.vision.data import *

Data block
DataLoaders
📘 Note: Several domain-specific blocks such as
ImageBlock, BBoxBlock, PointBlock, and CategoryBlock are implemented on top of TransformBlock. These blocks are designed to handle common tasks in computer vision, classification, and regression. See the Vision Blocks section for more details.
TransformBlock
def TransformBlock(
type_tfms:list=None, # One or more `Transform`s
item_tfms:list=None, # `ItemTransform`s, applied on an item
batch_tfms:list=None, # `Transform`s or [`RandTransform`](https://docs.fast.ai/vision.augment.html#randtransform)s, applied by batch
dl_type:TfmdDL=None, # Task specific [`TfmdDL`](https://docs.fast.ai/data.core.html#tfmddl), defaults to [`TfmdDL`](https://docs.fast.ai/data.core.html#tfmddl)
dls_kwargs:dict=None, # Additional arguments to be passed to [`DataLoaders`](https://docs.fast.ai/data.core.html#dataloaders)
):
A basic wrapper that links defaults transforms for the data block API
CategoryBlock
def CategoryBlock(
vocab:MutableSequence | pd.Series=None, # List of unique class names
sort:bool=True, # Sort the classes alphabetically
add_na:bool=False, # Add `#na#` to `vocab`
):
TransformBlock for single-label categorical targets
MultiCategoryBlock
def MultiCategoryBlock(
encoded:bool=False, # Whether the data comes in one-hot encoded
vocab:MutableSequence | pd.Series=None, # List of unique class names
add_na:bool=False, # Add `#na#` to `vocab`
):
TransformBlock for multi-label categorical targets
RegressionBlock
def RegressionBlock(
n_out:int=None, # Number of output values
):
TransformBlock for float targets
General API
DataBlock
def DataBlock(
blocks:list=None, # One or more [`TransformBlock`](https://docs.fast.ai/data.block.html#transformblock)s
dl_type:TfmdDL=None, # Task specific [`TfmdDL`](https://docs.fast.ai/data.core.html#tfmddl), defaults to `block`'s dl_type or [`TfmdDL`](https://docs.fast.ai/data.core.html#tfmddl)
getters:list=None, # Getter functions applied to results of `get_items`
n_inp:int=None, # Number of inputs
item_tfms:list=None, # `ItemTransform`s, applied on an item
batch_tfms:list=None, # `Transform`s or [`RandTransform`](https://docs.fast.ai/vision.augment.html#randtransform)s, applied by batch
get_items:NoneType=None, splitter:NoneType=None, get_y:NoneType=None, get_x:NoneType=None
):
Generic container to quickly build Datasets and DataLoaders.
To build a DataBlock you need to give the library four things: the types of your input/labels, and at least two functions: get_items and splitter. You may also need to include get_x and get_y or a more generic list of getters that are applied to the results of get_items.
splitter is a callable which, when called with items, returns a tuple of iterables representing the indices of the training and validation data.
Once those are provided, you automatically get a Datasets or a DataLoaders:
DataBlock.datasets
def datasets(
source, # The data source
verbose:bool=False, # Show verbose messages
)->Datasets:
Create a Datasets object from source
DataBlock.dataloaders
def dataloaders(
source, # The data source
path:str='.', # Data source and default [`Learner`](https://docs.fast.ai/learner.html#learner) path
verbose:bool=False, # Show verbose messages
bs:int=64, # Size of batch
shuffle:bool=False, # Whether to shuffle data
num_workers:int=None, # Number of CPU cores to use in parallel (default: All available up to 16)
do_setup:bool=True, # Whether to run `setup()` for batch transform(s)
pin_memory:bool=False, timeout:int=0, batch_size:NoneType=None, drop_last:bool=False, indexed:NoneType=None,
n:NoneType=None, device:NoneType=None, persistent_workers:bool=False, pin_memory_device:str='',
wif:NoneType=None, before_iter:NoneType=None, after_item:NoneType=None, before_batch:NoneType=None,
after_batch:NoneType=None, after_iter:NoneType=None, create_batches:NoneType=None, create_item:NoneType=None,
create_batch:NoneType=None, retain:NoneType=None, get_idxs:NoneType=None, sample:NoneType=None,
shuffle_fn:NoneType=None, do_batch:NoneType=None
)->DataLoaders:
Create a DataLoaders object from source
You can create a DataBlock by passing functions:
mnist = DataBlock(blocks = (ImageBlock(cls=PILImageBW),CategoryBlock),
get_items = get_image_files,
splitter = GrandparentSplitter(),
                  get_y = parent_label)

Each type comes with default transforms that will be applied:
- at the base level to create items in a tuple (usually input,target) from the base elements (like filenames)
- at the item level of the datasets
- at the batch level
They are called respectively type transforms, item transforms, batch transforms. In the case of MNIST, the type transforms are the method to create a PILImageBW (for the input) and the Categorize transform (for the target), the item transform is ToTensor and the batch transforms are Cuda and IntToFloatTensor. You can add any other transforms by passing them in DataBlock.datasets or DataBlock.dataloaders.
test_eq(mnist.type_tfms[0], [PILImageBW.create])
test_eq(mnist.type_tfms[1].map(type), [Categorize])
test_eq(mnist.default_item_tfms.map(type), [ToTensor])
test_eq(mnist.default_batch_tfms.map(type), [IntToFloatTensor])

dsets = mnist.datasets(untar_data(URLs.MNIST_TINY))
test_eq(dsets.vocab, ['3', '7'])
x,y = dsets.train[0]
test_eq(x.size,(28,28))
show_at(dsets.train, 0, cmap='Greys', figsize=(2,2));
test_fail(lambda: DataBlock(wrong_kwarg=42, wrong_kwarg2='foo'))

We can pass any number of blocks to DataBlock, we can then define what are the input and target blocks by changing n_inp. For example, defining n_inp=2 will consider the first two blocks passed as inputs and the others as targets.
mnist = DataBlock((ImageBlock, ImageBlock, CategoryBlock), get_items=get_image_files, splitter=GrandparentSplitter(),
get_y=parent_label)
dsets = mnist.datasets(untar_data(URLs.MNIST_TINY))
test_eq(mnist.n_inp, 2)
test_eq(len(dsets.train[0]), 3)

test_fail(lambda: DataBlock((ImageBlock, ImageBlock, CategoryBlock), get_items=get_image_files, splitter=GrandparentSplitter(),
get_y=[parent_label, noop],
                    n_inp=2), msg='get_y contains 2 functions, but must contain 1 (one for each output)')

mnist = DataBlock((ImageBlock, ImageBlock, CategoryBlock), get_items=get_image_files, splitter=GrandparentSplitter(),
n_inp=1,
get_y=[noop, Pipeline([noop, parent_label])])
dsets = mnist.datasets(untar_data(URLs.MNIST_TINY))
test_eq(len(dsets.train[0]), 3)

Debugging
DataBlock.summary
def summary(
source, # The data source
bs:int=4, # The batch size
show_batch:bool=False, # Call [`show_batch`](https://docs.fast.ai/data.core.html#show_batch) after the summary
kwargs:VAR_KEYWORD
):
Steps through the transform pipeline for one batch, and optionally calls show_batch(**kwargs) on the transient Dataloaders.
DataBlock.summary
def summary(
source, # The data source
bs:int=4, # The batch size
show_batch:bool=False, # Call [`show_batch`](https://docs.fast.ai/data.core.html#show_batch) after the summary
kwargs:VAR_KEYWORD
):
Steps through the transform pipeline for one batch, and optionally calls show_batch(**kwargs) on the transient Dataloaders.
Besides stepping through the transformation, summary() provides a shortcut dls.show_batch(...), to see the data. E.g.
pets.summary(path/"images", bs=8, show_batch=True, unique=True,...)
is a shortcut to:
pets.summary(path/"images", bs=8)
dls = pets.dataloaders(path/"images", bs=8)
dls.show_batch(unique=True,...) # See different tfms effect on the same image.