fedbox.datasets.cifar10

 1from torchvision.datasets import CIFAR10
 2from typing import Any, Union
 3
 4from . import utils
 5
 6
 7def cifar10(
 8    directory: str,
 9    n_subsets: int = 100,
10    heterogeneity_degree: float = None,
11    samples_variance: float = 0.0,
12    transform: Any = None,
13    target_transform: Any = None,
14    seed: int = None,
15    strict: bool = True
16) -> dict[str, list[utils.FederatedSubset]]:
17    '''
18    Loads the `CIFAR10` dataset and partitions it into `n_subsets` training subsets and 
19    `n_subsets` testing subsets according to heterogeneity parameters `heterogeneity_degree` 
20    and `samples_variance`.
21
22    Parameters
23    ----------
24    directory: str
25        Root directory to load the dataset
26    n_subsets: int
27        Number of datasets for splitting
28    heterogeneity_degree: float
29        Class heterogeneity degree, by default is homogeneous
30    samples_variance: float
31        Standard deviation (%) in the number of samples for each client, `0` by default
32    transform: Any
33        Transformation to apply on data samples, `None` by default
34    target_transform: Any
35       Transformation to apply on data labels, `None` by default
36    seed: int
37        Random seed initializer
38    strict: bool
39        In strict mode `heterogeneity_degree` and `samples_variance` are highly respected,
40        otherwise flexibility is allowed
41
42    Returns
43    -------
44    dict[str, list[utils.FederatedSubset]]
45        Returns the lists of subsets of training clients and testing clients
46    '''
47
48    training = CIFAR10(
49        root = directory, 
50        train = True, 
51        download = True,
52        transform = transform,
53        target_transform = target_transform
54    )
55
56    testing = CIFAR10(
57        root = directory, 
58        train = False, 
59        download = True,
60        transform = transform,
61        target_transform = target_transform
62    )
63
64    return {
65        'training': utils.partition(
66            training,
67            n_subsets = min(len(training), n_subsets),
68            n_classes = 10,
69            heterogeneity_degree = heterogeneity_degree,
70            samples_variance = samples_variance,
71            return_indices = False,
72            seed = seed,
73            strict = strict
74        ),
75        'testing': utils.partition(
76            testing,
77            n_subsets = min(len(testing), n_subsets),
78            n_classes = 10,
79            heterogeneity_degree = heterogeneity_degree,
80            samples_variance = samples_variance,
81            return_indices = False,
82            seed = seed,
83            strict = strict
84        )
85    }
def cifar10( directory: str, n_subsets: int = 100, heterogeneity_degree: float = None, samples_variance: float = 0.0, transform: Any = None, target_transform: Any = None, seed: int = None, strict: bool = True) -> dict[str, list[fedbox.datasets.utils.FederatedSubset]]:
 8def cifar10(
 9    directory: str,
10    n_subsets: int = 100,
11    heterogeneity_degree: float = None,
12    samples_variance: float = 0.0,
13    transform: Any = None,
14    target_transform: Any = None,
15    seed: int = None,
16    strict: bool = True
17) -> dict[str, list[utils.FederatedSubset]]:
18    '''
19    Loads the `CIFAR10` dataset and partitions it into `n_subsets` training subsets and 
20    `n_subsets` testing subsets according to heterogeneity parameters `heterogeneity_degree` 
21    and `samples_variance`.
22
23    Parameters
24    ----------
25    directory: str
26        Root directory to load the dataset
27    n_subsets: int
28        Number of datasets for splitting
29    heterogeneity_degree: float
30        Class heterogeneity degree, by default is homogeneous
31    samples_variance: float
32        Standard deviation (%) in the number of samples for each client, `0` by default
33    transform: Any
34        Transformation to apply on data samples, `None` by default
35    target_transform: Any
36       Transformation to apply on data labels, `None` by default
37    seed: int
38        Random seed initializer
39    strict: bool
40        In strict mode `heterogeneity_degree` and `samples_variance` are highly respected,
41        otherwise flexibility is allowed
42
43    Returns
44    -------
45    dict[str, list[utils.FederatedSubset]]
46        Returns the lists of subsets of training clients and testing clients
47    '''
48
49    training = CIFAR10(
50        root = directory, 
51        train = True, 
52        download = True,
53        transform = transform,
54        target_transform = target_transform
55    )
56
57    testing = CIFAR10(
58        root = directory, 
59        train = False, 
60        download = True,
61        transform = transform,
62        target_transform = target_transform
63    )
64
65    return {
66        'training': utils.partition(
67            training,
68            n_subsets = min(len(training), n_subsets),
69            n_classes = 10,
70            heterogeneity_degree = heterogeneity_degree,
71            samples_variance = samples_variance,
72            return_indices = False,
73            seed = seed,
74            strict = strict
75        ),
76        'testing': utils.partition(
77            testing,
78            n_subsets = min(len(testing), n_subsets),
79            n_classes = 10,
80            heterogeneity_degree = heterogeneity_degree,
81            samples_variance = samples_variance,
82            return_indices = False,
83            seed = seed,
84            strict = strict
85        )
86    }

Loads the CIFAR10 dataset and partitions it into n_subsets training subsets and n_subsets testing subsets according to heterogeneity parameters heterogeneity_degree and samples_variance.

Parameters
  • directory (str): Root directory from which the dataset is loaded
  • n_subsets (int): Number of subsets into which each of the training and testing sets is split
  • heterogeneity_degree (float): Class heterogeneity degree; homogeneous by default
  • samples_variance (float): Standard deviation (%) in the number of samples for each client, 0 by default
  • transform (Any): Transformation to apply on data samples, None by default
  • target_transform (Any): Transformation to apply on data labels, None by default
  • seed (int): Random seed initializer
  • strict (bool): In strict mode heterogeneity_degree and samples_variance are highly respected, otherwise flexibility is allowed
Returns
  • dict[str, list[utils.FederatedSubset]]: Dictionary with the keys 'training' and 'testing', each mapping to the list of client subsets for that split