fedbox.datasets.femnist
import os.path as path
import torch
from torch.utils.data import Dataset
from typing import Any, Tuple, Union

from . import utils


class FEMNIST(Dataset):
    '''
    This class loads the FEMNIST dataset, specifically only its 10-class subset of handwritten digits in [0, 9].

    The requested split is read from `<root>/femnist/training.pt` or `<root>/femnist/testing.pt`.

    Note
    ----
    The dataset must be downloaded beforehand using the instructions in the file README.md.
    '''

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Any = None,
        target_transform: Any = None,
        download: bool = True
    ):
        '''
        Parameters
        ----------
        root: str
            Directory that contains the `femnist` folder
        train: bool
            Loads `training.pt` when `True`, `testing.pt` otherwise
        transform: Any
            Transformation to apply on data samples, `None` by default
        target_transform: Any
            Transformation to apply on data labels, `None` by default
        download: bool
            Only stored on the instance, this class does not download anything

        Raises
        ------
        RuntimeError
            If the folder `<root>/femnist` does not exist. Errors raised by `torch.load`
            (e.g. a missing split file inside that folder) propagate unchanged.
        '''
        super().__init__()

        self.root = root
        self.train = train
        self.transform = transform
        self.target_transform = target_transform
        self.download = download

        directory = path.join(self.root, 'femnist')
        # NOTE: dataset must be downloaded first by following the instructions in README.md file
        if not path.exists(directory):
            # the message reports the folder that was actually probed, not just the caller's root
            raise RuntimeError('FEMNIST dataset not found in directory {}. It must be downloaded first using the README.md file.'.format(directory))

        # NOTE(review): torch.load unpickles the file, only load files coming from a trusted source
        # the third stored element is not used by this class
        self.data, self.targets, _ = torch.load(path.join(directory, 'training.pt' if self.train else 'testing.pt'))
        # inserts a size-1 axis at position 3 (assumes stored images are 3-D, i.e. a trailing channel axis - TODO confirm)
        self.data = self.data.unsqueeze(3)
        # labels are cast to long directly, an intermediate float32 cast only costs a copy
        self.targets = self.targets.to(dtype = torch.long)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        '''
        Returns the sample and the label stored at position `index`, after applying
        `transform` and `target_transform` respectively when they are provided.
        '''
        x, y = self.data[index], self.targets[index]

        # compared against None so that a callable evaluating as falsy (e.g. one with an empty __len__) is still applied
        if self.transform is not None:
            x = self.transform(x)

        if self.target_transform is not None:
            y = self.target_transform(y)

        return x, y

    def __len__(self) -> int:
        '''
        Returns the number of samples, i.e. the size of the first axis of `data`.
        '''
        return self.data.shape[0]


def femnist(
    directory: str,
    n_subsets: int = 1000,
    heterogeneity_degree: float = None,
    samples_variance: float = 0.0,
    transform: Any = None,
    target_transform: Any = None,
    seed: int = None,
    strict: bool = True
) -> dict[str, list[utils.FederatedSubset]]:
    '''
    Loads the `FEMNIST` dataset and partitions it into `n_subsets` training subsets and
    `n_subsets` testing subsets according to heterogeneity parameters `heterogeneity_degree`
    and `samples_variance`. The number of subsets of a split never exceeds its number of samples.

    Parameters
    ----------
    directory: str
        Root directory to load the dataset
    n_subsets: int
        Number of datasets for splitting
    heterogeneity_degree: float
        Class heterogeneity degree, by default is homogeneous
    samples_variance: float
        Standard deviation (%) in the number of samples for each client, `0` by default
    transform: Any
        Transformation to apply on data samples, `None` by default
    target_transform: Any
        Transformation to apply on data labels, `None` by default
    seed: int
        Random seed initializer
    strict: bool
        In strict mode `heterogeneity_degree` and `samples_variance` are highly respected,
        otherwise flexibility is allowed

    Returns
    -------
    dict[str, list[utils.FederatedSubset]]
        Returns the lists of subsets of training clients and testing clients
    '''

    # both splits are loaded up front, before any partitioning takes place
    splits = {
        name: FEMNIST(
            root = directory,
            train = is_training,
            download = True,
            transform = transform,
            target_transform = target_transform
        )
        for name, is_training in (('training', True), ('testing', False))
    }

    # options shared by the two calls of `utils.partition`
    shared_options = dict(
        n_classes = 10,
        heterogeneity_degree = heterogeneity_degree,
        samples_variance = samples_variance,
        return_indices = False,
        seed = seed,
        strict = strict
    )

    return {
        name: utils.partition(
            split,
            # a split cannot be divided into more subsets than it has samples
            n_subsets = min(len(split), n_subsets),
            **shared_options
        )
        for name, split in splits.items()
    }
class
FEMNIST(typing.Generic[+T_co]):
class FEMNIST(Dataset):
    '''
    This class loads the FEMNIST dataset, specifically only its 10-class subset of handwritten digits in [0, 9].

    The requested split is read from `<root>/femnist/training.pt` or `<root>/femnist/testing.pt`.

    Note
    ----
    The dataset must be downloaded beforehand using the instructions in the file README.md.
    '''

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Any = None,
        target_transform: Any = None,
        download: bool = True
    ):
        '''
        Parameters
        ----------
        root: str
            Directory that contains the `femnist` folder
        train: bool
            Loads `training.pt` when `True`, `testing.pt` otherwise
        transform: Any
            Transformation to apply on data samples, `None` by default
        target_transform: Any
            Transformation to apply on data labels, `None` by default
        download: bool
            Only stored on the instance, this class does not download anything

        Raises
        ------
        RuntimeError
            If the folder `<root>/femnist` does not exist. Errors raised by `torch.load`
            (e.g. a missing split file inside that folder) propagate unchanged.
        '''
        super().__init__()

        self.root = root
        self.train = train
        self.transform = transform
        self.target_transform = target_transform
        self.download = download

        directory = path.join(self.root, 'femnist')
        # NOTE: dataset must be downloaded first by following the instructions in README.md file
        if not path.exists(directory):
            # the message reports the folder that was actually probed, not just the caller's root
            raise RuntimeError('FEMNIST dataset not found in directory {}. It must be downloaded first using the README.md file.'.format(directory))

        # NOTE(review): torch.load unpickles the file, only load files coming from a trusted source
        # the third stored element is not used by this class
        self.data, self.targets, _ = torch.load(path.join(directory, 'training.pt' if self.train else 'testing.pt'))
        # inserts a size-1 axis at position 3 (assumes stored images are 3-D, i.e. a trailing channel axis - TODO confirm)
        self.data = self.data.unsqueeze(3)
        # labels are cast to long directly, an intermediate float32 cast only costs a copy
        self.targets = self.targets.to(dtype = torch.long)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        '''
        Returns the sample and the label stored at position `index`, after applying
        `transform` and `target_transform` respectively when they are provided.
        '''
        x, y = self.data[index], self.targets[index]

        # compared against None so that a callable evaluating as falsy (e.g. one with an empty __len__) is still applied
        if self.transform is not None:
            x = self.transform(x)

        if self.target_transform is not None:
            y = self.target_transform(y)

        return x, y

    def __len__(self) -> int:
        '''
        Returns the number of samples, i.e. the size of the first axis of `data`.
        '''
        return self.data.shape[0]
This class loads the FEMNIST dataset, specifically only its 10-class subset of handwritten digits in [0, 9].
Note
The dataset must be downloaded beforehand using the instructions in the file README.md.
FEMNIST( root: str, train: bool = True, transform: Any = None, target_transform: Any = None, download=True)
19 def __init__( 20 self, 21 root: str, 22 train: bool = True, 23 transform: Any = None, 24 target_transform: Any = None, 25 download = True 26 ): 27 super().__init__() 28 29 self.root = root 30 self.train = train 31 self.transform = transform 32 self.target_transform = target_transform 33 self.download = download 34 self.data = None 35 self.targets = None 36 37 directory = path.join(self.root, 'femnist') 38 # NOTE: dataset must be downloaded first by following the instructions in README.md file 39 if path.exists(directory): 40 self.data, self.targets, _ = torch.load(path.join(directory, 'training.pt' if self.train else 'testing.pt')) 41 self.targets = self.targets.to(dtype = torch.float32) 42 else: 43 raise RuntimeError('FEMNIST dataset not found in directory {}. It must be downloaded first using the README.md file.'.format(root)) 44 45 self.data = self.data.unsqueeze(3) 46 self.targets = self.targets.to(dtype = torch.long)
def
femnist( directory: str, n_subsets: int = 1000, heterogeneity_degree: float = None, samples_variance: float = 0.0, transform: Any = None, target_transform: Any = None, seed: int = None, strict: bool = True) -> dict[str, list[fedbox.datasets.utils.FederatedSubset]]:
def femnist(
    directory: str,
    n_subsets: int = 1000,
    heterogeneity_degree: float = None,
    samples_variance: float = 0.0,
    transform: Any = None,
    target_transform: Any = None,
    seed: int = None,
    strict: bool = True
) -> dict[str, list[utils.FederatedSubset]]:
    '''
    Builds the training and the testing `FEMNIST` datasets from `directory`, then hands each
    of them to `utils.partition` together with the heterogeneity parameters. The number of
    subsets requested for a split is `n_subsets`, capped at the number of samples of that split.

    Parameters
    ----------
    directory: str
        Root directory from which the dataset is loaded
    n_subsets: int
        Requested number of subsets per split, `1000` by default
    heterogeneity_degree: float
        Class heterogeneity degree forwarded to `utils.partition`, `None` by default
    samples_variance: float
        Variability in the number of samples per subset forwarded to `utils.partition`, `0` by default
    transform: Any
        Transformation to apply on data samples, `None` by default
    target_transform: Any
        Transformation to apply on data labels, `None` by default
    seed: int
        Random seed forwarded to `utils.partition` for both splits, `None` by default
    strict: bool
        Strictness flag forwarded to `utils.partition`, `True` by default

    Returns
    -------
    dict[str, list[utils.FederatedSubset]]
        Result of `utils.partition` for each split, under the keys `'training'` and `'testing'`
    '''

    # both splits are loaded up front, before any partitioning takes place
    splits = {
        name: FEMNIST(
            root = directory,
            train = is_training,
            download = True,
            transform = transform,
            target_transform = target_transform
        )
        for name, is_training in (('training', True), ('testing', False))
    }

    # options shared by the two calls of `utils.partition`
    shared_options = dict(
        n_classes = 10,
        heterogeneity_degree = heterogeneity_degree,
        samples_variance = samples_variance,
        return_indices = False,
        seed = seed,
        strict = strict
    )

    return {
        name: utils.partition(
            split,
            # a split cannot be divided into more subsets than it has samples
            n_subsets = min(len(split), n_subsets),
            **shared_options
        )
        for name, split in splits.items()
    }
Loads the FEMNIST dataset and partitions it into n_subsets training subsets and
n_subsets testing subsets according to heterogeneity parameters heterogeneity_degree
and samples_variance.
Parameters
- directory (str): Root directory to load the dataset
- n_subsets (int): Number of datasets for splitting
- heterogeneity_degree (float): Class heterogeneity degree, by default is homogeneous
- samples_variance (float): Standard deviation (%) in the number of samples for each client, `0` by default
- transform (Any): Transformation to apply on data samples, `None` by default
- target_transform (Any): Transformation to apply on data labels, `None` by default
- seed (int): Random seed initializer
- strict (bool): In strict mode `heterogeneity_degree` and `samples_variance` are highly respected, otherwise flexibility is allowed
Returns
- dict[str, list[utils.FederatedSubset]]: Returns the lists of subsets of training clients and testing clients