diff --git a/.gitignore b/.gitignore
index b79f322..f0d32b3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -179,4 +179,11 @@
 cython_debug/
 **/*.csv
 **/*.jpg
+benchmarking/
+
+# Models
+**/*.keras
+**/*.pt
+**/*.csv
+
 benchmarking/
\ No newline at end of file
diff --git a/BLOG.md b/BLOG.md
new file mode 100644
index 0000000..f9270ae
--- /dev/null
+++ b/BLOG.md
@@ -0,0 +1,370 @@
+# TensorFlow βš”οΈ PyTorch
+
+Both TensorFlow and PyTorch are powerhouses in the deep learning world. Google open-sourced TensorFlow in 2015, and a year later, in 2016, Facebook (now Meta) dropped PyTorch into the ring.
+
+πŸ€” But the million-dollar question is... which one is better?
+
+Well, instead of starting a flame war πŸ”₯, let’s take a look at the adoption trends in the AI community and see where the battle is heading! πŸ“ˆ
+
+![Tensorflow VS Pytorch Trend](assets/google_trend.png)
+
+In recent years, PyTorch has been on πŸ”₯, especially in the research community! πŸš€
+
+**Why PyTorch is Winning Researchers Over:**
+
+1️⃣ More Pythonic & Easy to Use – It feels like writing native Python code, giving developers more control and easier debugging compared to TensorFlow.
+2️⃣ Rapid Prototyping – Researchers love it because they can iterate faster and experiment more freely, which is why so many cutting-edge papers and models are implemented in PyTorch first.
+
+But Wait! TensorFlow Still Packs a Punch πŸ’ͺ
+
+**While PyTorch shines in research, TensorFlow dominates in production:**
+
+1️⃣ Built for Large-Scale Production – Whether it's serving massive models in the cloud with TensorFlow Serving or deploying AI on edge devices with TensorFlow Lite (TFLite), it's the go-to framework for real-world applications.
+2️⃣ Distributed Training Powerhouse – Need to train gigantic models? TensorFlow handles large-scale distributed training like a champ, and it even runs on Google’s TPUs for insane performance boosts. πŸš€
+
+![TF vs PT](assets/tfvspt.png)
+
+PyTorch is the researcher’s best friend, but when it’s time to scale up and go big, TensorFlow still holds the πŸ‘‘
+
+---
+---
+
+## Benchmarking Showdown ⚑
+
+Let's take the [CIFAR-100](https://www.cs.toronto.edu/~kriz/cifar.html) dataset & compare both frameworks across multiple aspects.
+
+### CIFAR-100
+
+This dataset is just like CIFAR-10, except it has 100 classes containing 600 images each. There are 500 training images and 100 testing images per class.
+
+Total training data: 50,000
+Total testing data: 10,000
+
+![CIFAR100](assets/train_data.jpg)
+
+Both frameworks have APIs to load this dataset:
+
+```python
+import tensorflow as tf
+import torchvision
+
+# tensorflow
+(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar100.load_data()
+
+# pytorch (`root` is required: where the dataset is downloaded/stored)
+train = torchvision.datasets.CIFAR100(root="data", train=True, download=True)
+test = torchvision.datasets.CIFAR100(root="data", train=False, download=True)
+```
+
+But most real-world datasets won't fully fit in memory. So to simulate this scenario, we will upscale the images to 64x64 & save them to disk as JPEGs.
+
+Directory structure:
+```
+cifar100/
+β”œβ”€β”€ train
+β”‚   β”œβ”€β”€ 0
+β”‚   β”‚   β”œβ”€β”€ 1.jpg
+β”‚   β”‚   └── 2.jpg
+β”‚   β”œβ”€β”€ 1
+β”‚   β”œβ”€β”€ 10
+```
+
+These will then be read and preprocessed in the dataloading step.
+
+### Reproducibility
+
+To ensure reproducibility, we will set a fixed `seed` for both frameworks as well as for the other libraries involved (e.g., NumPy, `random`).
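+
+A minimal sketch of such a seeding helper (the exact utility in the repo may differ slightly) looks like this:
+
+```python
+import random
+
+import numpy as np
+import tensorflow as tf
+import torch
+
+
+def set_seed(seed: int = 25) -> None:
+    """Seed every source of randomness used across both frameworks."""
+    random.seed(seed)                 # python's built-in RNG
+    np.random.seed(seed)              # numpy
+    tf.random.set_seed(seed)          # tensorflow
+    torch.manual_seed(seed)           # pytorch (CPU)
+    torch.cuda.manual_seed_all(seed)  # pytorch (all GPUs, no-op on CPU-only)
+```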
+
+### Dataloaders
+
+#### TensorFlow:
+
+We will be using the `tf.data` API to build a dataloader with the following operations:
+* Preprocessing
+    * Resizing - 32x32
+    * Normalizing
+    * Get label from `image_path`
+    * One-hot encode the label
+* Caching
+* Augmenting
+* Shuffle if train data
+* Batching
+* Prefetching
+* Repeat if train data
+
+Example:
+```python
+# train dataloader
+dataloader = (
+    tf.data.Dataset.from_tensor_slices(paths)
+    .map(preprocess, num_parallel_calls=AUTOTUNE)
+    .cache()
+    .map(augment, num_parallel_calls=AUTOTUNE)
+    .shuffle(len(paths))
+    .batch(BS)
+    .prefetch(AUTOTUNE)
+    .repeat()
+)
+```
+YES! It's that easy.
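+
+The `preprocess` & `augment` functions referenced above are where the per-image work happens. A rough sketch of what they do (names and exact ops here are illustrative, not the repo's verbatim code):
+
+```python
+import os
+
+import tensorflow as tf
+
+AUTOTUNE = tf.data.AUTOTUNE
+NUM_CLASSES = 100
+
+
+def preprocess(path):
+    # read & decode the jpeg from disk
+    image = tf.io.read_file(path)
+    image = tf.image.decode_jpeg(image, channels=3)
+    # resize back to the model's 32x32 input & normalize to [0, 1]
+    image = tf.image.resize(image, (32, 32)) / 255.0
+    # the parent folder name is the class id, e.g. cifar100/train/42/1.jpg
+    label = tf.strings.to_number(tf.strings.split(path, os.path.sep)[-2],
+                                 tf.int32)
+    return image, tf.one_hot(label, NUM_CLASSES)
+
+
+def augment(image, label):
+    image = tf.image.random_flip_left_right(image)
+    return image, label
+```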
+
+#### PyTorch:
+
+Here we need to do two things: first create a Dataset, then create a DataLoader from it.
+
+To create a Dataset we subclass `torch.utils.data.Dataset`, which performs these steps:
+
+* Read image
+* Get label from `image_path`
+* Transform the image
+    * Resizing
+    * Normalizing
+    * Augmenting
+
+Example:
+
+```python
+import os
+
+import torch
+from PIL import Image
+from stocaching import SharedCache, get_shm_size  # shared-memory cache
+from torch.utils.data import Dataset
+
+# `Config` is this repo's pydantic config object (import omitted here)
+
+
+class ClassificationDataset(Dataset):
+
+    def __init__(
+        self,
+        config: Config,
+        paths: list[str],
+        transforms=None,
+        augmentations=None,
+        cache: bool = False,
+    ) -> None:
+        self.config = config
+        self.paths = paths
+        self.transforms = transforms
+        self.augmentations = augmentations
+        self.cache = None
+        if cache:
+            self.cache = SharedCache(
+                size_limit_gib=get_shm_size(),
+                dataset_len=len(self.paths),
+                data_dims=(self.config.imgsz[-1], *self.config.imgsz[:-1]),
+                dtype=torch.float32,
+            )
+
+    def __len__(self) -> int:
+        return len(self.paths)
+
+    def __getitem__(self, idx: int):
+        path = self.paths[idx]
+        # the parent folder name is the class id
+        label = int(path.split(os.path.sep)[-2])
+        image = None
+        if self.cache is not None:
+            image = self.cache.get_slot(idx)
+        if image is None:
+            image = Image.open(path).convert('RGB')
+            if self.transforms:
+                image = self.transforms(image)
+            if self.cache is not None:
+                self.cache.set_slot(idx, image)
+        if self.augmentations:
+            image = self.augmentations(image)
+        return image, label
+```
+
+After this we need to build a DataLoader using `torch.utils.data.DataLoader`:
+
+```python
+from torch.utils.data import DataLoader
+
+# train dataloader
+dataloader = DataLoader(
+    dataset=dataset,
+    batch_size=batch_size,
+    shuffle=True,
+    num_workers=num_workers,
+)
+```
+
+### Model
+
+We will be using the same naive 4-layer CNN in both frameworks:
+
+```
+┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓
+┃ Layer (type)                          ┃ Output Shape                ┃         Param # ┃
+┑━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩
+β”‚ conv2d (Conv2D)                       β”‚ (None, 32, 32, 32)          β”‚             896 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ batch_normalization                   β”‚ (None, 32, 32, 32)          β”‚             128 β”‚
+β”‚ (BatchNormalization)                  β”‚                             β”‚                 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ re_lu (ReLU)                          β”‚ (None, 32, 32, 32)          β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ max_pooling2d (MaxPooling2D)          β”‚ (None, 16, 16, 32)          β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ conv2d_1 (Conv2D)                     β”‚ (None, 16, 16, 64)          β”‚          18,496 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ batch_normalization_1                 β”‚ (None, 16, 16, 64)          β”‚             256 β”‚
+β”‚ (BatchNormalization)                  β”‚                             β”‚                 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ re_lu_1 (ReLU)                        β”‚ (None, 16, 16, 64)          β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ max_pooling2d_1 (MaxPooling2D)        β”‚ (None, 8, 8, 64)            β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ conv2d_2 (Conv2D)                     β”‚ (None, 8, 8, 128)           β”‚          73,856 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ batch_normalization_2                 β”‚ (None, 8, 8, 128)           β”‚             512 β”‚
+β”‚ (BatchNormalization)                  β”‚                             β”‚                 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ re_lu_2 (ReLU)                        β”‚ (None, 8, 8, 128)           β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ max_pooling2d_2 (MaxPooling2D)        β”‚ (None, 4, 4, 128)           β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ conv2d_3 (Conv2D)                     β”‚ (None, 4, 4, 128)           β”‚         147,584 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ batch_normalization_3                 β”‚ (None, 4, 4, 128)           β”‚             512 β”‚
+β”‚ (BatchNormalization)                  β”‚                             β”‚                 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ re_lu_3 (ReLU)                        β”‚ (None, 4, 4, 128)           β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ max_pooling2d_3 (MaxPooling2D)        β”‚ (None, 2, 2, 128)           β”‚               0 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ global_average_pooling2d              β”‚ (None, 128)                 β”‚               0 β”‚
+β”‚ (GlobalAveragePooling2D)              β”‚                             β”‚                 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ dense (Dense)                         β”‚ (None, 128)                 β”‚          16,512 β”‚
+β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€
+β”‚ dense_1 (Dense)                       β”‚ (None, 100)                 β”‚          12,900 β”‚
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
+Total params: 271,652 (1.04 MB)
+Trainable params: 270,948 (1.03 MB)
+Non-trainable params: 704 (2.75 KB)
+```
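+
+For reference, a Keras sketch that reproduces the summary above (the PyTorch model mirrors it layer for layer with `nn.Conv2d`, `nn.BatchNorm2d`, etc.):
+
+```python
+from tensorflow.keras import layers, models
+
+
+def build_model(num_classes: int = 100) -> models.Model:
+    model = models.Sequential()
+    model.add(layers.Input(shape=(32, 32, 3)))
+    # four conv -> batchnorm -> relu -> maxpool stages
+    for filters in (32, 64, 128, 128):
+        model.add(layers.Conv2D(filters, 3, padding="same"))
+        model.add(layers.BatchNormalization())
+        model.add(layers.ReLU())
+        model.add(layers.MaxPooling2D())
+    model.add(layers.GlobalAveragePooling2D())
+    model.add(layers.Dense(128, activation="relu"))
+    model.add(layers.Dense(num_classes, activation="softmax"))
+    return model
+```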
+
+### Optimizer, Loss & Metrics
+
+Optimizer: Adam
+Loss: Cross-entropy (`CategoricalCrossentropy` in Keras, `nn.CrossEntropyLoss` in PyTorch)
+Metrics: Accuracy
+
+### Training
+
+Train the model for a fixed number of epochs & capture the training history (loss and accuracy) per epoch.
+
+Here is an example history plotted for a run:
+
+![History](assets/history.png)
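+
+The two training entry points differ more in flavor than in substance: Keras hides the loop behind `fit()`, while PyTorch spells it out. A condensed sketch (names like `train_ds`, `EPOCHS`, `BS` & `device` are illustrative):
+
+```python
+import torch
+
+# tensorflow: compile once, then let fit() run the loop
+model.compile(optimizer="adam",
+              loss="categorical_crossentropy",
+              metrics=["accuracy"])
+history = model.fit(train_ds,
+                    epochs=EPOCHS,
+                    # required because the dataset repeats indefinitely
+                    steps_per_epoch=len(paths) // BS)
+
+# pytorch: write the loop yourself
+device = "cuda" if torch.cuda.is_available() else "cpu"
+optimizer = torch.optim.Adam(model.parameters())
+criterion = torch.nn.CrossEntropyLoss()
+for epoch in range(EPOCHS):
+    model.train()
+    for images, labels in dataloader:
+        images, labels = images.to(device), labels.to(device)
+        optimizer.zero_grad()
+        loss = criterion(model(images), labels)
+        loss.backward()
+        optimizer.step()
+```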
+
+### Save the Model
+
+TensorFlow: native `.keras` format
+PyTorch: TorchScript `.pt` (via `torch.jit.script`)
+
+### Evaluation
+
+* Build dataloaders for the test dataset & evaluate the trained model.
+* Save the results.
+
+### Benchmarking Stats
+
+Along with all the above-mentioned steps, we will log the time each step takes to execute:
+
+Time taken for:
+
+* Data Loading
+* Model Building
+* Model Training
+* Model Saving
+* Model Loading
+* Evaluation
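+
+In the repo, each stage is timed with plain `time.time()` deltas (visible in the `main.py` diffs below); a small context manager is an equivalent, tidier way to capture the same numbers:
+
+```python
+import time
+from contextlib import contextmanager
+
+
+@contextmanager
+def timed(stats: dict, key: str):
+    """Record the wall-clock duration of a block into `stats[key]`."""
+    start = time.time()
+    try:
+        yield
+    finally:
+        stats[key] = time.time() - start
+
+
+# usage
+stats = {}
+with timed(stats, "model_saving_time"):
+    model.save("output/last.keras")
+```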
+
+---
+---
+
+## Benchmarking Results πŸŽ–οΈ
+
+After running with different combinations of training parameters (batch size, num_workers, etc.) & devices, here are the results πŸ₯
+
+### SEED: 25 | Epochs: 20
+
+#### CPU
+
+Name: Intel(R) Core(TM) Ultra 7 155H
+
+Without Augmentations:
+| Framework | Batch Size | Num Workers | Caching | Training Time | Eval Time |
+|------------|------------|------------|---------|---------------|-----------|
+| PyTorch | 32 | 0 | False | 1518s | 11s |
+| TensorFlow πŸ’― | 32 | NA | True | 1086s | 3s |
+| PyTorch | 32 | 5 | False | 1402s | 6s |
+| TensorFlow | 32 | NA | False | 1164s | 3s |
+
+With Augmentations:
+| Framework | Batch Size | Num Workers | Caching | Training Time | Eval Time |
+|------------|------------|------------|---------|---------------|-----------|
+| PyTorch | 16 | 0 | False | 1741s | 8s |
+| PyTorch | 32 | 0 | False | 1589s | 8s |
+| PyTorch | 32 | 0 | True | 1217s | 8s |
+| PyTorch | 64 | 0 | False | 1443s | 8s |
+| PyTorch | 32 | 5 | False | 1420s | 6s |
+| PyTorch | 32 | 5 | True | 1301s | 6s |
+| PyTorch | 32 | 10 | False | 1407s | 6s |
+| PyTorch | 32 | 10 | True | 1305s | 7s |
+| PyTorch | 32 | 15 | False | 1361s | 5s |
+| PyTorch | 32 | 20 | False | 1404s | 7s |
+| TensorFlow | 16 | NA | True | 1419s | 4s |
+| TensorFlow | 32 | NA | True | 1151s | 3s |
+| TensorFlow πŸ’― | 64 | NA | True | 1021s | 2s |
+
+#### GPU
+
+Name: Tesla T4
+
+With Augmentations:
+| Framework | Batch Size | Num Workers | Caching | Training Time | Eval Time |
+|------------|------------|------------|---------|---------------|-----------|
+| PyTorch | 16 | 0 | True | 320s | 8s |
+| PyTorch | 32 | 0 | False | 580s | 4s |
+| PyTorch | 32 | 0 | True | 201s | 4s |
+| PyTorch | 32 | 5 | False | 187s | 2s |
+| PyTorch | 32 | 5 | True | 161s | 2s |
+| PyTorch | 32 | 10 | False | 192s | 2s |
+| PyTorch | 32 | 10 | True | 163s | 2s |
+| PyTorch | 32 | 15 | True | 165s | 2s |
+| PyTorch | 32 | 20 | True | 168s | 2s |
+| PyTorch | 64 | 0 | True | 155s | 4s |
+| TensorFlow | 16 | NA | True | 199s | 3s |
+| TensorFlow | 32 | NA | False | 143s | 3s |
+| TensorFlow | 32 | NA | True | 126s | 3s |
+| TensorFlow πŸ’― | 64 | NA | True | 94s | 2s |
+
+---
+
+We also conducted experiments using a dataset large enough that it could not be cached in memory:
+
+Dataset: [DeepFashion](https://mmlab.ie.cuhk.edu.hk/projects/DeepFashion/AttributePrediction.html)
+(_Dataset was formatted to follow a proper folder structure_)
+
+Data Size:
+- Training: 249,222
+- Testing: 40,000
+
+Image Size: 128 x 128
+
+With Augmentations:
+| Framework | Batch Size | Num Workers | Caching | Training Time | Eval Time |
+|------------|------------|------------|---------|---------------|-----------|
+| PyTorch | 32 | 5 | False | 4068s | 19s |
+| TensorFlow πŸ’― | 32 | NA | False | 3663s | 10s |
+
+## References
+
+- https://pyimagesearch.com/2021/06/21/data-pipelines-with-tf-data-and-tensorflow/
+- https://www.tensorflow.org/guide/data_performance
+- https://opencv.org/blog/pytorch-vs-tensorflow/
+- https://charl-ai.github.io/blog/dataloaders/
+- https://github.com/szymonmaszke/torchdatasets
+- https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html
+
+### Find the code here - [πŸš€](https://github.com/infocusp/tf_pt_benchmarking)
\ No newline at end of file
diff --git a/assets/google_trend.png b/assets/google_trend.png
new file mode 100644
index 0000000..47f9477
Binary files /dev/null and b/assets/google_trend.png differ
diff --git a/assets/history.png b/assets/history.png
new file mode 100644
index 0000000..29d0b6d
Binary files /dev/null and b/assets/history.png differ
diff --git a/assets/tfvspt.png b/assets/tfvspt.png
new file mode 100644
index 0000000..cc8e72a
Binary files /dev/null and b/assets/tfvspt.png differ
diff --git a/assets/train_data.jpg b/assets/train_data.jpg
new file mode 100644
index 0000000..47a2c8f
Binary files /dev/null and b/assets/train_data.jpg differ
diff --git a/pyproject.toml b/pyproject.toml
index 2844683..61b8e9e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
     "pydantic >= 2.10.6",
     "torchinfo >= 1.8.0",
     "stocaching >= 0.2.0",
-    "tensorflow >= 2.19.0",
+    "tensorflow[and-cuda] >= 2.19.0",
     "matplotlib >= 3.10.1",
     "torchvision >= 0.21.0",
 ]
diff --git a/tfvspt/base.py b/tfvspt/base.py
index 36a3c69..5984d8a 100644
--- a/tfvspt/base.py
+++ b/tfvspt/base.py
@@ -1,7 +1,5 @@
 """Benchmarking Base."""
 
-import logging
-
 import matplotlib.pyplot as plt
 import numpy as np
 
diff --git a/tfvspt/pt/main.py b/tfvspt/pt/main.py
index d54285a..b0c0c41 100644
--- a/tfvspt/pt/main.py
+++ b/tfvspt/pt/main.py
@@ -193,7 +193,7 @@ def train(self) -> None:
 
         # save model
         st = time.time()
-        torch.jit.script(model).save(str(self.config.output / "cifar100.pt"))
+        torch.jit.script(model).save(str(self.config.output / "last.pt"))
         self._log_stats("model_saving_time", time.time() - st)
 
     def eval(self) -> dict:
@@ -202,7 +202,7 @@ def eval(self) -> dict:
 
         # load model
         st = time.time()
-        model = torch.jit.load(str(self.config.output / "cifar100.pt"))
+        model = torch.jit.load(str(self.config.output / "last.pt"))
         self._log_stats("model_loading_time", time.time() - st)
 
         # evaluate
diff --git a/tfvspt/tf/main.py b/tfvspt/tf/main.py
index 9b9e7fb..f6db4bc 100644
--- a/tfvspt/tf/main.py
+++ b/tfvspt/tf/main.py
@@ -49,7 +49,7 @@ def plot_images(self, dataloader, name: str) -> None:
     def get_callbacks(self) -> list:
         # train history logger
         csv_logger = tf.keras.callbacks.CSVLogger(str(self.config.output /
-                                                      'cifar100_training.csv'),
+                                                      'history.csv'),
                                                   separator=",",
                                                   append=False)
         return [csv_logger]
@@ -97,7 +97,7 @@ def train(self) -> None:
 
         # save model
         st = time.time()
-        model.save(str(self.config.output / "cifar100.keras"))
+        model.save(str(self.config.output / "last.keras"))
         self._log_stats("model_saving_time", time.time() - st)
 
     def eval(self) -> dict:
@@ -107,7 +107,7 @@ def eval(self) -> dict:
 
         # load model
         st = time.time()
         model = tf.keras.models.load_model(
-            str(self.config.output / "cifar100.keras"))
+            str(self.config.output / "last.keras"))
         self._log_stats("model_loading_time", time.time() - st)
 
         # evaluate