"""
cache.py
--------
Script to cache embeddings from CLIP/DINOv2 models.
Arguments
---------
ckpt: Checkpoint name of the CLIP model.
"""
from copy import deepcopy
from pathlib import Path
import torch
from omegaconf import OmegaConf, open_dict
from torch import nn
from torch.utils.data import DataLoader, random_split
from tqdm import tqdm
from transformers.models.clip.modeling_clip import \
CLIPVisionModelWithProjection
from runner.dataset import create_dataset, create_transforms
@torch.no_grad()
def cache(ckpt: str):
    """
    Cache image embeddings from CLIP/DINOv2 models.

    root/embedding
    └── <checkpoint>
        └── <dataset>
            ├── train
            │   ├── embeds_part01.pt  # private set, client uses it to fine-tune the model
            │   ├── embeds_part02.pt  # private set, client uses it to validate the model
            │   ├── embeds_part03.pt  # public set, adversary uses it to invert the data
            │   ├── embeds_part04.pt  # public set, adversary uses it to validate the **inversion model** (if needed)
            │   ├── labels_part01.pt
            │   ├── labels_part02.pt
            │   ├── labels_part03.pt
            │   └── labels_part04.pt
            └── val
                ├── embeds_part01.pt  # private set, client uses it to fine-tune the model
                ├── embeds_part02.pt  # private set, client uses it to validate the model
                ├── embeds_part03.pt  # public set, adversary uses it to invert the data
                ├── embeds_part04.pt  # public set, adversary uses it to validate the **inversion model** (if needed)
                ├── labels_part01.pt
                ├── labels_part02.pt
                ├── labels_part03.pt
                └── labels_part04.pt
    """
    config_str = f"""
checkpoint_dir: ./checkpoints/  # NOTE: Update this path if needed
dataset_dir: ./datasets/        # NOTE: Update this path if needed
workers: 16
batch_size: 128
model:
  name: CLIPVisionModelWithProjection
  torch_dtype: float32
  checkpoint: {ckpt}
  image_size: 224
  preprocess:
    - clip_vit_processor: {{}}
dataset:
  name: imagenet
"""
    config = OmegaConf.create(config_str)
    device = torch.device('cuda')

    ckpt_dir: Path = (
        Path(config.checkpoint_dir)
        / 'embedding'
        / config.model.checkpoint  # First level
        / config.dataset.name      # Second level
    )
    ckpt_dir.mkdir(parents=True, exist_ok=True)
    (ckpt_dir / 'train').mkdir(parents=True, exist_ok=True)
    (ckpt_dir / 'val').mkdir(parents=True, exist_ok=True)

    dataset_config = deepcopy(config.dataset)
    with open_dict(dataset_config):
        dataset_config.dataset_dir = config.dataset_dir

    print(f'Checkpoint: {ckpt}')
    print(f'Target dir: {ckpt_dir}')

    # Use `create_model` for DINOv2
    # model = create_model(config.model.name, config.model.checkpoint,
    #                      parse_torch_dtype(config.model.torch_dtype)).to(device)
    model = CLIPVisionModelWithProjection.from_pretrained(config.model.checkpoint).to(device)
    # Disable next line for DINOv2
    model.visual_projection = nn.Identity()  # type: ignore
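    # With the projection replaced by Identity, `image_embeds` below are the
    # pre-projection pooled features, so the cached width equals `hidden_size`.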
    hidden_dim = model.config.hidden_size

    transform, _ = create_transforms(OmegaConf.to_object(config.model.preprocess))  # type: ignore
    train_dataset, _ = create_dataset(
        dataset_config.dataset_dir, dataset_config.name, split='train',
        transform=transform,
    )
    val_dataset, _ = create_dataset(
        dataset_config.dataset_dir, dataset_config.name, split='val',
        transform=transform,
    )

    private_dataset, public_dataset = random_split(
        train_dataset, [0.5, 0.5], torch.Generator().manual_seed(0))
    # Partition 01 and 02 for private set, 03 and 04 for public set
    private_train_dataset, private_val_dataset = random_split(
        private_dataset, [0.8, 0.2], torch.Generator().manual_seed(0))
    public_train_dataset, public_val_dataset = random_split(
        public_dataset, [0.8, 0.2], torch.Generator().manual_seed(0))
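
    # Enumeration order below fixes the partition indices: 01 = private train,
    # 02 = private validation, 03 = public (adversary) train, 04 = public
    # validation, matching the layout documented in the docstring above.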
    for idx, dataset in enumerate([private_train_dataset,
                                   private_val_dataset,
                                   public_train_dataset,
                                   public_val_dataset], 1):
        if ((ckpt_dir / 'train' / f'embeds_part{idx:02d}.pt').exists()
                and (ckpt_dir / 'train' / f'labels_part{idx:02d}.pt').exists()):
            print(f'Dataset {dataset_config.name} (train) part {idx:02d} already exists. Skip.')
            continue
        dataloader = DataLoader(dataset, batch_size=config.batch_size, num_workers=config.workers)
        f = torch.empty(len(dataset), hidden_dim, dtype=torch.float32, device=device)
        Y = torch.empty(len(dataset), dtype=torch.long, device=device)
        start = 0
        for im, label in tqdm(dataloader, desc=f'{dataset_config.name}{idx:02d}', ncols=0):
            im = im.to(device)
            bs = im.size(0)
            f[start:start+bs] = model(im).image_embeds
            Y[start:start+bs] = label
            start += bs
        f, Y = f.cpu(), Y.cpu()
        torch.save(f, ckpt_dir / 'train' / f'embeds_part{idx:02d}.pt')
        torch.save(Y, ckpt_dir / 'train' / f'labels_part{idx:02d}.pt')

    if (ckpt_dir / 'val' / 'embeds.pt').exists() and (ckpt_dir / 'val' / 'labels.pt').exists():
        print(f'Dataset {dataset_config.name} (validation) part already exists. Skip.')
        return
    dataloader = DataLoader(val_dataset, batch_size=config.batch_size, num_workers=config.workers)
    f = torch.empty(len(val_dataset), hidden_dim, dtype=torch.float32, device=device)  # type: ignore
    Y = torch.empty(len(val_dataset), dtype=torch.long, device=device)  # type: ignore
    start = 0
    for im, label in tqdm(dataloader, desc=f'{dataset_config.name} val', ncols=0):
        im = im.to(device)
        bs = im.size(0)
        f[start:start+bs] = model(im).image_embeds
        Y[start:start+bs] = label
        start += bs
    f, Y = f.cpu(), Y.cpu()
    torch.save(f, ckpt_dir / 'val' / 'embeds.pt')
    torch.save(Y, ckpt_dir / 'val' / 'labels.pt')


if __name__ == "__main__":
    cache('openai/clip-vit-base-patch16')
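
# Example (sketch, not executed by this script): how a downstream consumer might
# load one cached partition. The path follows the default config above
# (checkpoint_dir='./checkpoints/', dataset='imagenet') and the directory layout
# documented in `cache()`; adjust it if you changed those settings.
#
#   part_dir = Path('./checkpoints/embedding/openai/clip-vit-base-patch16/imagenet/train')
#   embeds = torch.load(part_dir / 'embeds_part01.pt')  # shape: [N, hidden_dim]
#   labels = torch.load(part_dir / 'labels_part01.pt')  # shape: [N]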