Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,4 @@ ckpts
data
imgs
lightning_logs
out
src/model/ops
out
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ sh make.sh

To download the FGVC-Aircraft and FSOD datasets, use the following command. You can also download the iNaturalist 2017 dataset, but it is quite large, so we suggest starting with FGVC-Aircraft and FSOD.

`python download.py --datasets fgvc fsod`
`python download.py --datasets fgvc`

To download our FSOD-800 pretrained Deformable DETR checkpoints, use:

Expand Down
2 changes: 1 addition & 1 deletion cfgs/fgvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,5 +52,5 @@ queries: 300

accumulate_grad_batches: 2
deterministic: True
gpus: 4
gpus: 1
max_epochs: 30
189 changes: 189 additions & 0 deletions reu_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
name: reu
channels:
- pytorch
- conda-forge
- defaults
dependencies:
- aom=3.5.0
- brotli-python=1.0.9
- bzip2=1.0.8
- ca-certificates=2023.5.7
- cairo=1.16.0
- charset-normalizer=3.1.0
- dav1d=1.2.1
- expat=2.5.0
- ffmpeg=6.0.0
- filelock=3.12.2
- font-ttf-dejavu-sans-mono=2.37
- font-ttf-inconsolata=3.000
- font-ttf-source-code-pro=2.038
- font-ttf-ubuntu=0.83
- fontconfig=2.14.2
- fonts-conda-ecosystem=1
- fonts-conda-forge=1
- freetype=2.12.1
- fribidi=1.0.10
- gettext=0.21.1
- gmp=6.2.1
- gmpy2=2.1.2
- gnutls=3.7.8
- graphite2=1.3.13
- harfbuzz=7.3.0
- icu=72.1
- idna=3.4
- jinja2=3.1.2
- jpeg=9e
- lame=3.100
- lcms2=2.15
- lerc=4.0.0
- libass=0.17.1
- libblas=3.9.0
- libcblas=3.9.0
- libcxx=16.0.6
- libdeflate=1.17
- libexpat=2.5.0
- libffi=3.4.2
- libgfortran=5.0.0
- libgfortran5=12.2.0
- libglib=2.76.3
- libiconv=1.17
- libidn2=2.3.4
- liblapack=3.9.0
- libopenblas=0.3.23
- libopus=1.3.1
- libpng=1.6.39
- libsqlite=3.42.0
- libtasn1=4.19.0
- libtiff=4.5.0
- libunistring=0.9.10
- libvpx=1.13.0
- libwebp-base=1.3.1
- libxcb=1.13
- libxml2=2.11.4
- libzlib=1.2.13
- llvm-openmp=16.0.6
- markupsafe=2.1.3
- mpc=1.3.1
- mpfr=4.2.0
- mpmath=1.3.0
- ncurses=6.4
- nettle=3.8.1
- networkx=3.1
- openh264=2.3.1
- openjpeg=2.5.0
- openssl=3.1.1
- p11-kit=0.24.1
- pcre2=10.40
- pip=23.1.2
- pixman=0.40.0
- pthread-stubs=0.4
- pysocks=1.7.1
- python=3.9.16
- python_abi=3.9
- pytorch=2.0.1
- readline=8.2
- requests=2.31.0
- setuptools=68.0.0
- svt-av1=1.6.0
- sympy=1.12
- tk=8.6.12
- torchvision=0.15.2
- typing_extensions=4.7.1
- tzdata=2023c
- wheel=0.40.0
- x264=1!164.3095
- x265=3.5
- xorg-libxau=1.0.11
- xorg-libxdmcp=1.1.3
- xz=5.2.6
- zlib=1.2.13
- zstd=1.5.2
- pip:
- absl-py==1.4.0
- adal==1.2.7
- aiohttp==3.8.4
- aiosignal==1.3.1
- async-timeout==4.0.2
- attrs==23.1.0
- azure-common==1.1.28
- azure-core==1.27.1
- azure-graphrbac==0.61.1
- azure-mgmt-authorization==0.61.0
- azure-mgmt-containerregistry==10.1.0
- azure-mgmt-core==1.4.0
- azure-mgmt-keyvault==9.3.0
- azure-mgmt-resource==13.0.0
- azure-mgmt-storage==11.2.0
- azureml-core==1.35.0.post1
- backports-tempfile==1.0
- backports-weakref==1.0.post1
- beautifulsoup4==4.12.2
- cachetools==5.3.1
- certifi==2023.5.7
- cffi==1.15.1
- configargparse==1.5.3
- contextlib2==21.6.0
- contourpy==1.1.0
- cryptography==3.4.8
- cycler==0.11.0
- cython==0.29.35
- docker==5.0.3
- fonttools==4.40.0
- frozenlist==1.3.3
- fsspec==2023.6.0
- future==0.18.3
- gdown==4.4.0
- google-auth==2.21.0
- google-auth-oauthlib==0.4.6
- grpcio==1.56.0
- importlib-metadata==6.7.0
- importlib-resources==5.12.0
- isodate==0.6.1
- jeepney==0.8.0
- jmespath==0.10.0
- jsonpickle==2.2.0
- kiwisolver==1.4.4
- markdown==3.4.3
- matplotlib==3.7.1
- msrest==0.7.1
- msrestazure==0.6.4
- multidict==6.0.4
- ndg-httpsclient==0.5.1
- numpy==1.23.5
- oauthlib==3.2.2
- opencv-contrib-python==4.5.4.60
- packaging==23.1
- pathspec==0.11.1
- pillow==10.0.0
- protobuf==3.20.0
- pyasn1==0.5.0
- pyasn1-modules==0.3.0
- pycocotools==2.0.2
- pycparser==2.21
- pydeprecate==0.3.1
- pyjwt==2.7.0
- pyopenssl==20.0.1
- pyparsing==3.1.0
- python-dateutil==2.8.2
- pytorch-lightning==1.5.1
- pytz==2023.3
- pyyaml==6.0
- requests-oauthlib==1.3.1
- rsa==4.9
- ruamel-yaml==0.17.16
- ruamel-yaml-clib==0.2.7
- secretstorage==3.3.3
- six==1.16.0
- soupsieve==2.4.1
- sparsemax==0.1.9
- tensorboard==2.7.0
- tensorboard-data-server==0.6.1
- tensorboard-plugin-wit==1.8.1
- torchmetrics==0.11.4
- tqdm==4.65.0
- urllib3==1.26.6
- websocket-client==1.6.1
- werkzeug==2.3.6
- yarl==1.9.2
- zipp==3.15.0
prefix: /Users/patrickwatters/opt/anaconda3/envs/reu
19 changes: 15 additions & 4 deletions src/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# Imports other packages.
from configargparse import Parser
from pytorch_lightning import Trainer

import configargparse
# Imports local packages.
from model.ws_detr import WS_DETR

Expand All @@ -12,11 +12,22 @@ def parse_args():
"""Parses command line and config file arguments."""

# Instantiates config arg parser with required config file.
parser = Parser(
args_for_setting_config_path=["-c", "--cfg", "--config"],
config_arg_is_required=True,
#parser = Parser(
# args_for_setting_config_path=["-c", "--cfg", "--config"],
# config_arg_is_required=True,
#)
parser = configargparse.ArgumentParser(
description='Transcribe text from speech using a speech recognition model on one CPU or GPU',
config_file_parser_class=configargparse.YAMLConfigFileParser,
formatter_class=configargparse.ArgumentDefaultsHelpFormatter)

parser.add_argument(
"--config", is_config_file=True, help="config file path", default='cfgs/fgvc.yaml'
)

#parserb = Parser(de)


# Adds command line, Trainer, and model arguments.
parser = add_input_args(parser)
parser = Trainer.add_argparse_args(parser)
Expand Down
2 changes: 1 addition & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def load_trainer(args):

# Sets DDP strategy for multi-GPU training.
args.strategy = "ddp" if args.gpus > 1 else None

#args.strategy = None
# Instantiates PL Trainer using args.
callbacks = [checkpointer, progress_bar]
trainer = Trainer.from_argparse_args(args, callbacks=callbacks)
Expand Down
16 changes: 8 additions & 8 deletions src/model/deformable_transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,10 @@ def __init__(self, args):

# self attention
self.self_attn = MSDeformAttn(
dim=args.hidden_dim,
feature_levels=args.feature_levels,
heads=args.heads,
points=args.enc_points,
d_model=args.hidden_dim,
n_levels=args.feature_levels,
n_heads=args.heads,
n_points=args.enc_points,
)
self.dropout1 = nn.Dropout(args.dropout)
self.norm1 = nn.LayerNorm(args.hidden_dim)
Expand Down Expand Up @@ -236,10 +236,10 @@ def __init__(self, args):

# cross attention
self.cross_attn = MSDeformAttn(
dim=args.hidden_dim,
feature_levels=args.feature_levels,
heads=args.heads,
points=args.dec_points,
d_model=args.hidden_dim,
n_levels=args.feature_levels,
n_heads=args.heads,
n_points=args.dec_points,
)
self.dropout1 = nn.Dropout(args.dropout)
self.norm1 = nn.LayerNorm(args.hidden_dim)
Expand Down
10 changes: 10 additions & 0 deletions src/model/ops/functions/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------

from .ms_deform_attn_func import MSDeformAttnFunction

75 changes: 75 additions & 0 deletions src/model/ops/functions/ms_deform_attn_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------

# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import torch
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable

# Import the compiled CUDA extension. It is only required (and only expected to
# be importable) when a CUDA device is available.
if torch.cuda.is_available():
    try:
        import MultiScaleDeformableAttention as MSDA
    except ModuleNotFoundError as e:
        info_string = (
            "\n\nPlease compile MultiScaleDeformableAttention CUDA op with the following commands:\n"
            "\t`cd src/model/ops`\n"
            "\t`sh make.sh`\n"
        )
        # Chain the original error so the underlying import failure stays visible.
        raise ModuleNotFoundError(info_string) from e
else:
    # The rest of this module refers to the extension through the `MSDA` alias,
    # so that is the name that must exist when CUDA is unavailable. The longer
    # name is kept as well for backward compatibility with the previous binding.
    MSDA = None
    MultiScaleDeformableAttention = None


class MSDeformAttnFunction(Function):
    """Autograd wrapper around the compiled ``MSDA`` forward/backward kernels."""

    @staticmethod
    def forward(ctx, value, value_spatial_shapes, value_level_start_index,
                sampling_locations, attention_weights, im2col_step):
        """Run ``MSDA.ms_deform_attn_forward`` and stash its inputs for backward.

        The five tensor arguments are passed to the extension unchanged,
        followed by ``im2col_step``, which is also kept on ``ctx``.
        """
        ctx.im2col_step = im2col_step
        kernel_inputs = (
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
        )
        result = MSDA.ms_deform_attn_forward(*kernel_inputs, ctx.im2col_step)
        ctx.save_for_backward(*kernel_inputs)
        return result

    @staticmethod
    @once_differentiable
    def backward(ctx, grad_output):
        """Run ``MSDA.ms_deform_attn_backward`` on the tensors saved in forward.

        Returns one entry per ``forward`` input: gradients for ``value``,
        ``sampling_locations`` and ``attention_weights``; ``None`` for the
        spatial shapes, the level start index and ``im2col_step``.
        """
        (value, value_spatial_shapes, value_level_start_index,
         sampling_locations, attention_weights) = ctx.saved_tensors
        grads = MSDA.ms_deform_attn_backward(
            value,
            value_spatial_shapes,
            value_level_start_index,
            sampling_locations,
            attention_weights,
            grad_output,
            ctx.im2col_step,
        )
        grad_value, grad_sampling_loc, grad_attn_weight = grads
        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None


def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights):
    """Pure-PyTorch multi-scale deformable attention (debug/test reference).

    Intended for debugging and testing only; the CUDA version should be used
    otherwise.

    Args:
        value: 4-D tensor unpacked as (N, S, M, D); dim 1 is split per level
            into chunks of ``H * W`` elements.
        value_spatial_shapes: iterable of ``(H, W)`` pairs, one per level.
        sampling_locations: 6-D tensor unpacked as (N, Lq, M, L, P, 2). It is
            mapped with ``2 * x - 1`` before being handed to ``grid_sample``.
        attention_weights: tensor reshaped to (N * M, 1, Lq, L * P) after
            swapping dims 1 and 2.

    Returns:
        Contiguous tensor of shape (N, Lq, M * D).
    """
    batch, _, _, head_dim = value.shape
    _, num_queries, num_heads, num_levels, num_points, _ = sampling_locations.shape

    # Slice the flattened value tensor into one chunk per feature level.
    level_sizes = [height * width for height, width in value_spatial_shapes]
    per_level_values = value.split(level_sizes, dim=1)

    # Rescale the sampling locations as 2 * x - 1 for grid_sample.
    grids = 2 * sampling_locations - 1

    sampled = []
    for level, (height, width) in enumerate(value_spatial_shapes):
        # (N, H*W, M, D) -> (N, H*W, M*D) -> (N, M*D, H*W) -> (N*M, D, H, W)
        feature_map = per_level_values[level].flatten(2).transpose(1, 2)
        feature_map = feature_map.reshape(batch * num_heads, head_dim, height, width)
        # (N, Lq, M, P, 2) -> (N, M, Lq, P, 2) -> (N*M, Lq, P, 2)
        level_grid = grids[:, :, :, level].transpose(1, 2).flatten(0, 1)
        # Result per level: (N*M, D, Lq, P)
        sampled.append(
            F.grid_sample(
                feature_map,
                level_grid,
                mode='bilinear',
                padding_mode='zeros',
                align_corners=False,
            )
        )

    # (N, Lq, M, L, P) -> (N, M, Lq, L, P) -> (N*M, 1, Lq, L*P)
    weights = attention_weights.transpose(1, 2).reshape(
        batch * num_heads, 1, num_queries, num_levels * num_points
    )
    # Stack levels next to the points axis, then merge them: (N*M, D, Lq, L*P)
    stacked = torch.stack(sampled, dim=-2).flatten(-2)
    output = (stacked * weights).sum(-1).view(batch, num_heads * head_dim, num_queries)
    return output.transpose(1, 2).contiguous()
13 changes: 13 additions & 0 deletions src/model/ops/make.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/usr/bin/env bash
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------

# Copyright (c) Facebook, Inc. and its affiliates.
# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR

python setup.py build install
10 changes: 10 additions & 0 deletions src/model/ops/modules/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# ------------------------------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------------------------------
# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
# ------------------------------------------------------------------------------------------------


from .ms_deform_attn import MSDeformAttn
Loading