tmlabonte · pw-02 · Jul 3, 2023 · Jul 3, 2023 · Jul 3, 2023 · Jul 3, 2023
diff --git a/.gitignore b/.gitignore
@@ -3,5 +3,4 @@ ckpts
 data
 imgs
 lightning_logs
-out
-src/model/ops
+out
diff --git a/README.md b/README.md
@@ -19,7 +19,7 @@ sh make.sh
 
 To download the FGVC-Aircraft and FSOD datasets, use the following command. You can also download the iNaturalist 2017 dataset, but it is quite large, so we suggest starting with FGVC-Aircraft and FSOD.
 
-`python download.py --datasets fgvc fsod`
+`python download.py --datasets fgvc`
 
 To download our FSOD-800 pretrained Deformable DETR checkpoints, use:
 

diff --git a/cfgs/fgvc.yaml b/cfgs/fgvc.yaml
@@ -52,5 +52,5 @@ queries: 300
 
 accumulate_grad_batches: 2
 deterministic: True
-gpus: 4
+gpus: 1
 max_epochs: 30
diff --git a/reu_env.yml b/reu_env.yml
@@ -0,0 +1,189 @@
+name: reu
+channels:
+  - pytorch
+  - conda-forge
+  - defaults
+dependencies:
+  - aom=3.5.0
+  - brotli-python=1.0.9
+  - bzip2=1.0.8
+  - ca-certificates=2023.5.7
+  - cairo=1.16.0
+  - charset-normalizer=3.1.0
+  - dav1d=1.2.1
+  - expat=2.5.0
+  - ffmpeg=6.0.0
+  - filelock=3.12.2
+  - font-ttf-dejavu-sans-mono=2.37
+  - font-ttf-inconsolata=3.000
+  - font-ttf-source-code-pro=2.038
+  - font-ttf-ubuntu=0.83
+  - fontconfig=2.14.2
+  - fonts-conda-ecosystem=1
+  - fonts-conda-forge=1
+  - freetype=2.12.1
+  - fribidi=1.0.10
+  - gettext=0.21.1
+  - gmp=6.2.1
+  - gmpy2=2.1.2
+  - gnutls=3.7.8
+  - graphite2=1.3.13
+  - harfbuzz=7.3.0
+  - icu=72.1
+  - idna=3.4
+  - jinja2=3.1.2
+  - jpeg=9e
+  - lame=3.100
+  - lcms2=2.15
+  - lerc=4.0.0
+  - libass=0.17.1
+  - libblas=3.9.0
+  - libcblas=3.9.0
+  - libcxx=16.0.6
+  - libdeflate=1.17
+  - libexpat=2.5.0
+  - libffi=3.4.2
+  - libgfortran=5.0.0
+  - libgfortran5=12.2.0
+  - libglib=2.76.3
+  - libiconv=1.17
+  - libidn2=2.3.4
+  - liblapack=3.9.0
+  - libopenblas=0.3.23
+  - libopus=1.3.1
+  - libpng=1.6.39
+  - libsqlite=3.42.0
+  - libtasn1=4.19.0
+  - libtiff=4.5.0
+  - libunistring=0.9.10
+  - libvpx=1.13.0
+  - libwebp-base=1.3.1
+  - libxcb=1.13
+  - libxml2=2.11.4
+  - libzlib=1.2.13
+  - llvm-openmp=16.0.6
+  - markupsafe=2.1.3
+  - mpc=1.3.1
+  - mpfr=4.2.0
+  - mpmath=1.3.0
+  - ncurses=6.4
+  - nettle=3.8.1
+  - networkx=3.1
+  - openh264=2.3.1
+  - openjpeg=2.5.0
+  - openssl=3.1.1
+  - p11-kit=0.24.1
+  - pcre2=10.40
+  - pip=23.1.2
+  - pixman=0.40.0
+  - pthread-stubs=0.4
+  - pysocks=1.7.1
+  - python=3.9.16
+  - python_abi=3.9
+  - pytorch=2.0.1
+  - readline=8.2
+  - requests=2.31.0
+  - setuptools=68.0.0
+  - svt-av1=1.6.0
+  - sympy=1.12
+  - tk=8.6.12
+  - torchvision=0.15.2
+  - typing_extensions=4.7.1
+  - tzdata=2023c
+  - wheel=0.40.0
+  - x264=1!164.3095
+  - x265=3.5
+  - xorg-libxau=1.0.11
+  - xorg-libxdmcp=1.1.3
+  - xz=5.2.6
+  - zlib=1.2.13
+  - zstd=1.5.2
+  - pip:
+      - absl-py==1.4.0
+      - adal==1.2.7
+      - aiohttp==3.8.4
+      - aiosignal==1.3.1
+      - async-timeout==4.0.2
+      - attrs==23.1.0
+      - azure-common==1.1.28
+      - azure-core==1.27.1
+      - azure-graphrbac==0.61.1
+      - azure-mgmt-authorization==0.61.0
+      - azure-mgmt-containerregistry==10.1.0
+      - azure-mgmt-core==1.4.0
+      - azure-mgmt-keyvault==9.3.0
+      - azure-mgmt-resource==13.0.0
+      - azure-mgmt-storage==11.2.0
+      - azureml-core==1.35.0.post1
+      - backports-tempfile==1.0
+      - backports-weakref==1.0.post1
+      - beautifulsoup4==4.12.2
+      - cachetools==5.3.1
+      - certifi==2023.5.7
+      - cffi==1.15.1
+      - configargparse==1.5.3
+      - contextlib2==21.6.0
+      - contourpy==1.1.0
+      - cryptography==3.4.8
+      - cycler==0.11.0
+      - cython==0.29.35
+      - docker==5.0.3
+      - fonttools==4.40.0
+      - frozenlist==1.3.3
+      - fsspec==2023.6.0
+      - future==0.18.3
+      - gdown==4.4.0
+      - google-auth==2.21.0
+      - google-auth-oauthlib==0.4.6
+      - grpcio==1.56.0
+      - importlib-metadata==6.7.0
+      - importlib-resources==5.12.0
+      - isodate==0.6.1
+      - jeepney==0.8.0
+      - jmespath==0.10.0
+      - jsonpickle==2.2.0
+      - kiwisolver==1.4.4
+      - markdown==3.4.3
+      - matplotlib==3.7.1
+      - msrest==0.7.1
+      - msrestazure==0.6.4
+      - multidict==6.0.4
+      - ndg-httpsclient==0.5.1
+      - numpy==1.23.5
+      - oauthlib==3.2.2
+      - opencv-contrib-python==4.5.4.60
+      - packaging==23.1
+      - pathspec==0.11.1
+      - pillow==10.0.0
+      - protobuf==3.20.0
+      - pyasn1==0.5.0
+      - pyasn1-modules==0.3.0
+      - pycocotools==2.0.2
+      - pycparser==2.21
+      - pydeprecate==0.3.1
+      - pyjwt==2.7.0
+      - pyopenssl==20.0.1
+      - pyparsing==3.1.0
+      - python-dateutil==2.8.2
+      - pytorch-lightning==1.5.1
+      - pytz==2023.3
+      - pyyaml==6.0
+      - requests-oauthlib==1.3.1
+      - rsa==4.9
+      - ruamel-yaml==0.17.16
+      - ruamel-yaml-clib==0.2.7
+      - secretstorage==3.3.3
+      - six==1.16.0
+      - soupsieve==2.4.1
+      - sparsemax==0.1.9
+      - tensorboard==2.7.0
+      - tensorboard-data-server==0.6.1
+      - tensorboard-plugin-wit==1.8.1
+      - torchmetrics==0.11.4
+      - tqdm==4.65.0
+      - urllib3==1.26.6
+      - websocket-client==1.6.1
+      - werkzeug==2.3.6
+      - yarl==1.9.2
+      - zipp==3.15.0
+prefix: /Users/patrickwatters/opt/anaconda3/envs/reu
diff --git a/src/args.py b/src/args.py
@@ -3,7 +3,7 @@
 # Imports other packages.
 from configargparse import Parser
 from pytorch_lightning import Trainer
-
+import configargparse
 # Imports local packages.
 from model.ws_detr import WS_DETR
 
@@ -12,11 +12,22 @@ def parse_args():
     """Parses command line and config file arguments."""
 
     # Instantiates config arg parser with required config file.
-    parser = Parser(
-        args_for_setting_config_path=["-c", "--cfg", "--config"],
-        config_arg_is_required=True,
+    #parser = Parser(
+    #    args_for_setting_config_path=["-c", "--cfg", "--config"],
+    #    config_arg_is_required=True,
+    #)
+    parser = configargparse.ArgumentParser(
+        description='Transcribe text from speech using a speech recognition model on one CPU or GPU',
+        config_file_parser_class=configargparse.YAMLConfigFileParser,
+        formatter_class=configargparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument(
+        "--config", is_config_file=True, help="config file path", default='cfgs/fgvc.yaml'
     )
 
+    #parserb = Parser(de)
+
+
     # Adds command line, Trainer, and model arguments.
     parser = add_input_args(parser)
     parser = Trainer.add_argparse_args(parser)

diff --git a/src/main.py b/src/main.py
@@ -111,7 +111,7 @@ def load_trainer(args):
 
     # Sets DDP strategy for multi-GPU training.
     args.strategy = "ddp" if args.gpus > 1 else None
-
+    #args.strategy = None
     # Instantiates PL Trainer using args.
     callbacks = [checkpointer, progress_bar]
     trainer = Trainer.from_argparse_args(args, callbacks=callbacks)

diff --git a/src/model/deformable_transformer.py b/src/model/deformable_transformer.py
@@ -162,10 +162,10 @@ def __init__(self, args):
 
         # self attention
         self.self_attn = MSDeformAttn(
-            dim=args.hidden_dim,
-            feature_levels=args.feature_levels,
-            heads=args.heads,
-            points=args.enc_points,
+            d_model=args.hidden_dim,
+            n_levels=args.feature_levels,
+            n_heads=args.heads,
+            n_points=args.enc_points,
         )
         self.dropout1 = nn.Dropout(args.dropout)
         self.norm1 = nn.LayerNorm(args.hidden_dim)
@@ -236,10 +236,10 @@ def __init__(self, args):
 
         # cross attention
         self.cross_attn = MSDeformAttn(
-            dim=args.hidden_dim,
-            feature_levels=args.feature_levels,
-            heads=args.heads,
-            points=args.dec_points,
+            d_model=args.hidden_dim,
+            n_levels=args.feature_levels,
+            n_heads=args.heads,
+            n_points=args.dec_points,
         )
         self.dropout1 = nn.Dropout(args.dropout)
         self.norm1 = nn.LayerNorm(args.hidden_dim)

diff --git a/src/model/ops/functions/__init__.py b/src/model/ops/functions/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+from .ms_deform_attn_func import MSDeformAttnFunction
+
diff --git a/src/model/ops/functions/ms_deform_attn_func.py b/src/model/ops/functions/ms_deform_attn_func.py
@@ -0,0 +1,75 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import torch
+import torch.nn.functional as F
+from torch.autograd import Function
+from torch.autograd.function import once_differentiable
+
+if torch.cuda.is_available():
+    try:
+        import MultiScaleDeformableAttention as MSDA
+    except ModuleNotFoundError as e:
+        info_string = (
+            "\n\nPlease compile MultiScaleDeformableAttention CUDA op with the following commands:\n"
+            "\t`cd mask2former/modeling/pixel_decoder/ops`\n"
+            "\t`sh make.sh`\n"
+        )
+        raise ModuleNotFoundError(info_string)
+else:
+    MultiScaleDeformableAttention = None
+
+
+class MSDeformAttnFunction(Function):
+    """Autograd ``Function`` that forwards to the compiled ``MSDA`` kernels.
+
+    ``MSDA`` is imported at module load only when CUDA is available, so both
+    methods require the compiled extension to be present.
+    """
+    @staticmethod
+    def forward(ctx, value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, im2col_step):
+        """Runs ``MSDA.ms_deform_attn_forward`` and saves the inputs needed by backward."""
+        # Kept on ctx as an attribute so backward can pass the same step to the kernel.
+        ctx.im2col_step = im2col_step
+        output = MSDA.ms_deform_attn_forward(
+            value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, ctx.im2col_step)
+        ctx.save_for_backward(value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights)
+        return output
+
+    @staticmethod
+    @once_differentiable
+    def backward(ctx, grad_output):
+        """Runs ``MSDA.ms_deform_attn_backward`` and returns one entry per forward input."""
+        value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights = ctx.saved_tensors
+        grad_value, grad_sampling_loc, grad_attn_weight = \
+            MSDA.ms_deform_attn_backward(
+                value, value_spatial_shapes, value_level_start_index, sampling_locations, attention_weights, grad_output, ctx.im2col_step)
+
+        # Order follows forward's arguments after ctx; value_spatial_shapes,
+        # value_level_start_index and im2col_step receive None (no gradient returned).
+        return grad_value, None, None, grad_sampling_loc, grad_attn_weight, None
+
+
+def ms_deform_attn_core_pytorch(value, value_spatial_shapes, sampling_locations, attention_weights):
+    """Pure-PyTorch multi-scale deformable attention sampling built on ``F.grid_sample``.
+
+    Args:
+        value: 4-D tensor unpacked as (N_, S_, M_, D_); dim 1 is split into one
+            chunk of H_*W_ entries per level.
+        value_spatial_shapes: iterable of (H_, W_) pairs, one per level.
+        sampling_locations: 6-D tensor unpacked as (N_, Lq_, M_, L_, P_, _); the
+            last dim holds the coordinates handed to ``grid_sample``.
+        attention_weights: tensor reshaped to (N_*M_, 1, Lq_, L_*P_) after
+            swapping dims 1 and 2.
+
+    Returns:
+        Contiguous tensor of shape (N_, Lq_, M_*D_).
+    """
+    # for debug and test only,
+    # need to use cuda version instead
+    # S_ is unpacked for readability only; it is not used below.
+    N_, S_, M_, D_ = value.shape
+    _, Lq_, M_, L_, P_, _ = sampling_locations.shape
+    value_list = value.split([H_ * W_ for H_, W_ in value_spatial_shapes], dim=1)
+    # Rescale x -> 2x - 1. grid_sample expects coordinates in [-1, 1], so this
+    # presumably assumes locations normalized to [0, 1] -- TODO confirm with callers.
+    sampling_grids = 2 * sampling_locations - 1
+    sampling_value_list = []
+    for lid_, (H_, W_) in enumerate(value_spatial_shapes):
+        # N_, H_*W_, M_, D_ -> N_, H_*W_, M_*D_ -> N_, M_*D_, H_*W_ -> N_*M_, D_, H_, W_
+        value_l_ = value_list[lid_].flatten(2).transpose(1, 2).reshape(N_*M_, D_, H_, W_)
+        # N_, Lq_, M_, P_, 2 -> N_, M_, Lq_, P_, 2 -> N_*M_, Lq_, P_, 2
+        sampling_grid_l_ = sampling_grids[:, :, :, lid_].transpose(1, 2).flatten(0, 1)
+        # N_*M_, D_, Lq_, P_
+        sampling_value_l_ = F.grid_sample(value_l_, sampling_grid_l_,
+                                          mode='bilinear', padding_mode='zeros', align_corners=False)
+        sampling_value_list.append(sampling_value_l_)
+    # (N_, Lq_, M_, L_, P_) -> (N_, M_, Lq_, L_, P_) -> (N_*M_, 1, Lq_, L_*P_)
+    attention_weights = attention_weights.transpose(1, 2).reshape(N_*M_, 1, Lq_, L_*P_)
+    # Stack per-level samples, weight them, sum over the L_*P_ axis, then regroup heads.
+    output = (torch.stack(sampling_value_list, dim=-2).flatten(-2) * attention_weights).sum(-1).view(N_, M_*D_, Lq_)
+    return output.transpose(1, 2).contiguous()
diff --git a/src/model/ops/make.sh b/src/model/ops/make.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+# Copyright (c) Facebook, Inc. and its affiliates.
+# Modified by Bowen Cheng from https://github.com/fundamentalvision/Deformable-DETR
+
+python setup.py build install
diff --git a/src/model/ops/modules/__init__.py b/src/model/ops/modules/__init__.py
@@ -0,0 +1,10 @@
+# ------------------------------------------------------------------------------------------------
+# Deformable DETR
+# Copyright (c) 2020 SenseTime. All Rights Reserved.
+# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
+# ------------------------------------------------------------------------------------------------
+# Modified from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0
+# ------------------------------------------------------------------------------------------------
+
+
+from .ms_deform_attn import MSDeformAttn
-Original file line number
+Diff line change
@@ Expand Up / @@ -3,5 +3,4 @@ ckpts @@
     data
     imgs
     lightning_logs
-    out
-    src/model/ops
+    out