diff --git a/.github/workflows/readme_installation.yml b/.github/workflows/readme_installation.yml
new file mode 100644
index 0000000..bde2fc9
--- /dev/null
+++ b/.github/workflows/readme_installation.yml
@@ -0,0 +1,89 @@
+# Keeps the installation instructions in README.md in sync with the branch:
+# merged PRs get the SSC/main instructions (committed to the base branch),
+# unmerged PRs get instructions that install from the PR's head branch.
+name: readme_installation
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened, closed]
+    branches:
+      - main
+
+jobs:
+  readme_installation:
+    runs-on: ubuntu-latest
+    timeout-minutes: 30  # change max time from default 6hr
+
+    steps:
+      #######################
+      # Configure
+      #######################
+
+      - name: Check out code repository (main branch)
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.base_ref }}
+        if: github.event.pull_request.merged == true
+
+      - name: Check out code repository (head branch)
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.head_ref }}
+        if: github.event.pull_request.merged != true
+
+      #######################
+      # Build
+      #######################
+
+      - name: Write installation instructions (main branch)
+        run: |
+          cat > readme_installation_instructions.md <<"EOL"
+          Install **calipmatch** in Stata from the SSC repository:
+          ```
+          ssc install calipmatch
+          ```
+
+          Or you can install **calipmatch** in Stata directly from this Github repository:
+          ```
+          net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/main)
+          ```
+          EOL
+        if: github.event.pull_request.merged == true
+
+      - name: Write installation instructions (head branch)
+        run: |
+          # Unquoted heredoc: bash expands ${GITHUB_HEAD_REF} itself, so branch names
+          # containing "/" or "&" cannot break a sed substitution. \` is a literal backtick.
+          cat > readme_installation_instructions.md <<EOL
+          This development version of **calipmatch** can be installed using:
+          \`\`\`
+          net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/${GITHUB_HEAD_REF})
+          \`\`\`
+          EOL
+        if: github.event.pull_request.merged != true
+
+      - name: Place installation instructions in README.md
+        run: |
+          sed -z -i 's/.*/\n\n/' README.md
+          sed -i '// r readme_installation_instructions.md' README.md
+          rm 
readme_installation_instructions.md + + ####################### + # Push + ####################### + + - name: Check if there are changes + run: | + set +e + test -z "$(git status --porcelain)" + echo "README_UPDATED=$?" >> $GITHUB_ENV + + - name: Push updated README to Github + run: | + git config user.name OppInsights-Bot + git config user.email info@opportunityinsights.org + git pull --ff-only + git add README.md + git commit -m "README: update installation instructions" + git push + if: env.README_UPDATED == 1 diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml new file mode 100644 index 0000000..c6366ac --- /dev/null +++ b/.github/workflows/readme_sthlp.yml @@ -0,0 +1,67 @@ +name: readme_sthlp + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'calipmatch.sthlp' + workflow_dispatch: + +jobs: + readme_sthlp: + runs-on: ubuntu-latest + timeout-minutes: 30 # change max time from default 6hr + + steps: + ####################### + # Configure + ####################### + + - name: Check out code repository (main branch) + uses: actions/checkout@v3 + if: github.ref == 'refs/heads/main' + + - name: Check out code repository (PR branch) + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.ref }} + if: github.ref != 'refs/heads/main' + + - name: Install Stata + run: ./automation/install_stata.sh + env: + OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + STATA_VERSION: 16 + + ####################### + # Build + ####################### + + - name: Convert Stata help file to HTML + run: stata-mp log html calipmatch.sthlp calipmatch.html + + - name: Place Stata help file in README.md + run: | + sed -z -i 's/.*/\n\n/' README.md + sed -i '// r calipmatch.html' README.md + rm calipmatch.html + + ####################### + # Push + ####################### + + - name: Check if there are changes + run: | + set +e + test -z "$(git status --porcelain)" + echo "README_UPDATED=$?" 
>> $GITHUB_ENV + + - name: Push updated README to Github + run: | + git config user.name OppInsights-Bot + git config user.email info@opportunityinsights.org + git pull --ff-only + git add README.md + git commit -m "README: update embedded calipmatch.sthlp" + git push + if: env.README_UPDATED == 1 diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 8607ddf..d64f4e0 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -1,6 +1,8 @@ name: stata_tests on: + pull_request: + types: [opened, synchronize, reopened] workflow_dispatch: jobs: @@ -9,5 +11,28 @@ jobs: timeout-minutes: 30 # change max time from default 6hr steps: - - name: Hello World - run: echo "Hello World" + ####################### + # Configure + ####################### + + - name: Check out code repository + uses: actions/checkout@v3 + + - name: Install Stata + run: ./automation/install_stata.sh + env: + OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + STATA_VERSION: 16 + + ####################### + # Build + ####################### + + - name: Run tests + run: | + stata-mp -b do test_calipmatch.do + cat test_calipmatch.log + if egrep --before-context=1 --max-count=1 "^r\([0-9]+\);$" test_calipmatch.log + then + exit 1 + fi diff --git a/.github/workflows/version_increment.yml b/.github/workflows/version_increment.yml new file mode 100644 index 0000000..a95feda --- /dev/null +++ b/.github/workflows/version_increment.yml @@ -0,0 +1,144 @@ +name: version_increment + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: + - main + paths: + - 'calipmatch.ado' + - 'calipmatch.sthlp' + - 'calipmatch.pkg' + workflow_dispatch: + +jobs: + version_increment: + runs-on: ubuntu-latest + timeout-minutes: 30 # change max time from default 6hr + + steps: + ####################### + # Configure + ####################### + + - name: Check out repository, PR branch + uses: actions/checkout@v3 + with: + ref: ${{ 
github.event.pull_request.head.ref }}
+          path: pr
+
+      - name: Check out repository, main branch
+        uses: actions/checkout@v3
+        with:
+          ref: main
+          path: main
+
+      - name: Obtain 'version number' and 'changed date' from ado-file and help-file
+        run: |
+          for b in "pr" "main"; do
+            cd ${b}
+            echo "${b}_ado_version=$(head -n 1 calipmatch.ado | sed -nE 's/.*version ([0-9]+\.[0-9]+\.[0-9]+).*/\1/p')" >> $GITHUB_ENV
+            echo "${b}_ado_date=$(head -n 1 calipmatch.ado | sed -nE 's/.*version [0-9]+\.[0-9]+\.[0-9]+ +([0-3]?[0-9][a-z][a-z][a-z][0-9][0-9][0-9][0-9]).*/\1/p')" >> $GITHUB_ENV
+
+            echo "${b}_help_version=$(sed '2q;d' calipmatch.sthlp | sed -nE 's/.*version ([0-9]+\.[0-9]+\.[0-9]+).*/\1/p')" >> $GITHUB_ENV
+            echo "${b}_help_date=$(sed '2q;d' calipmatch.sthlp | sed -nE 's/.*version [0-9]+\.[0-9]+\.[0-9]+ +([0-3]?[0-9][a-z][a-z][a-z][0-9][0-9][0-9][0-9]).*/\1/p')" >> $GITHUB_ENV
+            cd ..
+          done
+
+      #######################
+      # Version numbers
+      #######################
+
+      - name: Validate ado-file version numbers exist
+        run: |
+          if [[ -z ${pr_ado_version} ]]; then
+            echo "PR branch: version number not detected in calipmatch.ado"
+            exit 1
+          fi
+          if [[ -z ${main_ado_version} ]]; then
+            echo "main branch: version number not detected in calipmatch.ado"
+            exit 1
+          fi
+
+      - name: Validate help-file version numbers exist
+        run: |
+          if [[ -z ${pr_help_version} ]]; then
+            echo "PR branch: version number not detected in calipmatch.sthlp"
+            exit 1
+          fi
+          if [[ -z ${main_help_version} ]]; then
+            echo "main branch: version number not detected in calipmatch.sthlp"
+            exit 1
+          fi
+
+      - name: Validate ado-file version numbers are incremented
+        run: |
+          # Split each X.Y.Z version into <branch>_major / _minor / _patch
+          for b in "pr" "main"; do
+            v=${b}_ado_version
+            IFS=. read -r "${b}_major" "${b}_minor" "${b}_patch" <<< "${!v}"
+          done
+          # Require pr > main, comparing major first, then minor, then patch
+          if ! (( pr_major > main_major || (pr_major == main_major && (pr_minor > main_minor || (pr_minor == main_minor && pr_patch > main_patch))) )); then
+            echo "version number not incremented, pr=${pr_ado_version} main=${main_ado_version}"
+            exit 1
+          fi
+
+      - name: Validate help-file version numbers match ado-file version numbers
+        run: |
+          if [[ "$pr_ado_version" != "$pr_help_version" ]]; then
+            echo "version numbers in calipmatch.ado and calipmatch.sthlp do not match, ado=${pr_ado_version} sthlp=${pr_help_version}"
+            exit 1
+          fi
+          if [[ "$main_ado_version" != "$main_help_version" ]]; then
+            echo 'error in main branch:'
+            echo "version numbers in calipmatch.ado and calipmatch.sthlp do not match, ado=${main_ado_version} sthlp=${main_help_version}"
+            exit 1
+          fi
+
+      #######################
+      # Date last updated
+      #######################
+
+      - name: Update date in ado-file and help-file
+        run: |
+          today=$(TZ=America/New_York date +%-d%b%Y | tr A-Z a-z)
+          if [[ "$pr_ado_date" != "$today" ]]; then
+            sed -i "1s/${pr_ado_date}/${today}/" calipmatch.ado
+          fi
+          if [[ "$pr_help_date" != "$today" ]]; then
+            sed -i "2s/${pr_help_date}/${today}/" calipmatch.sthlp
+          fi
+        working-directory: pr
+
+      - name: Update date in package-file
+        run: |
+          if grep -q '^d Distribution-Date: [0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]' calipmatch.pkg; then
+            sed -i "s/^d Distribution-Date: [0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]/d Distribution-Date: $(TZ=America/New_York date +%Y%m%d)/" calipmatch.pkg
+          else
+            echo "PR branch: Distribution-Date not detected in calipmatch.pkg"
+            exit 1
+          fi
+        working-directory: pr
+
+      #######################
+      # Push
+      #######################
+
+      - name: Check if there are changes
+        run: |
+          set +e
+          test -z "$(git status --porcelain)"
+          echo "FILES_UPDATED=$?" 
>> $GITHUB_ENV + working-directory: pr + + - name: Push updates to Github + run: | + git config user.name OppInsights-Bot + git config user.email info@opportunityinsights.org + git pull --ff-only + git add calipmatch.ado calipmatch.sthlp calipmatch.pkg + git commit -m "Update 'last updated' dates" + git push + if: env.FILES_UPDATED == 1 + working-directory: pr diff --git a/LICENSE b/LICENSE index 762776b..1625c17 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,121 @@ -To the extent possible under law, the author has dedicated all copyright and related and neighboring rights -to this software to the public domain worldwide. This software is distributed without any warranty. +Creative Commons Legal Code -This code is licensed under the CC0 1.0 Universal license. You should have received a copy of the -CC0 Public Domain Dedication along with this software (see the file 'LICENSE_cc0_fulltext'). A human-readable -summary as well as the full legal text can be accessed at http://creativecommons.org/publicdomain/zero/1.0/ \ No newline at end of file +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. \ No newline at end of file diff --git a/LICENSE_cc0_fulltext b/LICENSE_cc0_fulltext deleted file mode 100644 index 1625c17..0000000 --- a/LICENSE_cc0_fulltext +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. - -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). 
- -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. 
database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. 
In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. 
Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. \ No newline at end of file diff --git a/README.md b/README.md index ae9139c..e08003d 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,20 @@ ## Installation -Install **calipmatch** in Stata from the SSC repository: `ssc install calipmatch` + +This development version of **calipmatch** can be installed using: +``` +net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/develop) +``` + + ## Stata help file -This documentation was converted automatically from the Stata help file by running `log html calipmatch.sthlp calipmatch.md` in Stata. +This documentation was converted automatically from the Stata help file by running `log html calipmatch.sthlp calipmatch.html` in Stata. -The help file looks best when viewed in Stata using `help calipmatch`. +The help file can be explored interactively in Stata using `help calipmatch`. +
Title+
diff --git a/automation/install_stata.sh b/automation/install_stata.sh
new file mode 100755
index 0000000..d40b6b8
--- /dev/null
+++ b/automation/install_stata.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+#
+# Download and install Stata for Linux under /usr/local/stata<version>.
+#
+# Environment:
+#   STATA_VERSION  Stata version to install (default: 17)
+#   OI_HTTPS_PW    password for the download server (required)
+
+# Stop at the first failing command, unset variable or failing pipeline, so a
+# failed download cannot end in a "successful" but broken installation.
+set -euo pipefail
+
+# Define parameters
+STATA_VERSION=${STATA_VERSION:-17}
+url_installer=https://d2bx6aas1fcmzl.cloudfront.net/stata_install/Stata${STATA_VERSION}Linux64.tar.gz
+url_license=https://d2bx6aas1fcmzl.cloudfront.net/stata_install/stata.lic
+download_username=oi
+download_password=${OI_HTTPS_PW:?OI_HTTPS_PW must be set}
+
+# Install Stata
+mkdir -p /tmp/statafiles
+# --fail: exit non-zero on an HTTP error instead of saving the error page
+curl --fail -u "${download_username}:${download_password}" "${url_installer}" --output "/tmp/statafiles/Stata${STATA_VERSION}Linux64.tar.gz"
+cd /tmp/statafiles
+tar -zxf "./Stata${STATA_VERSION}Linux64.tar.gz"
+sudo mkdir -p "/usr/local/stata${STATA_VERSION}"
+cd "/usr/local/stata${STATA_VERSION}"
+
+# The following command returns 1 even though it's ok
+set +e
+sudo sh -c 'yes | /tmp/statafiles/install'
+set -e
+
+cd /usr/local/bin
+# -f: replace links left over from an earlier run
+sudo ln -sf "/usr/local/stata${STATA_VERSION}/stata-mp" .
+sudo ln -sf "/usr/local/stata${STATA_VERSION}/xstata-mp" .
+sudo curl --fail -u "${download_username}:${download_password}" "${url_license}" --output "/usr/local/stata${STATA_VERSION}/stata.lic"
+rm -r /tmp/statafiles
+cd /tmp
diff --git a/calipmatch.ado b/calipmatch.ado
index 0f54c29..39351f6 100644
--- a/calipmatch.ado
+++ b/calipmatch.ado
@@ -1,4 +1,4 @@
-*! version 1.0.0 9may2017 Michael Stepner and Allan Garland, stepner@mit.edu
+*! 
version 1.1.0 26oct2022 Michael Stepner and Allan Garland, software@michaelstepner.com /* CC0 license information: To the extent possible under law, the author has dedicated all copyright and related and neighboring rights @@ -12,14 +12,12 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] * Verify there are same number of caliper vars as caliper widths - local caliper_var_count : word count `calipermatch' - local caliper_width_count : word count `caliperwidth' - if (`caliper_var_count'!=`caliper_width_count') { + if (`: word count `calipermatch'' != `: word count `caliperwidth'') { di as error "must specify the same number of caliper widths as caliper matching variables." - if (`caliper_var_count'<`caliper_width_count') exit 123 + if (`: word count `calipermatch'' < `: word count `caliperwidth'') exit 123 else exit 122 } @@ -82,18 +80,18 @@ program define calipmatch, sortpreserve rclass exit 2001 } - * Find group boundaries + * Find group boundaries within exact-match groups mata: boundaries=find_group_boundaries("`exactmatch'", "`casevar'", `=_N-`insample_total'+1', `=_N') - * Perform matching within each group - qui gen long `generate'=. + * Perform caliper matching within each exact-match group + qui gen long `generate' = . 
tempname case_matches if r(no_matches)==0 { mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'") qui compress `generate' - matrix `case_matches'=r(matchsuccess) + matrix `case_matches' = r(matchsuccess) matrix `case_matches' = (`cases_total' - `case_matches''* J(rowsof(`case_matches'),1,1)) \ `case_matches' } else { @@ -136,6 +134,22 @@ set matastrict on mata: void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth) { + // Objective: + // Perform caliper matching using the specified caliper variables and caliper widths, matching each case observation to one or + // many controls. Identify the matches within pre-specified groups, and store a variable containing integers that define a group + // of matched cases and controls. + // + // Inputs: + // Dataset with the same sort order as it had when `find_group_boundaries`' was run. + // - boundaries: G x 4 matrix output by find_group_boundaries() + // - genvar: variable containing all missing values, which will be populated with matching groups + // - maxmatch: a positive integer indicating the maximum number of control obs to match to each case obs + // - calipvars: a list of numeric variables for caliper matching + // - calipwidth: a list of caliper widths, specifying the maximum distance between case and control variables in each calipvar + // + // Outputs: + // The values of "genvar" are filled with integers that describe each group of matched cases and controls. 
+ // - r(matchsuccess) is a Stata return matrix tabulating the number of cases successfully matched to {1, ..., maxmatch} controls real scalar matchgrp matchgrp = st_varindex(genvar) @@ -148,52 +162,77 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma real scalar curmatch curmatch = 0 + + real scalar highestmatch + highestmatch = 0 real colvector matchsuccess matchsuccess = J(maxmatch, 1, 0) real scalar brow + real rowvector casematchcount + real scalar caseindex + real colvector matchedcontrolindex + real matrix minties real scalar caseobs real scalar controlobs - real scalar casematchcount + real scalar matchattempt real rowvector matchvals - real rowvector controlvals - real matrix matchbounds + real matrix controlvals + real matrix diffvals for (brow=1; brow<=rows(boundaries); brow++) { - for (caseobs=boundaries[brow,3]; caseobs<=boundaries[brow,4]; caseobs++) { - - curmatch++ - casematchcount=0 - _st_store(caseobs, matchgrp, curmatch) - - matchvals = st_data(caseobs, matchvars) - matchbounds = (matchvals-tolerance)\(matchvals+tolerance) + casematchcount = J(boundaries[brow,4] - boundaries[brow,3] + 1, 1, 0) + + for (matchattempt=1; matchattempt<=maxmatch; matchattempt++) { + + for (caseobs=boundaries[brow,3]; caseobs<=boundaries[brow,4]; caseobs++) { - for (controlobs=boundaries[brow,1]; controlobs<=boundaries[brow,2]; controlobs++) { + caseindex = caseobs - boundaries[brow,3] + 1 - if (_st_data(controlobs, matchgrp)!=.) 
continue + // Set the value of the match group + if (matchattempt==1) { + highestmatch++ + curmatch = highestmatch + _st_store(caseobs, matchgrp, curmatch) + } + else { + if (casematchcount[caseindex,1] < matchattempt - 1) continue + curmatch = _st_data(caseobs, matchgrp) + } + + // Store matchvar values for the case and for the controls that have not yet been matched + matchvals = st_data(caseobs, matchvars) + controlvals = st_data((boundaries[brow,1], boundaries[brow,2]), matchvars) :* editvalue(st_data((boundaries[brow,1], boundaries[brow,2]), matchgrp):==., 0, .) - controlvals = st_data(controlobs, matchvars) + // Store difference in matchvar values if they are within tolerance + diffvals = (controlvals :- matchvals) + diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) - if (controlvals>=matchbounds[1,.] & controlvals<=matchbounds[2,.]) { - casematchcount++ - _st_store(controlobs, matchgrp, curmatch) + // Find closest control to match + minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) + + // If a match is found, store it + if (rows(matchedcontrolindex)>0) { + casematchcount[caseindex,1] = casematchcount[caseindex,1] + 1 + _st_store(boundaries[brow,1] + matchedcontrolindex[1,1] - 1, matchgrp, curmatch) } - if (casematchcount==maxmatch) break - - } + // If zero matches were found for a case, remove its matchgrp value and reuse it for the next case + if (matchattempt==1 & casematchcount[caseindex,1]==0) { + highestmatch-- + _st_store(caseobs, matchgrp, .) + } - if (casematchcount==0) { - curmatch-- - _st_store(caseobs, matchgrp, .) 
- } - else { - matchsuccess[casematchcount,1] = matchsuccess[casematchcount,1]+1 } + } + for (caseindex=1; caseindex <= boundaries[brow,4] - boundaries[brow,3] + 1; caseindex++) { + matchattempt = casematchcount[caseindex,1] + if (matchattempt > 0) { + matchsuccess[matchattempt,1] = matchsuccess[matchattempt,1] + 1 + } } } @@ -204,6 +243,24 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma } real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, real scalar startobs, real scalar endobs) { + // Objective: + // For each set of distinct values of "grpvars", identify the starting and ending observation for cases and controls. + // + // Inputs: + // Dataset sorted by the variables specified by "grpvars casevar" within the rows [startobs, endobs] + // + // - grpvars: one or more variables for which each distinct set of values constitutes a group + // - casevar: a variable which takes values {0,1} + // - startobs: the first observation to process + // - endobs: the last observation to process + // + // Outputs: + // Return a matrix with dimensions G x 4, where G is the number of distinct groups containing both cases and controls. + // Col 1 = the first obs in a group with casevar==0 + // Col 2 = the last obs in a group with casevar==0 + // Col 3 = the first obs in a group with casevar==1 + // Col 4 = the last obs in a group with casevar==1 + real matrix boundaries boundaries = (startobs, ., ., .) 
@@ -229,9 +286,9 @@ real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, nextcol=2 currow=currow+1 } - else { // only one value of casevar in prev group --> skip group + else { // only one value of casevar (all controls or all cases) in prev group --> skip group boundaries[currow,1]=obs - } + } } else if (_st_data(obs, casevarnum)!=_st_data(obs-1, casevarnum)) { boundaries[currow,2]=obs-1 @@ -247,10 +304,8 @@ real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, return (boundaries) } else { - if (currow>1) return (boundaries[1..rows(boundaries)-1, .]) else st_numscalar("r(no_matches)",1) - } } diff --git a/calipmatch.pkg b/calipmatch.pkg new file mode 100644 index 0000000..ab6447a --- /dev/null +++ b/calipmatch.pkg @@ -0,0 +1,26 @@ +d 'CALIPMATCH': module for caliper matching without replacement +d +d calipmatch matches case observations to control observations +d using "calipers", generating a new variable with a unique value +d for each group of matched cases and controls. It performs 1:1 or +d 1:m matching without replacement. Matched observations will have +d values within +/- the caliper width for every caliper matching +d variable. Matched observations will also have identical values +d for every exact matching variable, if any exact matching +d variables are specified. +d +d KW: matching +d KW: caliper +d +d Requires: Stata version 13 +d +d Distribution-Date: 20221026 +d +d Author: Michael Stepner +d Support: email software@michaelstepner.com +d +d Author: Allan Garland +d Support: email +d +f calipmatch.ado +f calipmatch.sthlp diff --git a/calipmatch.sthlp b/calipmatch.sthlp index d0bab84..7274064 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -1,5 +1,5 @@ {smcl} -{* *! version 1.0.0 9may2017}{...} +{* *! 
version 1.1.0 26oct2022}{...} {viewerjumpto "Syntax" "calipmatch##syntax"}{...} {viewerjumpto "Description" "calipmatch##description"}{...} {viewerjumpto "Options" "calipmatch##options"}{...} @@ -52,8 +52,8 @@ matching{p_end} {pstd} {cmd:calipmatch} matches case observations to control observations using "calipers", -generating a new variable with a unique value for each group of matched cases and controls. -It performs 1:1 or 1:m matching without replacement. +generating a new variable with a unique value for each group of matched cases and controls. It +performs 1:1 or 1:m matching without replacement. {pstd} Matched observations must have values within +/- the caliper @@ -61,11 +61,16 @@ width for every caliper matching variable. Matched observations must also have i for every exact matching variable, if any exact matching variables are specified. {pstd} -Controls are randomly matched to cases without replacement. For each case, {cmd:calipmatch} -searches for matching controls until it either finds the pre-specified maximum number of -matches or runs out of controls. The search is performed greedily: it is possible that -some cases end up unmatched because all possible matching controls have already been matched with -another case. +Controls are matched to cases without replacement, using an efficient (greedy) algorithm that approximately maximizes +the number of successful matches, while minimizing the sum of squared differences in the caliper matching +variables when multiple valid matches exist. + +{pstd} +The cases are processed in random order. For each case, {cmd:calipmatch} searches for matching controls. If +any valid matches exist, it selects the matching control which minimizes the sum of squared differences across +caliper matching variables. If {opt maxmatches(#)}>1, then after completing the search for a first matching +control observation for each case, the algorithm will search for a second matching control observation for +each case, etc. 
{marker options}{...} @@ -85,15 +90,11 @@ each observation is a case (=1) or a control (=0). Observations with a missing value are excluded from matching. {phang}{opt max:matches(#)} sets the maximum number of controls to be matched -with each case. Setting {opt maxmatches(1)} performs a 1:1 matching: {cmd:calipmatch} -searches for one matching control observation for each case observation. - -{pmore}By setting {opt maxmatches(#)} greater than 1, {cmd:calipmatch} will proceed in -random order through -the cases and search for matching control observations until it either finds the -maximum number of matches or runs out of controls. The search is performed greedily: it -is possible that some cases end up unmatched because all possible matching -controls have already been matched with another case. +with each case. Setting {opt maxmatches(1)} performs a 1:1 match where {cmd:calipmatch} +searches for one matching control observation for each case observation. By setting +{opt maxmatches(#)} greater than 1 {cmd:calipmatch} will try to assign +a first valid matching control observation for every case observation, then search +for a second matching control observation, and onward. {phang}{opth caliperm:atch(varlist)} is a list of one or more numeric variables to use for caliper matching. Matched observations must have values within +/- the caliper @@ -135,11 +136,10 @@ all values are stored as precise integers. {marker author}{...} {title:Authors} -{pstd}Michael Stepner{p_end} -{pstd}Massachusetts Institute of Technology{p_end} -{pstd}stepner@mit.edu{p_end} +{pstd}{bf:{browse "https://michaelstepner.com":Michael Stepner}}{p_end} +{pstd}software@michaelstepner.com{p_end} -{pstd}Allan Garland, M.D. 
M.A.{p_end} +{pstd}{bf:Allan Garland}{p_end} {pstd}University of Manitoba Faculty of Medicine{p_end} {pstd}agarland@hsc.mb.ca{p_end} diff --git a/stata.toc b/stata.toc new file mode 100644 index 0000000..4c2fd4c --- /dev/null +++ b/stata.toc @@ -0,0 +1,4 @@ +v 3 +d calipmatch development repository +d by Michael Stepner (software@michaelstepner.com) +p calipmatch module for caliper matching without replacement diff --git a/test_calipmatch.do b/test_calipmatch.do index 52ebd2e..5151395 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -1,23 +1,29 @@ cscript "calipmatch" adofile calipmatch program define test_calipmatch - - syntax [if] [in], GENerate(name) CASEvar(varname) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] - calipmatch `if' `in', generate(`generate') casevar(`casevar') maxmatches(`maxmatches') calipermatch(`calipermatch') /// - caliperwidth(`caliperwidth') exactmatch(`exactmatch') - + * Run calipmatch with verbatim arguments + calipmatch `0' + + * Testing after successful run if (_rc==0) { + * Assign arguments to locals using the same syntax as calipmatch + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + * Store returned objects - local cases_total=r(cases_total) - local cases_matched=r(cases_matched) - local match_rate=r(match_rate) - matrix matches=r(matches) + local cases_total = r(cases_total) + local cases_matched = r(cases_matched) + local match_rate = r(match_rate) + matrix matches = r(matches) * Exactly one case per matchgroup - egen casecount=sum(`casevar'), by(`generate') - qui assert casecount==1 if !mi(`generate') + egen casecount = sum(`casevar'), by(`generate') + cap assert casecount == 1 if !mi(`generate') + if (_rc!=0) { + di as error "More than one case per matchgroup" + exit 9 + } * Highest matchgroup value = number of matched cases sum `generate', 
meanonly @@ -25,16 +31,16 @@ program define test_calipmatch else assert `cases_matched'==0 * All matched obs are within caliper width - local c=0 + local c = 0 foreach var of varlist `calipermatch' { local ++c local width : word `c' of `caliperwidth' - qui gen caseval=`var' if `casevar'==1 & !mi(`generate') - qui egen matchval=mean(caseval), by(`generate') + qui gen caseval = `var' if `casevar'==1 & !mi(`generate') + qui egen matchval = mean(caseval), by(`generate') - qui gen valdiff=`var'-matchval + qui gen valdiff = `var' - matchval sum valdiff, meanonly if r(N)>0 { assert r(min)>=-`width' @@ -49,10 +55,10 @@ program define test_calipmatch if ("`exactmatch'"!="") { foreach var of varlist `exactmatch' { - qui gen caseval=`var' if `casevar'==1 & !mi(`generate') - qui egen matchval=mean(caseval), by(`generate') + qui gen caseval = `var' if `casevar'==1 & !mi(`generate') + qui egen matchval = mean(caseval), by(`generate') - qui gen valdiff=`var'-matchval + qui gen valdiff = `var' - matchval sum valdiff, meanonly if r(N)>0 { assert r(min)==0 @@ -74,53 +80,104 @@ program define test_calipmatch assert r(sum)==`cases_matched' * Tabulation of number of controls matched to each case reported correctly - qui gen control=1-`casevar' - qui egen matched_controls=sum(control), by(`generate') - qui replace matched_controls=0 if mi(`generate') - qui replace matched_controls=. if `casevar'!=1 + qui gen control = 1 - `casevar' + qui egen matched_controls = sum(control), by(`generate') + qui replace matched_controls = 0 if mi(`generate') + qui replace matched_controls = . 
if `casevar'!=1 forvalues m=0/`maxmatches' { qui count if matched_controls==`m' - assert r(N)==matches[`=`m'+1',1] + cap assert r(N)==matches[`=`m'+1',1] + if (_rc!=0) { + di as error "Incorrect report for number of matched controls" + exit 9 + } } - } - end -*** One caliper matching variable +*============================================================================ +* New dataset: one caliper matching variable +*============================================================================ + clear set seed 4585239 +set sortseed 789045789 set obs 200 gen byte case=(_n<=20) gen byte income_percentile=ceil(runiform() * 100) -* Valid test +*---------------------------------------------------------------------------- +* Valid inputs +*---------------------------------------------------------------------------- + test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) keep case income_percentile +*---------------------------------------------------------------------------- +* Invalid syntax +*---------------------------------------------------------------------------- + +* maximum matches is positive, but not an integer +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// + == 126 + +* maximum matches is a negative integer +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(-1) calipermatch(income_percentile) caliperwidth(5)"' /// + == 125 + +* multiple maximum matches specified +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1 5) calipermatch(income_percentile) caliperwidth(5)"' /// + == 123 + +* caliper variable is ambiguous +gen byte income_percentile2=ceil(rnormal() * 100) +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_perc) caliperwidth(5)"' /// + == 111 +drop income_percentile2 + +* caliper variable does not exist +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) 
calipermatch(nonsense) caliperwidth(5)"' /// + == 111 + +* caliper width is negative +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(-5)"' /// + == 125 + +*---------------------------------------------------------------------------- +* Invalid data +*---------------------------------------------------------------------------- + +* string case variable +drop case +gen case=cond(_n<=20,"case","ctrl") +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) "' /// + == 109 +drop case +gen byte case=(_n<=20) + +* generate a variable that already exists +gen matchgroup=. +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// + == 110 +drop matchgroup + * if statement that matches no observations rcof `"test_calipmatch if income_percentile>100, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// == 2000 - + * no controls replace case=1 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// == 2001 - + * no cases replace case=0 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// == 2001 - -* generate a variable that already exists -gen matchgroup=. 
-rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// - == 110 -drop matchgroup * case/control variable not always 0 or 1 in sample replace case=(_n<=20) @@ -131,50 +188,73 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in * case/control variable not always 0 or 1, but not in sample test_calipmatch in 2/200, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) keep case income_percentile - -*** One caliper matching variable and one exact matching variable -gen byte sex=round(runiform()) -replace case=(_n<=20) +*============================================================================ +* New dataset: one caliper matching variable, with a perfect match per case +*============================================================================ -* Valid test -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// - calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) -keep case income_percentile sex +clear +set obs 100 +gen byte income_percentile = _n +expand 2, gen(case) + +*---------------------------------------------------------------------------- +* Valid inputs, test performance of matching algorithm +*---------------------------------------------------------------------------- + +* perfect match is found for each case, despite other valid matches +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) + +assert !mi(matchgroup) +egen n = count(income_percentile), by(matchgroup) +egen sd = sd(income_percentile), by(matchgroup) + +assert n == 2 +assert sd == 0 +keep case income_percentile + + +*============================================================================ +* New dataset: one caliper matching variable and one exact matching variable +*============================================================================ + +clear +set obs 200 +gen byte case = 
(_n<=20) +gen byte income_percentile = ceil(runiform() * 100) +gen byte sex = round(runiform()) + +*---------------------------------------------------------------------------- +* Valid inputs +*---------------------------------------------------------------------------- -* no controls among one matching group -replace case=1 if sex==1 -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// - calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) -keep case income_percentile sex - -* no cases among one matching group -replace case=0 if sex==1 -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// - calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) -keep case income_percentile sex - -* no matching groups with both cases and controls -replace case=1 if sex==0 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) -assert matchgroup==. keep case income_percentile sex -* string case variable -drop case -gen case=cond(_n<=20,"case","ctrl") -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// - == 109 - -drop case -gen byte case=(_n<=20) +*---------------------------------------------------------------------------- +* Invalid syntax +*---------------------------------------------------------------------------- + +* exact variable is ambiguous +gen byte sex2=round(runiform()) +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(se)"' /// + == 111 +drop sex2 + +* exact variable does not exist +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(nonsense)"' /// + == 111 + +*---------------------------------------------------------------------------- +* Invalid data +*---------------------------------------------------------------------------- * 
float exact matching variable recast float sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// == 198 - + * string exact matching variable rename sex sex_numeric gen sex=cond(sex_numeric==0,"M","F") @@ -185,22 +265,60 @@ drop sex rename sex_numeric sex recast byte sex -*** Many caliper and exact matching variables, m:1 match +*---------------------------------------------------------------------------- +* Edge cases +*---------------------------------------------------------------------------- + +* no controls among one matching group -> no matches in that group +replace case=1 if sex==1 +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) + +assert mi(matchgroup) if sex==1 +keep case income_percentile sex + +* no cases among one matching group -> no matches in that group +replace case=0 if sex==1 +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) + +assert mi(matchgroup) if sex==1 +keep case income_percentile sex + +* no matching groups with both cases and controls -> no matches in any group +replace case=1 if sex==0 +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) + +assert mi(matchgroup) +keep case income_percentile sex + + +*============================================================================ +* New dataset: many caliper variables and many exact matching variables +*============================================================================ clear set obs 50000 gen byte case=(_n<=5000) gen byte sex=round(runiform()) -gen byte age = 44 + ceil(runiform()*17) gen byte self_emp = (runiform()<0.1) -gen byte prov = ceil(runiform()*9) +gen byte prov = ceil(runiform()*10) +gen byte age = 44 + ceil(runiform()*17) gen byte 
income_percentile=ceil(runiform() * 100) -* Valid test +*---------------------------------------------------------------------------- +* Valid inputs +*---------------------------------------------------------------------------- + test_calipmatch, gen(matchgroup) case(case) maxmatches(5) /// exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5) -keep case sex age self_emp prov income_percentile +keep case sex self_emp prov age income_percentile + +*---------------------------------------------------------------------------- +* Invalid syntax +*---------------------------------------------------------------------------- * Not enough caliper widths rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3)"' /// @@ -210,3 +328,47 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 +*============================================================================ +* New dataset: two caliper matching variables, with different optimal +* matches under different distance metrics +*============================================================================ + +clear +set obs 5 + +gen byte case = 0 +replace case = 1 in 1 + +gen byte income_percentile = 40 +replace income_percentile = 47 in 2 +replace income_percentile = 52 in 3 +replace income_percentile = 41 in 4 +replace income_percentile = 55 in 5 + +gen byte age = 40 +replace age = 47 in 2 +replace age = 55 in 4 + +gen float sse = (income_percentile - income_percentile[1])^2 + (age - age[1])^2 + +list + +*---------------------------------------------------------------------------- +* Valid inputs, test performance of matching algorithm +*---------------------------------------------------------------------------- + 
+* matches minimize sum of squares +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(100 100) + +sum sse if case==0, meanonly +assert cond(_n==2, sse==r(min), sse!=r(min)) // test that obs 2 is global min + +assert matchgroup == 1 in 2 // test that obs 2 is matched +assert matchgroup == . in 3/5 + +keep case income_percentile age + +*---------------------------------------------------------------------------- + +di "Successfully completed all tests."@@ -44,7 +51,7 @@ The help file looks best when viewed in Stata using `help calipmatch`.
calipmatch matches case observations to control observations using "calipers", generating a new variable with a unique value for each group - of matched cases and controls. It performs 1:1 or 1:m matching without + of matched cases and controls. It performs 1:1 or 1:m matching without replacement.
Matched observations must have values within +/- the caliper width for @@ -52,12 +59,18 @@ The help file looks best when viewed in Stata using `help calipmatch`. identical values for every exact matching variable, if any exact matching variables are specified.
- Controls are randomly matched to cases without replacement. For each - case, calipmatch searches for matching controls until it either finds the - pre-specified maximum number of matches or runs out of controls. The - search is performed greedily: it is possible that some cases end up - unmatched because all possible matching controls have already been - matched with another case. + Controls are matched to cases without replacement, using an efficient + (greedy) algorithm that approximately maximizes the number of successful + matches, while minimizing the sum of squared differences in the caliper + matching variables when multiple valid matches exist. +
+ The cases are processed in random order. For each case, calipmatch + searches for matching controls. If any valid matches exist, it selects + the matching control which minimizes the sum of squared differences + across caliper matching variables. If maxmatches(#)>1, then after + completing the search for a first matching control observation for each + case, the algorithm will search for a second matching control observation + for each case, etc.
Options @@ -78,16 +91,12 @@ The help file looks best when viewed in Stata using `help calipmatch`. a missing value are excluded from matching.
maxmatches(#) sets the maximum number of controls to be matched with each - case. Setting maxmatches(1) performs a 1:1 matching: calipmatch + case. Setting maxmatches(1) performs a 1:1 match where calipmatch searches for one matching control observation for each case - observation. -
- By setting maxmatches(#) greater than 1, calipmatch will proceed in - random order through the cases and search for matching control - observations until it either finds the maximum number of matches or - runs out of controls. The search is performed greedily: it is - possible that some cases end up unmatched because all possible - matching controls have already been matched with another case. + observation. By setting maxmatches(#) greater than 1 calipmatch will + try to assign a first valid matching control observation for every + case observation, then search for a second matching control + observation, and onward.
calipermatch(varlist) is a list of one or more numeric variables to use for caliper matching. Matched observations must have values within @@ -126,12 +135,12 @@ The help file looks best when viewed in Stata using `help calipmatch`.
- Michael Stepner - Massachusetts Institute of Technology - stepner@mit.edu + Michael Stepner + software@michaelstepner.com
- Allan Garland, M.D. M.A. + Allan Garland University of Manitoba Faculty of Medicine agarland@hsc.mb.ca