From f665508390a62fb3a9a48f5b2e95dcb78588812e Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:02:41 -0400 Subject: [PATCH 01/26] WIP: add standardized sse as default --- calipmatch.ado | 20 ++++++++++++++++---- test_calipmatch.do | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 39351f6..3eba125 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [brief] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { @@ -88,9 +88,20 @@ program define calipmatch, sortpreserve rclass tempname case_matches if r(no_matches)==0 { - mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'") + + if ("`brief'"=="") { + foreach var of varlist `calipermatch' { + tempvar std_`var' + qui egen `std_`var'' = std(`var') if `touse' == 1 + local std_calipermatch `std_calipermatch' `std_`var'' + } + mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'", "`std_calipermatch'") + } + else { + mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'") + } + qui compress `generate' - matrix `case_matches' = r(matchsuccess) matrix `case_matches' = (`cases_total' - `case_matches''* J(rowsof(`case_matches'),1,1)) \ `case_matches' } @@ -133,7 +144,8 @@ set matastrict on mata: -void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth) { +void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth, +| string scalar std_calipvars) { // Objective: // Perform caliper matching using the specified caliper variables and caliper widths, matching each case observation to one or // many controls. Identify the matches within pre-specified groups, and store a variable containing integers that define a group diff --git a/test_calipmatch.do b/test_calipmatch.do index 5151395..92563f4 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -9,7 +9,7 @@ program define test_calipmatch if (_rc==0) { * Assign arguments to locals using the same syntax as calipmatch - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [brief] * Store returned objects local cases_total = r(cases_total) From cf8b574698f9f099f2c611104d18e8f5b1d11aed Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:03:52 -0400 Subject: [PATCH 02/26] Test for minimize standardized sse --- test_calipmatch.do | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index 92563f4..da56c37 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -369,6 +369,25 @@ assert matchgroup == . in 3/5 keep case income_percentile age +* matches minimize sum of normalized squares +replace age = 1000*age +egen std_income_percentile = std(income_percentile) +egen std_age = std(age) + +gen float sse = (income_percentile - income_percentile[1])^2 + (age - age[1])^2 +gen float std_sse = (std_income_percentile - std_income_percentile[1])^2 + (std_age - std_age[1])^2 + +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(100 100000) + +sum std_sse if case==0, meanonly +assert cond(_n==2, std_sse==r(min), std_sse!=r(min)) // test that obs 2 is global min + +assert matchgroup == 1 in 2 // test that obs 2 is matched +assert matchgroup == . in 3/5 + +keep case income_percentile age + *---------------------------------------------------------------------------- di "Successfully completed all tests." From fdb73a6b005ff5d2f07b8037fef7203ceb92b946 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:16:09 -0400 Subject: [PATCH 03/26] Add: standardized distance default, non-standard option --- calipmatch.ado | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 3eba125..cd3712c 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -192,6 +192,15 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma real rowvector matchvals real matrix controlvals real matrix diffvals + + if (args() > 5) { + real rowvector std_matchvars + std_matchvars = st_varindex(tokens(std_calipvars)) + + real rowvector std_matchvals + real matrix std_controlvals + real matrix std_diffvals + } for (brow=1; brow<=rows(boundaries); brow++) { @@ -214,16 +223,22 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma curmatch = _st_data(caseobs, matchgrp) } - // Store matchvar values for the case and for the controls that have not yet been matched + // Store matchvar values for the case and for the controls that have not yet been matched, and calculate difference matchvals = st_data(caseobs, matchvars) controlvals = st_data((boundaries[brow,1], boundaries[brow,2]), matchvars) :* editvalue(st_data((boundaries[brow,1], boundaries[brow,2]), matchgrp):==., 0, .) - - // Store difference in matchvar values if they are within tolerance diffvals = (controlvals :- matchvals) - diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) - + // Find closest control to match - minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) + if (args() >5) { + std_matchvals = st_data(caseobs, std_matchvars) + std_controlvals = st_data((boundaries[brow,1], boundaries[brow,2]), std_matchvars) :* editvalue(st_data((boundaries[brow,1], boundaries[brow,2]), matchgrp):==., 0, .) + std_diffvals = (std_controlvals :- std_matchvals) :* editvalue(abs(diffvals) :<= tolerance, 0, .) + minindex(rowsum(std_diffvals :^2, 1), 1, matchedcontrolindex, minties) + } + else { + diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) + minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) + } // If a match is found, store it if (rows(matchedcontrolindex)>0) { From 8e26e27ee484cb32203ed69f2574891303a3e66d Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:17:55 -0400 Subject: [PATCH 04/26] Rename option nostandard --- calipmatch.ado | 4 ++-- test_calipmatch.do | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index cd3712c..d09693f 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [brief] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [nostandard] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { @@ -89,7 +89,7 @@ program define calipmatch, sortpreserve rclass if r(no_matches)==0 { - if ("`brief'"=="") { + if ("`nostandard'"=="") { foreach var of varlist `calipermatch' { tempvar std_`var' qui egen `std_`var'' = std(`var') if `touse' == 1 diff --git a/test_calipmatch.do b/test_calipmatch.do index da56c37..d01cb9a 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -9,7 +9,7 @@ program define test_calipmatch if (_rc==0) { * Assign arguments to locals using the same syntax as calipmatch - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [brief] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [nostandard] * Store returned objects local cases_total = r(cases_total) From 3e98073a4c33f3ba07ba286db16d827e4c1e395c Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:30:51 -0400 Subject: [PATCH 05/26] Update inputs of calipmatch --- calipmatch.ado | 1 + 1 file changed, 1 insertion(+) diff --git a/calipmatch.ado b/calipmatch.ado index d09693f..4292c9e 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -158,6 +158,7 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma // - maxmatch: a positive integer indicating the maximum number of control obs to match to each case obs // - calipvars: a list of numeric variables for caliper matching // - calipwidth: a list of caliper widths, specifying the maximum distance between case and control variables in each calipvar + // - std_calipvars (optional): a list of numeric variables for caliper matching, but standardized by the in-sample mean and s.d. // // Outputs: // The values of "genvar" are filled with integers that describe each group of matched cases and controls. From 7eca8f5910985df516df8032e1af327123d3f875 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:33:15 -0400 Subject: [PATCH 06/26] Update syntax --- calipmatch.ado | 2 +- test_calipmatch.do | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 4292c9e..cbb6796 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [nostandard] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandard] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { diff --git a/test_calipmatch.do b/test_calipmatch.do index d01cb9a..d7f687d 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -9,7 +9,7 @@ program define test_calipmatch if (_rc==0) { * Assign arguments to locals using the same syntax as calipmatch - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] [nostandard] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandard] * Store returned objects local cases_total = r(cases_total) From fc648cc7bf838b4232ed56f49499427bc7048fe6 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 19:47:26 -0400 Subject: [PATCH 07/26] Update help file --- calipmatch.sthlp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/calipmatch.sthlp b/calipmatch.sthlp index 7274064..d67c6af 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -44,6 +44,7 @@ matching{p_end} {syntab :Optional} {synopt :{opth exactm:atch(varlist)}}list of integer variables to match on exactly{p_end} +{synopt :{opth nostandard(varlist)}} distance using sum of squares; default is standardized sum of squares {p_end} {synoptline} @@ -67,7 +68,7 @@ variables when multiple valid matches exist. {pstd} The cases are processed in random order. For each case, {cmd:calipmatch} searches for matching controls. If -any valid matches exist, it selects the matching control which minimizes the sum of squared differences across +any valid matches exist, it selects the matching control which minimizes the standardized sum of squared differences across caliper matching variables. If {opt maxmatches(#)}>1, then after completing the search for a first matching control observation for each case, the algorithm will search for a second matching control observation for each case, etc. @@ -115,6 +116,8 @@ matching variables, they must also have identical values for every exact matchin {it:int} or {it:long}. This enables speedy exact matching, by ensuring that all values are stored as precise integers. +{phang}{opth nostandard} calculates distance between cases and controls using the sum of squares. +When specified, matches will be sensitive to the scale of caliper variables. This can be used to weight caliper variables. {marker saved_results}{...} {title:Saved results} From f693627e371e512979aa13eb95b2368063ab61ce Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 20:01:06 -0400 Subject: [PATCH 08/26] Update formatting of help file --- calipmatch.sthlp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/calipmatch.sthlp b/calipmatch.sthlp index d67c6af..0dc9d57 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -44,7 +44,7 @@ matching{p_end} {syntab :Optional} {synopt :{opth exactm:atch(varlist)}}list of integer variables to match on exactly{p_end} -{synopt :{opth nostandard(varlist)}} distance using sum of squares; default is standardized sum of squares {p_end} +{synopt :{bf:nostandard}} distance using sum of squares; default is standardized sum of squares {p_end} {synoptline} @@ -116,7 +116,7 @@ matching variables, they must also have identical values for every exact matchin {it:int} or {it:long}. This enables speedy exact matching, by ensuring that all values are stored as precise integers. -{phang}{opth nostandard} calculates distance between cases and controls using the sum of squares. +{phang}{bf:nostandard} calculates distance between cases and controls using the sum of squares. When specified, matches will be sensitive to the scale of caliper variables. This can be used to weight caliper variables. {marker saved_results}{...} From 6363b384f44af5ad47a0923bda953b8625e66e37 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 20:06:45 -0400 Subject: [PATCH 09/26] Update formatting and name of option --- calipmatch.ado | 4 ++-- calipmatch.sthlp | 6 +++--- test_calipmatch.do | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index cbb6796..bcd3eec 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandard] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { @@ -89,7 +89,7 @@ program define calipmatch, sortpreserve rclass if r(no_matches)==0 { - if ("`nostandard'"=="") { + if ("`nostandardize'"=="") { foreach var of varlist `calipermatch' { tempvar std_`var' qui egen `std_`var'' = std(`var') if `touse' == 1 diff --git a/calipmatch.sthlp b/calipmatch.sthlp index 0dc9d57..bd0a7db 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -24,7 +24,7 @@ Create a variable indicating groups of matched cases and controls {opt max:matches(#)} {opth caliperm:atch(varlist)} {opth caliperw:idth(numlist)} -[{opth exactm:atch(varlist)}] +[{opth exactm:atch(varlist)} {bf: nostandardize}] {synoptset 23 tabbed}{...} @@ -44,7 +44,7 @@ matching{p_end} {syntab :Optional} {synopt :{opth exactm:atch(varlist)}}list of integer variables to match on exactly{p_end} -{synopt :{bf:nostandard}} distance using sum of squares; default is standardized sum of squares {p_end} +{synopt :{bf:nostandardize}} distance using sum of squares; default is standardized sum of squares {p_end} {synoptline} @@ -116,7 +116,7 @@ matching variables, they must also have identical values for every exact matchin {it:int} or {it:long}. This enables speedy exact matching, by ensuring that all values are stored as precise integers. -{phang}{bf:nostandard} calculates distance between cases and controls using the sum of squares. +{phang}{bf:nostandardize} calculates distance between cases and controls using the sum of squared differences. When specified, matches will be sensitive to the scale of caliper variables. This can be used to weight caliper variables. {marker saved_results}{...} diff --git a/test_calipmatch.do b/test_calipmatch.do index d7f687d..55c73e8 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -9,7 +9,7 @@ program define test_calipmatch if (_rc==0) { * Assign arguments to locals using the same syntax as calipmatch - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandard] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] * Store returned objects local cases_total = r(cases_total) From 4691c9a1087f4dec1538b7421aca1bfbde568cc6 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Fri, 28 Oct 2022 22:07:38 -0400 Subject: [PATCH 10/26] Fix syntax error and sse test --- calipmatch.ado | 5 +++-- calipmatch.sthlp | 6 +++--- test_calipmatch.do | 53 +++++++++++++++++++++++----------------------- 3 files changed, 32 insertions(+), 32 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index bcd3eec..02060c5 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { @@ -89,7 +89,7 @@ program define calipmatch, sortpreserve rclass if r(no_matches)==0 { - if ("`nostandardize'"=="") { + if "`nostandardize'"=="" { foreach var of varlist `calipermatch' { tempvar std_`var' qui egen `std_`var'' = std(`var') if `touse' == 1 @@ -97,6 +97,7 @@ program define calipmatch, sortpreserve rclass } mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'", "`std_calipermatch'") } + else { mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'") } diff --git a/calipmatch.sthlp b/calipmatch.sthlp index bd0a7db..beb6474 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -24,7 +24,7 @@ Create a variable indicating groups of matched cases and controls {opt max:matches(#)} {opth caliperm:atch(varlist)} {opth caliperw:idth(numlist)} -[{opth exactm:atch(varlist)} {bf: nostandardize}] +[{opth exactm:atch(varlist)} {bf: {ul: no}standardize}] {synoptset 23 tabbed}{...} @@ -44,7 +44,7 @@ matching{p_end} {syntab :Optional} {synopt :{opth exactm:atch(varlist)}}list of integer variables to match on exactly{p_end} -{synopt :{bf:nostandardize}} distance using sum of squares; default is standardized sum of squares {p_end} +{synopt :{bf: {ul:no}standardize}} distance using sum of squares; default is standardized sum of squares {p_end} {synoptline} @@ -116,7 +116,7 @@ matching variables, they must also have identical values for every exact matchin {it:int} or {it:long}. This enables speedy exact matching, by ensuring that all values are stored as precise integers. -{phang}{bf:nostandardize} calculates distance between cases and controls using the sum of squared differences. +{phang}{bf: {ul:no}standardize} calculates distance between cases and controls using the sum of squared differences. When specified, matches will be sensitive to the scale of caliper variables. This can be used to weight caliper variables. {marker saved_results}{...} diff --git a/test_calipmatch.do b/test_calipmatch.do index 55c73e8..e9a8487 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -9,7 +9,7 @@ program define test_calipmatch if (_rc==0) { * Assign arguments to locals using the same syntax as calipmatch - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] * Store returned objects local cases_total = r(cases_total) @@ -345,40 +345,23 @@ replace income_percentile = 52 in 3 replace income_percentile = 41 in 4 replace income_percentile = 55 in 5 -gen byte age = 40 -replace age = 47 in 2 -replace age = 55 in 4 - -gen float sse = (income_percentile - income_percentile[1])^2 + (age - age[1])^2 - -list - +gen int age_days = 14600 +replace age_days = 17155 in 2 +replace age_days = 20075 in 4 + *---------------------------------------------------------------------------- * Valid inputs, test performance of matching algorithm *---------------------------------------------------------------------------- -* matches minimize sum of squares -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// - calipermatch(income_percentile age) caliperwidth(100 100) - -sum sse if case==0, meanonly -assert cond(_n==2, sse==r(min), sse!=r(min)) // test that obs 2 is global min - -assert matchgroup == 1 in 2 // test that obs 2 is matched -assert matchgroup == . in 3/5 - -keep case income_percentile age - * matches minimize sum of normalized squares -replace age = 1000*age egen std_income_percentile = std(income_percentile) -egen std_age = std(age) +egen std_age_days = std(age_days) -gen float sse = (income_percentile - income_percentile[1])^2 + (age - age[1])^2 -gen float std_sse = (std_income_percentile - std_income_percentile[1])^2 + (std_age - std_age[1])^2 +gen float std_sse = (std_income_percentile - std_income_percentile[1])^2 + (std_age_days - std_age_days[1])^2 +list test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// - calipermatch(income_percentile age) caliperwidth(100 100000) + calipermatch(income_percentile age_days) caliperwidth(100 36500) sum std_sse if case==0, meanonly assert cond(_n==2, std_sse==r(min), std_sse!=r(min)) // test that obs 2 is global min @@ -386,7 +369,23 @@ assert cond(_n==2, std_sse==r(min), std_sse!=r(min)) // test that obs 2 is glob assert matchgroup == 1 in 2 // test that obs 2 is matched assert matchgroup == . in 3/5 -keep case income_percentile age +keep case income_percentile age_days + +* matches minimize sum of squares when nostandardize is specified +gen float sse = (income_percentile - income_percentile[1])^2 + (age_days - age_days[1])^2 +list + +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile age_days) caliperwidth(100 36500) nostandardize + +sum sse if case==0, meanonly +assert cond(_n==3, sse==r(min), sse!=r(min)) // test that obs 3 is global min + +assert matchgroup == 1 in 3 // test that obs 3 is matched +assert matchgroup == . in 2 +assert matchgroup == . in 4/5 + +keep case income_percentile age_days *---------------------------------------------------------------------------- From 048ca8cce3cdafeefc2ad84bfb46f72f4d2083f7 Mon Sep 17 00:00:00 2001 From: OppInsights-Bot Date: Sat, 29 Oct 2022 02:11:49 +0000 Subject: [PATCH 11/26] README: update embedded calipmatch.sthlp --- README.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index e08003d..153ec90 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ The help file can be explored interactively in Stata using `help calipmatch`.

calipmatch [if] [in], generate(newvar) casevar(varname) maxmatches(#) calipermatch(varlist) caliperwidth(numlist) [exactmatch( - varlist)] + varlist) nostandardize]

options Description @@ -44,6 +44,8 @@ The help file can be explored interactively in Stata using `help calipmatch`.

Optional exactmatch(varlist) list of integer variables to match on exactly + nostandardize distance using sum of squares; default is + standardized sum of squares -------------------------------------------------------------------------

@@ -66,11 +68,11 @@ The help file can be explored interactively in Stata using `help calipmatch`.

The cases are processed in random order. For each case, calipmatch searches for matching controls. If any valid matches exist, it selects - the matching control which minimizes the sum of squared differences - across caliper matching variables. If maxmatches(#)>1, then after - completing the search for a first matching control observation for each - case, the algorithm will search for a second matching control observation - for each case, etc. + the matching control which minimizes the standardized sum of squared + differences across caliper matching variables. If maxmatches(#)>1, then + after completing the search for a first matching control observation for + each case, the algorithm will search for a second matching control + observation for each case, etc.

Options @@ -119,6 +121,10 @@ The help file can be explored interactively in Stata using `help calipmatch`. This enables speedy exact matching, by ensuring that all values are stored as precise integers.

+ nostandardize calculates distance between cases and controls using the + sum of squared differences. When specified, matches will be + sensitive to the scale of caliper variables. This can be used to + weight caliper variables.

Saved results

From ae2220b0a6a70fa0c8ab3e5a70dcaea796295b50 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Mon, 14 Nov 2022 12:05:25 -0500 Subject: [PATCH 12/26] Update: use su+gen instead of egen,undo changes to _calipmatch function --- calipmatch.ado | 45 +++++++++++++++------------------------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 02060c5..3560d45 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -90,12 +90,18 @@ program define calipmatch, sortpreserve rclass if r(no_matches)==0 { if "`nostandardize'"=="" { + local i = 0 foreach var of varlist `calipermatch' { tempvar std_`var' - qui egen `std_`var'' = std(`var') if `touse' == 1 + local ++i + local width : word `i' of `caliperwidth' + qui su `var' if `touse' `in' + qui gen `std_`var'' = (`var' - r(mean))/r(sd) + local std_`var'_width = `width'/r(sd) local std_calipermatch `std_calipermatch' `std_`var'' + local std_caliperwidth `std_caliperwidth' `std_`var'_width' } - mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'", "`std_calipermatch'") + mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`std_calipermatch'","`std_caliperwidth'") } else { @@ -145,8 +151,7 @@ set matastrict on mata: -void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth, -| string scalar std_calipvars) { +void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth) { // Objective: // Perform caliper matching using the specified caliper variables and caliper widths, matching each case observation to one or // many controls. Identify the matches within pre-specified groups, and store a variable containing integers that define a group @@ -159,24 +164,20 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma // - maxmatch: a positive integer indicating the maximum number of control obs to match to each case obs // - calipvars: a list of numeric variables for caliper matching // - calipwidth: a list of caliper widths, specifying the maximum distance between case and control variables in each calipvar - // - std_calipvars (optional): a list of numeric variables for caliper matching, but standardized by the in-sample mean and s.d. // // Outputs: // The values of "genvar" are filled with integers that describe each group of matched cases and controls. // - r(matchsuccess) is a Stata return matrix tabulating the number of cases successfully matched to {1, ..., maxmatch} controls - real scalar matchgrp matchgrp = st_varindex(genvar) real rowvector matchvars matchvars = st_varindex(tokens(calipvars)) - real rowvector tolerance tolerance = strtoreal(tokens(calipwidth)) real scalar curmatch curmatch = 0 - real scalar highestmatch highestmatch = 0 @@ -194,15 +195,6 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma real rowvector matchvals real matrix controlvals real matrix diffvals - - if (args() > 5) { - real rowvector std_matchvars - std_matchvars = st_varindex(tokens(std_calipvars)) - - real rowvector std_matchvals - real matrix std_controlvals - real matrix std_diffvals - } for (brow=1; brow<=rows(boundaries); brow++) { @@ -225,22 +217,16 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma curmatch = _st_data(caseobs, matchgrp) } - // Store matchvar values for the case and for the controls that have not yet been matched, and calculate difference + // Store matchvar values for the case and for the controls that have not yet been matched matchvals = st_data(caseobs, matchvars) controlvals = st_data((boundaries[brow,1], boundaries[brow,2]), matchvars) :* editvalue(st_data((boundaries[brow,1], boundaries[brow,2]), matchgrp):==., 0, .) + + // Store difference in matchvar values if they are within tolerance diffvals = (controlvals :- matchvals) - + diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) + // Find closest control to match - if (args() >5) { - std_matchvals = st_data(caseobs, std_matchvars) - std_controlvals = st_data((boundaries[brow,1], boundaries[brow,2]), std_matchvars) :* editvalue(st_data((boundaries[brow,1], boundaries[brow,2]), matchgrp):==., 0, .) - std_diffvals = (std_controlvals :- std_matchvals) :* editvalue(abs(diffvals) :<= tolerance, 0, .) - minindex(rowsum(std_diffvals :^2, 1), 1, matchedcontrolindex, minties) - } - else { - diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) - minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) - } + minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) // If a match is found, store it if (rows(matchedcontrolindex)>0) { @@ -268,7 +254,6 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma stata("return clear") st_matrix("r(matchsuccess)",matchsuccess) - } real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, real scalar startobs, real scalar endobs) { From b260505974b54a973c1e94c51c26fa367bd75c82 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Mon, 14 Nov 2022 12:08:27 -0500 Subject: [PATCH 13/26] Format: add spaces between some lines --- calipmatch.ado | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/calipmatch.ado b/calipmatch.ado index 3560d45..0d8e68b 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -168,16 +168,19 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma // Outputs: // The values of "genvar" are filled with integers that describe each group of matched cases and controls. // - r(matchsuccess) is a Stata return matrix tabulating the number of cases successfully matched to {1, ..., maxmatch} controls + real scalar matchgrp matchgrp = st_varindex(genvar) real rowvector matchvars matchvars = st_varindex(tokens(calipvars)) + real rowvector tolerance tolerance = strtoreal(tokens(calipwidth)) real scalar curmatch curmatch = 0 + real scalar highestmatch highestmatch = 0 @@ -254,6 +257,7 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma stata("return clear") st_matrix("r(matchsuccess)",matchsuccess) + } real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, real scalar startobs, real scalar endobs) { From 7769d5fbb52af8ade94aa10117dae4521044a517 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Mon, 14 Nov 2022 21:43:58 -0500 Subject: [PATCH 14/26] Temporary test and required modification to ado file --- calipmatch.ado | 2 +- test_calipmatch.do | 46 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 0d8e68b..a93a79c 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -9,7 +9,6 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai */ * Why did I include a formal license? Jeff Atwood gives good reasons: https://blog.codinghorror.com/pick-a-license-any-license/ - program define calipmatch, sortpreserve rclass version 13.0 syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] @@ -58,6 +57,7 @@ program define calipmatch, sortpreserve rclass * Sort into groups for caliper matching, randomizing order of cases and controls tempvar rand + set seed 4585239 /// TO REMOVE! gen float `rand'=runiform() sort `touse' `exactmatch' `casevar' `rand' diff --git a/test_calipmatch.do b/test_calipmatch.do index e9a8487..d072592 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -387,6 +387,50 @@ assert matchgroup == . in 4/5 keep case income_percentile age_days +*============================================================================ +* New dataset: one caliper matching variable, with different scales +*============================================================================ + +clear +set obs 2000 + +gen byte case=(_n<=200) + +gen byte income_percentile=ceil(runiform() * 100) +gen byte age = 44 + ceil(runiform()*17) + +test_calipmatch, gen(matchgroup_1) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(5 3) + +keep case income_percentile age matchgroup_1 + +test_calipmatch, gen(matchgroup_2) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(5 3) nostandardize + +keep case income_percentile age matchgroup_1 matchgroup_2 + +gen int days_over_44 = (age-44)*365 + +test_calipmatch, gen(matchgroup_3) case(case) maxmatches(1) /// + calipermatch(income_percentile days_over_44) caliperwidth(5 1095) + +keep case income_percentile age days_over_44 matchgroup_1 matchgroup_2 matchgroup_3 + +test_calipmatch, gen(matchgroup_4) case(case) maxmatches(1) /// + calipermatch(income_percentile days_over_44) caliperwidth(5 1095) nostandardize + +keep case income_percentile age days_over_44 matchgroup_1 matchgroup_2 matchgroup_3 matchgroup_4 + +gen std_diff = abs(matchgroup_1 - matchgroup_3) +su std_diff, meanonly +assert r(max) == 0 + +gen diff = abs(matchgroup_2 - matchgroup_4) +su diff, meanonly +assert r(max) != 0 + +keep case income_percentile age + *---------------------------------------------------------------------------- -di "Successfully completed all tests." +di "Successfully completed all tests." \ No newline at end of file From 25761d120584819116ef7528885727f85b78ca03 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Mon, 14 Nov 2022 21:51:36 -0500 Subject: [PATCH 15/26] Fix comment on ado --- calipmatch.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calipmatch.ado b/calipmatch.ado index a93a79c..3003090 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -57,7 +57,7 @@ program define calipmatch, sortpreserve rclass * Sort into groups for caliper matching, randomizing order of cases and controls tempvar rand - set seed 4585239 /// TO REMOVE! + set seed 4585239 // TO REMOVE! gen float `rand'=runiform() sort `touse' `exactmatch' `casevar' `rand' From badfaf922fe4f88e22b300f5617c21d5f26c65f6 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Mon, 14 Nov 2022 22:19:40 -0500 Subject: [PATCH 16/26] Format edits to test file --- test_calipmatch.do | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index d072592..ee8a672 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -388,9 +388,10 @@ assert matchgroup == . in 4/5 keep case income_percentile age_days *============================================================================ -* New dataset: one caliper matching variable, with different scales +* New dataset: two caliper matching variables, with different scales *============================================================================ +* matches are scale invariant clear set obs 2000 @@ -421,12 +422,12 @@ test_calipmatch, gen(matchgroup_4) case(case) maxmatches(1) /// keep case income_percentile age days_over_44 matchgroup_1 matchgroup_2 matchgroup_3 matchgroup_4 -gen std_diff = abs(matchgroup_1 - matchgroup_3) -su std_diff, meanonly +gen match_diffs_std = abs(matchgroup_1 - matchgroup_3) +su match_diffs_std, meanonly assert r(max) == 0 -gen diff = abs(matchgroup_2 - matchgroup_4) -su diff, meanonly +gen match_diffs = abs(matchgroup_2 - matchgroup_4) +su match_diffs, meanonly assert r(max) != 0 keep case income_percentile age From 0af2b16fa5ec849b69d5366b480823fa03d2cf8a Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 15 Nov 2022 13:17:45 -0500 Subject: [PATCH 17/26] Seems to work --- calipmatch.ado | 1 - test_calipmatch.do | 34 ++++++++-------------------------- 2 files changed, 8 insertions(+), 27 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 3003090..8cd4529 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -57,7 +57,6 @@ program define calipmatch, sortpreserve rclass * Sort into groups for caliper matching, randomizing order of cases and controls tempvar rand - set seed 4585239 // TO REMOVE! gen float `rand'=runiform() sort `touse' `exactmatch' `casevar' `rand' diff --git a/test_calipmatch.do b/test_calipmatch.do index ee8a672..bdc8701 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -400,37 +400,19 @@ gen byte case=(_n<=200) gen byte income_percentile=ceil(runiform() * 100) gen byte age = 44 + ceil(runiform()*17) +set seed 4585239 +set sortseed 789045789 + test_calipmatch, gen(matchgroup_1) case(case) maxmatches(1) /// calipermatch(income_percentile age) caliperwidth(5 3) -keep case income_percentile age matchgroup_1 - -test_calipmatch, gen(matchgroup_2) case(case) maxmatches(1) /// - calipermatch(income_percentile age) caliperwidth(5 3) nostandardize - -keep case income_percentile age matchgroup_1 matchgroup_2 - -gen int days_over_44 = (age-44)*365 - -test_calipmatch, gen(matchgroup_3) case(case) maxmatches(1) /// - calipermatch(income_percentile days_over_44) caliperwidth(5 1095) +drop casecount matched_case control matched_controls -keep case income_percentile age days_over_44 matchgroup_1 matchgroup_2 matchgroup_3 - -test_calipmatch, gen(matchgroup_4) case(case) maxmatches(1) /// - calipermatch(income_percentile days_over_44) caliperwidth(5 1095) nostandardize - -keep case income_percentile age days_over_44 matchgroup_1 matchgroup_2 matchgroup_3 matchgroup_4 - -gen match_diffs_std = abs(matchgroup_1 - matchgroup_3) -su match_diffs_std, meanonly -assert r(max) == 0 - -gen match_diffs = abs(matchgroup_2 - matchgroup_4) -su match_diffs, meanonly -assert r(max) != 0 +set seed 4585239 +set sortseed 789045789 -keep case income_percentile age +test_calipmatch, gen(matchgroup_2) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(5 3) *---------------------------------------------------------------------------- From 02fee72b7d89fe5574c0fa86d0995c5857d443a4 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 15 Nov 2022 13:33:17 -0500 Subject: [PATCH 18/26] WIP: testfor scale differences --- test_calipmatch.do | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index bdc8701..65b3ca6 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -399,6 +399,7 @@ gen byte case=(_n<=200) gen byte income_percentile=ceil(runiform() * 100) gen byte age = 44 + ceil(runiform()*17) +gen int days_over_44 = (age - 44)*365 set seed 4585239 set sortseed 789045789 @@ -412,7 +413,7 @@ set seed 4585239 set sortseed 789045789 test_calipmatch, gen(matchgroup_2) case(case) maxmatches(1) /// - calipermatch(income_percentile age) caliperwidth(5 3) + calipermatch(income_percentile days_over_44) caliperwidth(5 1095) *---------------------------------------------------------------------------- From f20af1394274ca27dad432a4c62308e3dc21b5ee Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 15 Nov 2022 13:39:16 -0500 Subject: [PATCH 19/26] Add: test matches are scale invariant --- test_calipmatch.do | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index 65b3ca6..ae061c1 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -415,6 +415,32 @@ set sortseed 789045789 test_calipmatch, gen(matchgroup_2) case(case) maxmatches(1) /// calipermatch(income_percentile days_over_44) caliperwidth(5 1095) +drop casecount matched_case control matched_controls + +gen match_diffs_std = abs(matchgroup_1 - matchgroup_2) +su match_diffs_std, meanonly +assert r(max) == 0 + +set seed 4585239 +set sortseed 789045789 + +test_calipmatch, gen(matchgroup_3) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(5 3) nostandardize + +drop casecount matched_case control matched_controls + +set seed 4585239 +set sortseed 789045789 + +test_calipmatch, gen(matchgroup_4) case(case) maxmatches(1) /// + calipermatch(income_percentile days_over_44) caliperwidth(5 1095) nostandardize + +gen match_diffs = abs(matchgroup_3 - matchgroup_4) +su match_diffs, meanonly +assert r(max) != 0 + +keep case income_percentile age + *---------------------------------------------------------------------------- di "Successfully completed all tests." \ No newline at end of file From c3b39549655ba41ea53e4e24eb33c1a2ecc45e85 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 15 Nov 2022 13:43:40 -0500 Subject: [PATCH 20/26] Add test scale invariance of matches --- test_calipmatch.do | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index ae061c1..938e0a5 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -391,6 +391,10 @@ keep case income_percentile age_days * New dataset: two caliper matching variables, with different scales *============================================================================ +*---------------------------------------------------------------------------- +* Valid inputs, test performance of matching algorithm +*---------------------------------------------------------------------------- + * matches are scale invariant clear set obs 2000 @@ -421,6 +425,7 @@ gen match_diffs_std = abs(matchgroup_1 - matchgroup_2) su match_diffs_std, meanonly assert r(max) == 0 +* matches are scale dependent when nostandardize is specified set seed 4585239 set sortseed 789045789 From 7f644e9d2b2077ac74b6c4826db309e7e500eb2c Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 15 Nov 2022 13:53:21 -0500 Subject: [PATCH 21/26] Format test of scale and shift invariance --- test_calipmatch.do | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 938e0a5..c7afee3 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -388,14 +388,9 @@ assert matchgroup == . in 4/5 keep case income_percentile age_days *============================================================================ -* New dataset: two caliper matching variables, with different scales +* New dataset: two caliper matching variables, with scaling and a shift *============================================================================ -*---------------------------------------------------------------------------- -* Valid inputs, test performance of matching algorithm -*---------------------------------------------------------------------------- - -* matches are scale invariant clear set obs 2000 @@ -405,6 +400,11 @@ gen byte income_percentile=ceil(runiform() * 100) gen byte age = 44 + ceil(runiform()*17) gen int days_over_44 = (age - 44)*365 +*---------------------------------------------------------------------------- +* Valid inputs, test performance of matching algorithm +*---------------------------------------------------------------------------- + +* matches are scale and shift invariant set seed 4585239 set sortseed 789045789 @@ -425,7 +425,7 @@ gen match_diffs_std = abs(matchgroup_1 - matchgroup_2) su match_diffs_std, meanonly assert r(max) == 0 -* matches are scale dependent when nostandardize is specified +* matches are scale and shift dependent when nostandardize is specified set seed 4585239 set sortseed 789045789 From 7a8b8a07feadbfa3238678cc1d16e62b898cd0cf Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 16 Nov 2022 09:26:03 -0500 Subject: [PATCH 22/26] Tweak formatting & efficiency of standardizing code --- calipmatch.ado | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 8cd4529..09f7d72 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -9,9 +9,10 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai */ * Why did I include a formal license? Jeff Atwood gives good reasons: https://blog.codinghorror.com/pick-a-license-any-license/ + program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { @@ -89,25 +90,28 @@ program define calipmatch, sortpreserve rclass if r(no_matches)==0 { if "`nostandardize'"=="" { + * Create standardized caliper vars (subtract mean, divide by SD) local i = 0 foreach var of varlist `calipermatch' { + local ++i tempvar std_`var' - local ++i local width : word `i' of `caliperwidth' - qui su `var' if `touse' `in' - qui gen `std_`var'' = (`var' - r(mean))/r(sd) - local std_`var'_width = `width'/r(sd) + + qui sum `var' in `=_N-`insample_total'+1'/`=_N' + qui gen `std_`var'' = (`var' - r(mean)) / r(sd) in `=_N-`insample_total'+1'/`=_N' + local std_calipermatch `std_calipermatch' `std_`var'' - local std_caliperwidth `std_caliperwidth' `std_`var'_width' + local std_caliperwidth `std_caliperwidth' `=`width'/r(sd)' } + mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`std_calipermatch'","`std_caliperwidth'") } - else { mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'") } qui compress `generate' + matrix `case_matches' = r(matchsuccess) matrix `case_matches' = (`cases_total' - `case_matches''* J(rowsof(`case_matches'),1,1)) \ `case_matches' } From e2083619d0ce4ff4c7dbaab08297e8b782093f78 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 16 Nov 2022 09:28:10 -0500 Subject: [PATCH 23/26] Further formatting tweaks --- calipmatch.ado | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 09f7d72..560762f 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -94,14 +94,13 @@ program define calipmatch, sortpreserve rclass local i = 0 foreach var of varlist `calipermatch' { local ++i - tempvar std_`var' - local width : word `i' of `caliperwidth' + tempvar std_`var' qui sum `var' in `=_N-`insample_total'+1'/`=_N' qui gen `std_`var'' = (`var' - r(mean)) / r(sd) in `=_N-`insample_total'+1'/`=_N' local std_calipermatch `std_calipermatch' `std_`var'' - local std_caliperwidth `std_caliperwidth' `=`width'/r(sd)' + local std_caliperwidth `std_caliperwidth' `=`: word `i' of `caliperwidth'' / r(sd)' } mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`std_calipermatch'","`std_caliperwidth'") From 28d202af0b987b955d5463f357dbcf7748377f16 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Wed, 16 Nov 2022 11:16:48 -0500 Subject: [PATCH 24/26] Bug fix: capitalize NOstandardize option --- calipmatch.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calipmatch.ado b/calipmatch.ado index 560762f..ae7c0ea 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { From f877349fd64d6f8226015dbcc57340feede52c30 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Wed, 16 Nov 2022 11:41:02 -0500 Subject: [PATCH 25/26] Update syntax to nostandardize --- calipmatch.ado | 4 ++-- calipmatch.sthlp | 6 +++--- test_calipmatch.do | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index ae7c0ea..41ac156 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { @@ -89,7 +89,7 @@ program define calipmatch, sortpreserve rclass if r(no_matches)==0 { - if "`nostandardize'"=="" { + if "`standardize'"=="" { * Create standardized caliper vars (subtract mean, divide by SD) local i = 0 foreach var of varlist `calipermatch' { diff --git a/calipmatch.sthlp b/calipmatch.sthlp index beb6474..b9d2980 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -24,7 +24,7 @@ Create a variable indicating groups of matched cases and controls {opt max:matches(#)} {opth caliperm:atch(varlist)} {opth caliperw:idth(numlist)} -[{opth exactm:atch(varlist)} {bf: {ul: no}standardize}] +[{opth exactm:atch(varlist)} {bf: nostandardize}] {synoptset 23 tabbed}{...} @@ -44,7 +44,7 @@ matching{p_end} {syntab :Optional} {synopt :{opth exactm:atch(varlist)}}list of integer variables to match on exactly{p_end} -{synopt :{bf: {ul:no}standardize}} distance using sum of squares; default is standardized sum of squares {p_end} +{synopt :{bf: nostandardize}} distance using sum of squares; default is standardized sum of squares {p_end} {synoptline} @@ -116,7 +116,7 @@ matching variables, they must also have identical values for every exact matchin {it:int} or {it:long}. This enables speedy exact matching, by ensuring that all values are stored as precise integers. -{phang}{bf: {ul:no}standardize} calculates distance between cases and controls using the sum of squared differences. +{phang}{bf: nostandardize} calculates distance between cases and controls using the sum of squared differences. When specified, matches will be sensitive to the scale of caliper variables. This can be used to weight caliper variables. {marker saved_results}{...} diff --git a/test_calipmatch.do b/test_calipmatch.do index c7afee3..84a8494 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -9,7 +9,7 @@ program define test_calipmatch if (_rc==0) { * Assign arguments to locals using the same syntax as calipmatch - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) NOstandardize] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist) nostandardize] * Store returned objects local cases_total = r(cases_total) From 4ae6eaf63d3401886db3c825f0301b4a6e126145 Mon Sep 17 00:00:00 2001 From: OppInsights-Bot Date: Wed, 16 Nov 2022 16:42:10 +0000 Subject: [PATCH 26/26] README: update embedded calipmatch.sthlp --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 153ec90..5760e17 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ The help file can be explored interactively in Stata using `help calipmatch`.

calipmatch [if] [in], generate(newvar) casevar(varname) maxmatches(#) calipermatch(varlist) caliperwidth(numlist) [exactmatch( - varlist) nostandardize] + varlist) nostandardize]

options Description @@ -44,7 +44,7 @@ The help file can be explored interactively in Stata using `help calipmatch`.

Optional exactmatch(varlist) list of integer variables to match on exactly - nostandardize distance using sum of squares; default is + nostandardize distance using sum of squares; default is standardized sum of squares -------------------------------------------------------------------------

@@ -121,7 +121,7 @@ The help file can be explored interactively in Stata using `help calipmatch`. This enables speedy exact matching, by ensuring that all values are stored as precise integers.

- nostandardize calculates distance between cases and controls using the + nostandardize calculates distance between cases and controls using the sum of squared differences. When specified, matches will be sensitive to the scale of caliper variables. This can be used to weight caliper variables.