From 53a0c333aaa11a1d1fe6430c8a84457b1a805c3f Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Sat, 26 Oct 2019 16:46:09 -0400 Subject: [PATCH 01/50] Make caliper matching less greedy Before: Found up to maxmatches for each case, then tried to find up to maxmatches for the next case. So some cases would go unmatched because the only available control was used as an nth match for a previous case. Now: Find a first match for each case, then a second, etc, up to maxmatches. So any case that can have at least one match gets at least one. --- calipmatch.ado | 70 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 25 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 0f54c29..c0c31d6 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -1,4 +1,4 @@ -*! version 1.0.0 9may2017 Michael Stepner and Allan Garland, stepner@mit.edu +*! version 1.1.0 26oct2019 Michael Stepner and Allan Garland, stepner@mit.edu /* CC0 license information: To the extent possible under law, the author has dedicated all copyright and related and neighboring rights @@ -148,52 +148,72 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma real scalar curmatch curmatch = 0 + real scalar highestmatch + highestmatch = 0 real colvector matchsuccess matchsuccess = J(maxmatch, 1, 0) real scalar brow + real rowvector casematchcount + real scalar caseindex real scalar caseobs real scalar controlobs - real scalar casematchcount + real scalar matchattempt real rowvector matchvals real rowvector controlvals real matrix matchbounds for (brow=1; brow<=rows(boundaries); brow++) { - for (caseobs=boundaries[brow,3]; caseobs<=boundaries[brow,4]; caseobs++) { - - curmatch++ - casematchcount=0 - _st_store(caseobs, matchgrp, curmatch) - - matchvals = st_data(caseobs, matchvars) - matchbounds = (matchvals-tolerance)\(matchvals+tolerance) + casematchcount = J(boundaries[brow,4] - boundaries[brow,3] + 1, 1, 0) + + for (matchattempt=1; 
matchattempt<=maxmatch; matchattempt++) { + + for (caseobs=boundaries[brow,3]; caseobs<=boundaries[brow,4]; caseobs++) { - for (controlobs=boundaries[brow,1]; controlobs<=boundaries[brow,2]; controlobs++) { + caseindex = caseobs - boundaries[brow,3] + 1 - if (_st_data(controlobs, matchgrp)!=.) continue + if (matchattempt==1) { + highestmatch++ + curmatch = highestmatch + _st_store(caseobs, matchgrp, curmatch) + } + else { + if (casematchcount[caseindex,1] < matchattempt - 1) continue + curmatch = _st_data(caseobs, matchgrp) + } + + matchvals = st_data(caseobs, matchvars) + matchbounds = (matchvals-tolerance)\(matchvals+tolerance) - controlvals = st_data(controlobs, matchvars) + for (controlobs=boundaries[brow,1]; controlobs<=boundaries[brow,2]; controlobs++) { + + if (_st_data(controlobs, matchgrp)!=.) continue + + controlvals = st_data(controlobs, matchvars) + + if (controlvals>=matchbounds[1,.] & controlvals<=matchbounds[2,.]) { + casematchcount[caseindex,1] = casematchcount[caseindex,1] + 1 + _st_store(controlobs, matchgrp, curmatch) + break + } - if (controlvals>=matchbounds[1,.] & controlvals<=matchbounds[2,.]) { - casematchcount++ - _st_store(controlobs, matchgrp, curmatch) } - if (casematchcount==maxmatch) break - - } + if (matchattempt==1 & casematchcount[caseindex,1]==0) { + highestmatch-- + _st_store(caseobs, matchgrp, .) + } - if (casematchcount==0) { - curmatch-- - _st_store(caseobs, matchgrp, .) 
- } - else { - matchsuccess[casematchcount,1] = matchsuccess[casematchcount,1]+1 } + } + for (caseindex=1; caseindex <= boundaries[brow,4] - boundaries[brow,3] + 1; caseindex++) { + matchattempt = casematchcount[caseindex,1] + if (matchattempt > 0) { + matchsuccess[matchattempt,1] = matchsuccess[matchattempt,1] + 1 + } } } From c28ce5611150615d2f1e94085740cdf09fcd8dd7 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Sat, 26 Oct 2019 17:00:21 -0400 Subject: [PATCH 02/50] Make caliper matches closer When multiple controls exist within the caliper tolerance, always select the closest one (in terms of sum of squared errors) --- calipmatch.ado | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index c0c31d6..1e6e770 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -157,12 +157,14 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma real scalar brow real rowvector casematchcount real scalar caseindex + real colvector matchedcontrolindex + real matrix minties real scalar caseobs real scalar controlobs real scalar matchattempt real rowvector matchvals - real rowvector controlvals - real matrix matchbounds + real matrix controlvals + real matrix diffvals for (brow=1; brow<=rows(boundaries); brow++) { @@ -184,23 +186,24 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma curmatch = _st_data(caseobs, matchgrp) } + // Store matchvar values for the case and for the controls that have not yet been matched matchvals = st_data(caseobs, matchvars) - matchbounds = (matchvals-tolerance)\(matchvals+tolerance) + controlvals = st_data((boundaries[brow,1], boundaries[brow,2]), matchvars) :* editvalue(st_data((boundaries[brow,1], boundaries[brow,2]), matchgrp):==., 0, .) 
- for (controlobs=boundaries[brow,1]; controlobs<=boundaries[brow,2]; controlobs++) { + // Store difference in matchvar values if they are within tolerance + diffvals = (controlvals :- matchvals) + diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) - if (_st_data(controlobs, matchgrp)!=.) continue - - controlvals = st_data(controlobs, matchvars) - - if (controlvals>=matchbounds[1,.] & controlvals<=matchbounds[2,.]) { - casematchcount[caseindex,1] = casematchcount[caseindex,1] + 1 - _st_store(controlobs, matchgrp, curmatch) - break - } + // Find closest case + minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) + // If a match is found, store it + if (rows(matchedcontrolindex)>0) { + casematchcount[caseindex,1] = casematchcount[caseindex,1] + 1 + _st_store(boundaries[brow,1] + matchedcontrolindex[1,1] - 1, matchgrp, curmatch) } + // If zero matches were found for a case, remove its matchgrp value and reuse it for the next case if (matchattempt==1 & casematchcount[caseindex,1]==0) { highestmatch-- _st_store(caseobs, matchgrp, .) From 060c3dcd10a921a566659f3fe445e3f55fafed52 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Sat, 26 Oct 2019 17:02:01 -0400 Subject: [PATCH 03/50] Fix a comment --- calipmatch.ado | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calipmatch.ado b/calipmatch.ado index 1e6e770..79975c6 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -194,7 +194,7 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma diffvals = (controlvals :- matchvals) diffvals = diffvals :* editvalue(abs(diffvals) :<= tolerance, 0, .) 
- // Find closest case + // Find closest control to match minindex(rowsum(diffvals :^2, 1), 1, matchedcontrolindex, minties) // If a match is found, store it From 80f93ee5ecfe4305b0ff5972b4e3db6ab97e511e Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Fri, 10 Jun 2022 17:02:05 -0400 Subject: [PATCH 04/50] Store CC0 license only once in LICENSE --- LICENSE | 125 +++++++++++++++++++++++++++++++++++++++++-- LICENSE_cc0_fulltext | 121 ----------------------------------------- 2 files changed, 120 insertions(+), 126 deletions(-) delete mode 100644 LICENSE_cc0_fulltext diff --git a/LICENSE b/LICENSE index 762776b..1625c17 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,121 @@ -To the extent possible under law, the author has dedicated all copyright and related and neighboring rights -to this software to the public domain worldwide. This software is distributed without any warranty. +Creative Commons Legal Code -This code is licensed under the CC0 1.0 Universal license. You should have received a copy of the -CC0 Public Domain Dedication along with this software (see the file 'LICENSE_cc0_fulltext'). A human-readable -summary as well as the full legal text can be accessed at http://creativecommons.org/publicdomain/zero/1.0/ \ No newline at end of file +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. 
+ +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. 
publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. 
Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. 
Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. \ No newline at end of file diff --git a/LICENSE_cc0_fulltext b/LICENSE_cc0_fulltext deleted file mode 100644 index 1625c17..0000000 --- a/LICENSE_cc0_fulltext +++ /dev/null @@ -1,121 +0,0 @@ -Creative Commons Legal Code - -CC0 1.0 Universal - - CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE - LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN - ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS - INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES - REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS - PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM - THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED - HEREUNDER. 
- -Statement of Purpose - -The laws of most jurisdictions throughout the world automatically confer -exclusive Copyright and Related Rights (defined below) upon the creator -and subsequent owner(s) (each and all, an "owner") of an original work of -authorship and/or a database (each, a "Work"). - -Certain owners wish to permanently relinquish those rights to a Work for -the purpose of contributing to a commons of creative, cultural and -scientific works ("Commons") that the public can reliably and without fear -of later claims of infringement build upon, modify, incorporate in other -works, reuse and redistribute as freely as possible in any form whatsoever -and for any purposes, including without limitation commercial purposes. -These owners may contribute to the Commons to promote the ideal of a free -culture and the further production of creative, cultural and scientific -works, or to gain reputation or greater distribution for their Work in -part through the use and efforts of others. - -For these and/or other purposes and motivations, and without any -expectation of additional consideration or compensation, the person -associating CC0 with a Work (the "Affirmer"), to the extent that he or she -is an owner of Copyright and Related Rights in the Work, voluntarily -elects to apply CC0 to the Work and publicly distribute the Work under its -terms, with knowledge of his or her Copyright and Related Rights in the -Work and the meaning and intended legal effect of CC0 on those rights. - -1. Copyright and Related Rights. A Work made available under CC0 may be -protected by copyright and related or neighboring rights ("Copyright and -Related Rights"). Copyright and Related Rights include, but are not -limited to, the following: - - i. the right to reproduce, adapt, distribute, perform, display, - communicate, and translate a Work; - ii. moral rights retained by the original author(s) and/or performer(s); -iii. 
publicity and privacy rights pertaining to a person's image or - likeness depicted in a Work; - iv. rights protecting against unfair competition in regards to a Work, - subject to the limitations in paragraph 4(a), below; - v. rights protecting the extraction, dissemination, use and reuse of data - in a Work; - vi. database rights (such as those arising under Directive 96/9/EC of the - European Parliament and of the Council of 11 March 1996 on the legal - protection of databases, and under any national implementation - thereof, including any amended or successor version of such - directive); and -vii. other similar, equivalent or corresponding rights throughout the - world based on applicable law or treaty, and any national - implementations thereof. - -2. Waiver. To the greatest extent permitted by, but not in contravention -of, applicable law, Affirmer hereby overtly, fully, permanently, -irrevocably and unconditionally waives, abandons, and surrenders all of -Affirmer's Copyright and Related Rights and associated claims and causes -of action, whether now known or unknown (including existing as well as -future claims and causes of action), in the Work (i) in all territories -worldwide, (ii) for the maximum duration provided by applicable law or -treaty (including future time extensions), (iii) in any current or future -medium and for any number of copies, and (iv) for any purpose whatsoever, -including without limitation commercial, advertising or promotional -purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each -member of the public at large and to the detriment of Affirmer's heirs and -successors, fully intending that such Waiver shall not be subject to -revocation, rescission, cancellation, termination, or any other legal or -equitable action to disrupt the quiet enjoyment of the Work by the public -as contemplated by Affirmer's express Statement of Purpose. - -3. Public License Fallback. 
Should any part of the Waiver for any reason -be judged legally invalid or ineffective under applicable law, then the -Waiver shall be preserved to the maximum extent permitted taking into -account Affirmer's express Statement of Purpose. In addition, to the -extent the Waiver is so judged Affirmer hereby grants to each affected -person a royalty-free, non transferable, non sublicensable, non exclusive, -irrevocable and unconditional license to exercise Affirmer's Copyright and -Related Rights in the Work (i) in all territories worldwide, (ii) for the -maximum duration provided by applicable law or treaty (including future -time extensions), (iii) in any current or future medium and for any number -of copies, and (iv) for any purpose whatsoever, including without -limitation commercial, advertising or promotional purposes (the -"License"). The License shall be deemed effective as of the date CC0 was -applied by Affirmer to the Work. Should any part of the License for any -reason be judged legally invalid or ineffective under applicable law, such -partial invalidity or ineffectiveness shall not invalidate the remainder -of the License, and in such case Affirmer hereby affirms that he or she -will not (i) exercise any of his or her remaining Copyright and Related -Rights in the Work or (ii) assert any associated claims and causes of -action with respect to the Work, in either case contrary to Affirmer's -express Statement of Purpose. - -4. Limitations and Disclaimers. - - a. No trademark or patent rights held by Affirmer are waived, abandoned, - surrendered, licensed or otherwise affected by this document. - b. 
Affirmer offers the Work as-is and makes no representations or - warranties of any kind concerning the Work, express, implied, - statutory or otherwise, including without limitation warranties of - title, merchantability, fitness for a particular purpose, non - infringement, or the absence of latent or other defects, accuracy, or - the present or absence of errors, whether or not discoverable, all to - the greatest extent permissible under applicable law. - c. Affirmer disclaims responsibility for clearing rights of other persons - that may apply to the Work or any use thereof, including without - limitation any person's Copyright and Related Rights in the Work. - Further, Affirmer disclaims responsibility for obtaining any necessary - consents, permissions or other rights required for any use of the - Work. - d. Affirmer understands and acknowledges that Creative Commons is not a - party to this document and has no duty or obligation with respect to - this CC0 or use of the Work. \ No newline at end of file From e9e653830d770bbf5d4ce25b5661d69d97cd4b5c Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Fri, 10 Jun 2022 17:04:35 -0400 Subject: [PATCH 05/50] Make calipmatch installable from Github --- README.md | 6 +++++- calipmatch.pkg | 26 ++++++++++++++++++++++++++ stata.toc | 4 ++++ 3 files changed, 35 insertions(+), 1 deletion(-) create mode 100644 calipmatch.pkg create mode 100644 stata.toc diff --git a/README.md b/README.md index ae9139c..7605a62 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ ## Installation -Install **calipmatch** in Stata from the SSC repository: `ssc install calipmatch` +This beta version of **calipmatch** can be installed using: + +``` +net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/develop) +``` ## Stata help file diff --git a/calipmatch.pkg b/calipmatch.pkg new file mode 100644 index 0000000..efa99ac --- /dev/null +++ b/calipmatch.pkg @@ -0,0 +1,26 @@ +d 'CALIPMATCH': module for caliper matching 
without replacement +d +d calipmatch matches case observations to control observations +d using "calipers", generating a new variable with a unique value +d for each group of matched cases and controls. It performs 1:1 or +d 1:m matching without replacement. Matched observations will have +d values within +/- the caliper width for every caliper matching +d variable. Matched observations will also have identical values +d for every exact matching variable, if any exact matching +d variables are specified. +d +d KW: matching +d KW: caliper +d +d Requires: Stata version 13 +d +d Distribution-Date: 20220610 +d +d Author: Michael Stepner +d Support: email software@michaelstepner.com +d +d Author: Allan Garland +d Support: email +d +f calipmatch.ado +f calipmatch.sthlp diff --git a/stata.toc b/stata.toc new file mode 100644 index 0000000..4c2fd4c --- /dev/null +++ b/stata.toc @@ -0,0 +1,4 @@ +v 3 +d calipmatch development repository +d by Michael Stepner (software@michaelstepner.com) +p calipmatch module for caliper matching without replacement From 007ed9ffcebd5ee7ac97a5d3f284632768d4c30c Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Fri, 10 Jun 2022 17:09:37 -0400 Subject: [PATCH 06/50] Improve code formatting and comments --- calipmatch.ado | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 79975c6..d8f6bb4 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -1,4 +1,4 @@ -*! version 1.1.0 26oct2019 Michael Stepner and Allan Garland, stepner@mit.edu +*! 
version 1.1.0 10jun2022 Michael Stepner and Allan Garland, software@michaelstepner.com /* CC0 license information: To the extent possible under law, the author has dedicated all copyright and related and neighboring rights @@ -15,11 +15,9 @@ program define calipmatch, sortpreserve rclass syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] * Verify there are same number of caliper vars as caliper widths - local caliper_var_count : word count `calipermatch' - local caliper_width_count : word count `caliperwidth' - if (`caliper_var_count'!=`caliper_width_count') { + if (`: word count `calipermatch'' != `: word count `caliperwidth'') { di as error "must specify the same number of caliper widths as caliper matching variables." - if (`caliper_var_count'<`caliper_width_count') exit 123 + if (`: word count `calipermatch'' < `: word count `caliperwidth'') exit 123 else exit 122 } @@ -82,18 +80,18 @@ program define calipmatch, sortpreserve rclass exit 2001 } - * Find group boundaries + * Find group boundaries within exact-match groups mata: boundaries=find_group_boundaries("`exactmatch'", "`casevar'", `=_N-`insample_total'+1', `=_N') - * Perform matching within each group - qui gen long `generate'=. + * Perform caliper matching within each exact-match group + qui gen long `generate' = . 
tempname case_matches if r(no_matches)==0 { mata: _calipmatch(boundaries,"`generate'",`maxmatches',"`calipermatch'","`caliperwidth'") qui compress `generate' - matrix `case_matches'=r(matchsuccess) + matrix `case_matches' = r(matchsuccess) matrix `case_matches' = (`cases_total' - `case_matches''* J(rowsof(`case_matches'),1,1)) \ `case_matches' } else { @@ -148,6 +146,7 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma real scalar curmatch curmatch = 0 + real scalar highestmatch highestmatch = 0 @@ -176,6 +175,7 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma caseindex = caseobs - boundaries[brow,3] + 1 + // Set the value of the match group if (matchattempt==1) { highestmatch++ curmatch = highestmatch @@ -227,6 +227,21 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma } real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, real scalar startobs, real scalar endobs) { + // Inputs: + // Dataset sorted by the variables specified by "grpvars casevar" within the rows [startobs, endobs] + // + // - grpvars: one or more variables for which each distinct set of values constitutes a group + // - casevar: a variable which takes values {0,1} + // - startobs: the first observation to process + // - endobs: the last observation to process + // + // Outputs: + // return a matrix with dimensions G x 4, where G is the number of distinct groups containing both cases and controls. + // Col 1 = the first obs in a group with casevar==0 + // Col 2 = the last obs in a group with casevar==0 + // Col 3 = the first obs in a group with casevar==1 + // Col 4 = the last obs in a group with casevar==1 + real matrix boundaries boundaries = (startobs, ., ., .) 
@@ -252,9 +267,9 @@ real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, nextcol=2 currow=currow+1 } - else { // only one value of casevar in prev group --> skip group + else { // only one value of casevar (all controls or all cases) in prev group --> skip group boundaries[currow,1]=obs - } + } } else if (_st_data(obs, casevarnum)!=_st_data(obs-1, casevarnum)) { boundaries[currow,2]=obs-1 @@ -270,10 +285,8 @@ real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, return (boundaries) } else { - if (currow>1) return (boundaries[1..rows(boundaries)-1, .]) else st_numscalar("r(no_matches)",1) - } } From 2d5077aa6ccb0ba3edb6502857bcf42ced0aa7ab Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Fri, 10 Jun 2022 17:09:58 -0400 Subject: [PATCH 07/50] Document improved algorithm in -help calipmatch- --- README.md | 41 +++++++++++++++++++++-------------------- calipmatch.sthlp | 42 +++++++++++++++++++++--------------------- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 7605a62..71e80ba 100644 --- a/README.md +++ b/README.md @@ -48,7 +48,7 @@ The help file looks best when viewed in Stata using `help calipmatch`.

calipmatch matches case observations to control observations using "calipers", generating a new variable with a unique value for each group - of matched cases and controls. It performs 1:1 or 1:m matching without + of matched cases and controls. It performs 1:1 or 1:m matching without replacement.

Matched observations must have values within +/- the caliper width for @@ -56,12 +56,18 @@ The help file looks best when viewed in Stata using `help calipmatch`. identical values for every exact matching variable, if any exact matching variables are specified.

- Controls are randomly matched to cases without replacement. For each - case, calipmatch searches for matching controls until it either finds the - pre-specified maximum number of matches or runs out of controls. The - search is performed greedily: it is possible that some cases end up - unmatched because all possible matching controls have already been - matched with another case. + Controls are matched to cases without replacement, using an efficient + (greedy) algorithm that approximately maximizes the number of successful + matches, while minimizing the sum of squared differences in the caliper + matching variables when multiple valid matches exist. +

+ The cases are processed in random order. For each case, calipmatch + searches for matching controls. If any valid matches exist, it selects + the matching control which minimizes the sum of squared differences + across caliper matching variables. If maxmatches(#)>1, then after + completing the search for a first matching control observation for each + case, the algorithm will search for a second matching control observation + for each case, etc.

Options @@ -82,16 +88,12 @@ The help file looks best when viewed in Stata using `help calipmatch`. a missing value are excluded from matching.

maxmatches(#) sets the maximum number of controls to be matched with each - case. Setting maxmatches(1) performs a 1:1 matching: calipmatch + case. Setting maxmatches(1) performs a 1:1 match where calipmatch searches for one matching control observation for each case - observation. -

- By setting maxmatches(#) greater than 1, calipmatch will proceed in - random order through the cases and search for matching control - observations until it either finds the maximum number of matches or - runs out of controls. The search is performed greedily: it is - possible that some cases end up unmatched because all possible - matching controls have already been matched with another case. + observation. By setting maxmatches(#) greater than 1 calipmatch will + try to assign a first valid matching control observation for every + case observation, then search for a second matching control + observation, and onward.

calipermatch(varlist) is a list of one or more numeric variables to use for caliper matching. Matched observations must have values within @@ -130,11 +132,10 @@ The help file looks best when viewed in Stata using `help calipmatch`.

Authors

- Michael Stepner - Massachusetts Institute of Technology - stepner@mit.edu + Michael Stepner + software@michaelstepner.com

- Allan Garland, M.D. M.A. + Allan Garland University of Manitoba Faculty of Medicine agarland@hsc.mb.ca

diff --git a/calipmatch.sthlp b/calipmatch.sthlp index d0bab84..ea88bcd 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -1,5 +1,5 @@ {smcl} -{* *! version 1.0.0 9may2017}{...} +{* *! version 1.1.0 10jun2022}{...} {viewerjumpto "Syntax" "calipmatch##syntax"}{...} {viewerjumpto "Description" "calipmatch##description"}{...} {viewerjumpto "Options" "calipmatch##options"}{...} @@ -52,8 +52,8 @@ matching{p_end} {pstd} {cmd:calipmatch} matches case observations to control observations using "calipers", -generating a new variable with a unique value for each group of matched cases and controls. -It performs 1:1 or 1:m matching without replacement. +generating a new variable with a unique value for each group of matched cases and controls. It +performs 1:1 or 1:m matching without replacement. {pstd} Matched observations must have values within +/- the caliper @@ -61,11 +61,16 @@ width for every caliper matching variable. Matched observations must also have i for every exact matching variable, if any exact matching variables are specified. {pstd} -Controls are randomly matched to cases without replacement. For each case, {cmd:calipmatch} -searches for matching controls until it either finds the pre-specified maximum number of -matches or runs out of controls. The search is performed greedily: it is possible that -some cases end up unmatched because all possible matching controls have already been matched with -another case. +Controls are matched to cases without replacement, using an efficient (greedy) algorithm that approximately maximizes +the number of successful matches, while minimizing the sum of squared differences in the caliper matching +variables when multiple valid matches exist. + +{pstd} +The cases are processed in random order. For each case, {cmd:calipmatch} searches for matching controls. If +any valid matches exist, it selects the matching control which minimizes the sum of squared differences across +caliper matching variables. 
If {opt maxmatches(#)}>1, then after completing the search for a first matching +control observation for each case, the algorithm will search for a second matching control observation for +each case, etc. {marker options}{...} @@ -85,15 +90,11 @@ each observation is a case (=1) or a control (=0). Observations with a missing value are excluded from matching. {phang}{opt max:matches(#)} sets the maximum number of controls to be matched -with each case. Setting {opt maxmatches(1)} performs a 1:1 matching: {cmd:calipmatch} -searches for one matching control observation for each case observation. - -{pmore}By setting {opt maxmatches(#)} greater than 1, {cmd:calipmatch} will proceed in -random order through -the cases and search for matching control observations until it either finds the -maximum number of matches or runs out of controls. The search is performed greedily: it -is possible that some cases end up unmatched because all possible matching -controls have already been matched with another case. +with each case. Setting {opt maxmatches(1)} performs a 1:1 match where {cmd:calipmatch} +searches for one matching control observation for each case observation. By setting +{opt maxmatches(#)} greater than 1 {cmd:calipmatch} will try to assign +a first valid matching control observation for every case observation, then search +for a second matching control observation, and onward. {phang}{opth caliperm:atch(varlist)} is a list of one or more numeric variables to use for caliper matching. Matched observations must have values within +/- the caliper @@ -135,11 +136,10 @@ all values are stored as precise integers. {marker author}{...} {title:Authors} -{pstd}Michael Stepner{p_end} -{pstd}Massachusetts Institute of Technology{p_end} -{pstd}stepner@mit.edu{p_end} +{pstd}{bf:{browse "https://michaelstepner.com":Michael Stepner}}{p_end} +{pstd}software@michaelstepner.com{p_end} -{pstd}Allan Garland, M.D. 
M.A.{p_end} +{pstd}{bf:Allan Garland}{p_end} {pstd}University of Manitoba Faculty of Medicine{p_end} {pstd}agarland@hsc.mb.ca{p_end} From 3bd34bec6856be59f8a9e4d61abe6b27d723194b Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Fri, 10 Jun 2022 17:19:53 -0400 Subject: [PATCH 08/50] Fix typo in README code --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 71e80ba..2845946 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ This beta version of **calipmatch** can be installed using: ``` -net install project, from(https://github.com/michaelstepner/calipmatch/raw/develop) +net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/develop) ``` ## Stata help file From d2460f62b43f9ad4fedb34a49645facd85924f97 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Fri, 10 Jun 2022 22:15:37 -0400 Subject: [PATCH 09/50] Add documentation comments to Mata functions --- calipmatch.ado | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/calipmatch.ado b/calipmatch.ado index d8f6bb4..90e4260 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -134,6 +134,22 @@ set matastrict on mata: void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxmatch, string scalar calipvars, string scalar calipwidth) { + // Objective: + // Perform caliper matching using the specified caliper variables and caliper widths, matching each case observation to one or + // many controls. Identify the matches within pre-specified groups, and store a variable containing integers that define a group + // of matched cases and controls. + // + // Inputs: + // Dataset with the same sort order as it had when find_group_boundaries() was run.
+ // - boundaries: G x 4 matrix output by find_group_boundaries() + // - genvar: variable containing all missing values, which will be populated with matching groups + // - maxmatch: a positive integer indicating the maximum number of control obs to match to each case obs + // - calipvars: a list of numeric variables for caliper matching + // - calipwidth: a list of caliper widths, specifying the maximum distance between case and control variables in each calipvar + // + // Outputs: + // The values of "genvar" are filled with integers that describe each group of matched cases and controls. + // - r(matchsuccess) is a Stata return matrix tabulating the number of cases successfully matched to {1, ..., maxmatch} controls real scalar matchgrp matchgrp = st_varindex(genvar) @@ -227,6 +243,9 @@ void _calipmatch(real matrix boundaries, string scalar genvar, real scalar maxma } real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, real scalar startobs, real scalar endobs) { + // Objective: + // For each set of distinct values of "grpvars", identify the starting and ending observation for cases and controls. + // // Inputs: // Dataset sorted by the variables specified by "grpvars casevar" within the rows [startobs, endobs] // @@ -236,7 +255,7 @@ real matrix find_group_boundaries(string scalar grpvars, string scalar casevar, // - endobs: the last observation to process // // Outputs: - // return a matrix with dimensions G x 4, where G is the number of distinct groups containing both cases and controls. + // Return a matrix with dimensions G x 4, where G is the number of distinct groups containing both cases and controls. 
// Col 1 = the first obs in a group with casevar==0 // Col 2 = the last obs in a group with casevar==0 // Col 3 = the first obs in a group with casevar==1 From e4218523c0e081e519c364203e22ab1057188ebd Mon Sep 17 00:00:00 2001 From: jethaaly Date: Sat, 8 Oct 2022 20:29:48 -0400 Subject: [PATCH 10/50] Add unit tests for incorrectly specified maximum matches and caliper variables --- test_calipmatch.do | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 52ebd2e..62d5c82 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -105,7 +105,25 @@ keep case income_percentile * if statement that matches no observations rcof `"test_calipmatch if income_percentile>100, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// == 2000 - + +***NEW TEST * maximum matches is positive, but not an integer +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// + == 198 + +***NEW TEST * caliper variable is ambiguous +gen byte income_percentile2=ceil(rnormal() * 100) +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_perc) caliperwidth(5)"' /// + == 111 +drop income_percentile2 + +***NEW TEST * caliper variable is does not exist +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(nonsense) caliperwidth(5)"' /// + == 111 + +***NEW TEST * caliper width is negative +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(-5)"' /// + == 125 + * no controls replace case=1 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// @@ -131,7 +149,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in * case/control variable not always 0 or 1, but not in sample test_calipmatch in 2/200, gen(matchgroup) 
case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) keep case income_percentile - + *** One caliper matching variable and one exact matching variable gen byte sex=round(runiform()) From 93c536888d22ea2012b9bad1419617fb9de9aa73 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Sat, 8 Oct 2022 22:18:19 -0400 Subject: [PATCH 11/50] Added Performance Tests for Perfect Matching situation --- test_calipmatch.do | 71 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index 62d5c82..90ebdb2 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -228,3 +228,74 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 +*** NEW TESTS *** Performance Test - perfect match in the controls +* One caliper, perfect match +clear +set seed 4585239 + +set obs 200 +gen byte case=(_n<=20) +gen byte income_percentile_ex =max(21,ceil(runiform() * 100)) + +forvalues m=1/20 { + qui replace income_percentile_ex = `m' in `m' + local t = `m'+20 + qui replace income_percentile_ex = `m' in `t' +} + +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile_ex) caliperwidth(5) +forvalues m=1/20 { + assert matchgroup[`m'] == matchgroup[`m'+20] +} +keep case income_percentile_ex + +* One caliper and one exact matching variable + +gen byte sex_ex=round(runiform()) +replace case=(_n<=20) +forvalues m=1/20 { + qui replace sex_ex = mod(`m',2) in `m' + local t = `m'+20 + qui replace sex_ex = mod(`t',2) in `t' +} + +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile_ex) caliperwidth(5) exactmatch(sex_ex) +forvalues m=1/20 { + assert matchgroup[`m'] == matchgroup[`m'+20] +} + + +* Many caliper and many exact matching variables, m:1 match + 
+clear +set obs 50000 + +gen byte case=(_n<=1000) +gen byte sex_ex=round(runiform()) +gen byte age_ex = 44 + ceil(runiform()*17) +gen byte self_emp_ex = (runiform()<0.1) +gen byte prov_ex = ceil(runiform()*9) +gen byte cal_val_ex=max(10001, ceil(runiform() * 100)) + +forvalues m=1/1000 { + forvalues t=0/4 { + local s = `m' + `t'*1000 + qui replace sex_ex = mod(`m',2) in `s' + qui replace age_ex = mod(`m',30)+44 in `s' + qui replace self_emp_ex = mod(`m',2) in `s' + qui replace prov_ex = mod(`m',2) in `s' + qui replace cal_val_ex = mod(`m',1000) in `s' + } +} + +test_calipmatch, gen(matchgroup) case(case) maxmatches(4) /// + exactmatch(sex_ex self_emp_ex prov_ex) calipermatch(age_ex cal_val_ex) caliperwidth(3 5) + + +forvalues m=1/1000 { + forvalues t=1/4 { + local s = `m' + `t'*1000 + assert matchgroup[`m'] == matchgroup[`s'] + } +} \ No newline at end of file From fe89574a8bee14eaf2a1da8075f055fb5740c8ae Mon Sep 17 00:00:00 2001 From: jethaaly Date: Sat, 8 Oct 2022 22:34:45 -0400 Subject: [PATCH 12/50] Add test for exact match is ambiguous --- test_calipmatch.do | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index 90ebdb2..7c6fec5 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -160,6 +160,12 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) keep case income_percentile sex +***NEW TEST * exact variable is ambiguous +gen byte sex2=round(runiform()) +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(se)"' /// + == 111 +drop sex2 + * no controls among one matching group replace case=1 if sex==1 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// From ca4755f7c2cc196aff8cd4e0c128b12045c51dcd Mon Sep 17 00:00:00 2001 From: jethaaly Date: Sat, 8 Oct 2022 22:38:07 -0400 Subject: [PATCH 13/50] Add test for exact variable does not exist and fix typo in 
caliper variable does not exist --- test_calipmatch.do | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 7c6fec5..1b3d84e 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -116,7 +116,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in == 111 drop income_percentile2 -***NEW TEST * caliper variable is does not exist +***NEW TEST * caliper variable does not exist rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(nonsense) caliperwidth(5)"' /// == 111 @@ -166,6 +166,16 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in == 111 drop sex2 +***NEW TEST * exact variable is ambiguous +gen byte sex2=round(runiform()) +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(se)"' /// + == 111 +drop sex2 + +***NEW TEST * exact variable does not exist +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(nonsense)"' /// + == 111 + * no controls among one matching group replace case=1 if sex==1 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// From 13dfe939a0d2d217f4f504846a1fe8d7634090a0 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Sat, 8 Oct 2022 22:47:01 -0400 Subject: [PATCH 14/50] Removed duplicate of exact variable is ambiguous test --- test_calipmatch.do | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 1b3d84e..4d75acb 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -166,12 +166,6 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in == 111 drop sex2 -***NEW TEST * exact variable is ambiguous -gen byte sex2=round(runiform()) -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(se)"' 
/// - == 111 -drop sex2 - ***NEW TEST * exact variable does not exist rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(nonsense)"' /// == 111 From a66abc66c94e2bcd2439c9114d9de044a433cb32 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 11 Oct 2022 02:27:28 -0400 Subject: [PATCH 15/50] Add condition to maxmatch syntax that maxmatch be greater than 0, and add corresponding test --- calipmatch.ado | 2 +- test_calipmatch.do | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/calipmatch.ado b/calipmatch.ado index 90e4260..8f7ddef 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist >0 integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { diff --git a/test_calipmatch.do b/test_calipmatch.do index 4d75acb..1afe8f2 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -110,6 +110,13 @@ rcof `"test_calipmatch if income_percentile>100, gen(matchgroup) case(case) maxm rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// == 198 +/* +*Uncomment this test if the change suggested in Update 2 is implemented. 
Otherwise it will fail +***NEW TEST * maximum matches is a negative integer +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// + == 125 +*/ + ***NEW TEST * caliper variable is ambiguous gen byte income_percentile2=ceil(rnormal() * 100) rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_perc) caliperwidth(5)"' /// From a2068613dae0be39f04220c9f9c4859cf0f18d4a Mon Sep 17 00:00:00 2001 From: jethaaly Date: Tue, 11 Oct 2022 02:44:45 -0400 Subject: [PATCH 16/50] Edit note for maximimum match is negative integer test --- test_calipmatch.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 1afe8f2..b3fea25 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -111,7 +111,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(i == 198 /* -*Uncomment this test if the change suggested in Update 2 is implemented. Otherwise it will fail +*Uncomment this test if the change suggested in Update 2 is implemented. 
***NEW TEST * maximum matches is a negative integer rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// == 125 From 966e3dfce7b237a70de165dc3c3d907858c6564e Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:18:28 -0400 Subject: [PATCH 17/50] Install Stata in GHA Runner --- .github/workflows/stata_tests.yml | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 8607ddf..bc009e7 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -9,5 +9,26 @@ jobs: timeout-minutes: 30 # change max time from default 6hr steps: - - name: Hello World - run: echo "Hello World" + - name: Install Stata + run: | + # Install Stata + mkdir -p /tmp/statafiles + curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz + cd /tmp/statafiles + tar -zxf ./Stata16Linux64.tar.gz + mkdir -p /usr/local/stata16 + cd /usr/local/stata16 + + # The following command returns 1 even though it's ok + set +e + yes | /tmp/statafiles/install + set -e + + cd /usr/local/bin + ln -s /usr/local/stata16/stata-mp . + ln -s /usr/local/stata16/xstata-mp . 
+ curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic + rm -r /tmp/statafiles + cd /tmp + env: + OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} From 6535706d97486fa497a31fdaf996fd796ce8a757 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:22:03 -0400 Subject: [PATCH 18/50] Add sudo to mkdir MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit See if this resolves: mkdir: cannot create directory ‘/usr/local/stata16’: Permission denied https://github.com/michaelstepner/calipmatch/actions/runs/3228506690/jobs/5284691399 --- .github/workflows/stata_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index bc009e7..371ac1f 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -16,7 +16,7 @@ jobs: curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz cd /tmp/statafiles tar -zxf ./Stata16Linux64.tar.gz - mkdir -p /usr/local/stata16 + sudo mkdir -p /usr/local/stata16 cd /usr/local/stata16 # The following command returns 1 even though it's ok From 2769e6c54082e2ecad278ac2ceb77ab0ab339a79 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:24:14 -0400 Subject: [PATCH 19/50] Add more sudo to install Stata --- .github/workflows/stata_tests.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 371ac1f..132026c 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -21,14 +21,17 @@ jobs: # The following command returns 1 even though it's ok set +e - yes | /tmp/statafiles/install + sudo yes | /tmp/statafiles/install set -e cd /usr/local/bin - ln -s /usr/local/stata16/stata-mp . 
- ln -s /usr/local/stata16/xstata-mp . - curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic + sudo ln -s /usr/local/stata16/stata-mp . + sudo ln -s /usr/local/stata16/xstata-mp . + sudo curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic rm -r /tmp/statafiles cd /tmp env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + + - name: Run Stata + run: stata-mp From 2b4bc0df72ad3530b3747342a5ee9c9b87217b0b Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:26:43 -0400 Subject: [PATCH 20/50] Fix sudo with pipe --- .github/workflows/stata_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 132026c..94c2d9b 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -21,7 +21,7 @@ jobs: # The following command returns 1 even though it's ok set +e - sudo yes | /tmp/statafiles/install + sudo sh -c 'yes | /tmp/statafiles/install' set -e cd /usr/local/bin From fa806ef8d0c9044c039057ca13f353eb5c4e4993 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:31:11 -0400 Subject: [PATCH 21/50] Run calipmatch Stata tests in GHA --- .github/workflows/stata_tests.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 94c2d9b..699c0e7 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -33,5 +33,8 @@ jobs: env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} - - name: Run Stata - run: stata-mp + - name: Check out code repository + uses: actions/checkout@v2 + + - name: Run tests + run: stata-mp -b do test_calipmatch.do From 75df89e6bd095fdec72094538be0b08dec56ce2b Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:40:34 -0400 Subject: [PATCH 
22/50] Run tests on PR commit; print output; catch errors --- .github/workflows/stata_tests.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 699c0e7..6fce63b 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -1,6 +1,8 @@ name: stata_tests on: + pull_request: + types: [opened, synchronize, reopened] workflow_dispatch: jobs: @@ -37,4 +39,10 @@ jobs: uses: actions/checkout@v2 - name: Run tests - run: stata-mp -b do test_calipmatch.do + run: | + stata-mp -b do test_calipmatch.do + cat test_calipmatch.log + if egrep --before-context=1 --max-count=1 "^r\([0-9]+\);$" test_calipmatch.log + then + exit 1 + fi From 28c0b6632b6a08b538c305812a4e20e9eb01c019 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:43:47 -0400 Subject: [PATCH 23/50] Deliberate Stata error to see if GHA detects it --- test_calipmatch.do | 1 + 1 file changed, 1 insertion(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index 52ebd2e..7e1c7f0 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -210,3 +210,4 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 +error 1 From 332aeb56dee5c02a6e0dd2b17ca7671390b37fa6 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 12:45:41 -0400 Subject: [PATCH 24/50] Revert "Deliberate Stata error to see if GHA detects it" This reverts commit 28c0b6632b6a08b538c305812a4e20e9eb01c019. 
--- test_calipmatch.do | 1 - 1 file changed, 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 7e1c7f0..52ebd2e 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -210,4 +210,3 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 -error 1 From ad791f2c714aee04989e75684628097f8dec1eb3 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 11 Oct 2022 14:24:40 -0400 Subject: [PATCH 25/50] Indicate when all tests are successful --- test_calipmatch.do | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index 52ebd2e..83a286a 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -210,3 +210,6 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 +*------------------------------------------------------------------------------- + +di "Successfully completed all tests." 
From de104a808c31913b669b210fe77ba8bfe572af13 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 12 Oct 2022 03:01:17 -0400 Subject: [PATCH 26/50] Set sortseed; provide more informative error msgs --- test_calipmatch.do | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 83a286a..296be2f 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -17,7 +17,11 @@ program define test_calipmatch * Exactly one case per matchgroup egen casecount=sum(`casevar'), by(`generate') - qui assert casecount==1 if !mi(`generate') + cap assert casecount==1 if !mi(`generate') + if (_rc!=0) { + di as error "More than one case per matchgroup" + exit 9 + } * Highest matchgroup value = number of matched cases sum `generate', meanonly @@ -81,7 +85,11 @@ program define test_calipmatch forvalues m=0/`maxmatches' { qui count if matched_controls==`m' - assert r(N)==matches[`=`m'+1',1] + cap assert r(N)==matches[`=`m'+1',1] + if (_rc!=0) { + di as error "Incorrect report for number of matched controls" + exit 9 + } } } @@ -92,6 +100,8 @@ end *** One caliper matching variable clear set seed 4585239 +set sortseed 789045789 + set obs 200 gen byte case=(_n<=20) From 2cb31f9fa7597d2ee9304c1c6585c3991b1724c5 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Wed, 12 Oct 2022 04:17:43 -0400 Subject: [PATCH 27/50] Added syntax requirement to max match in ado, corrected performance test and organized tests in test file --- calipmatch.ado | 2 +- test_calipmatch.do | 204 +++++++++++++++++++-------------------------- 2 files changed, 88 insertions(+), 118 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 8f7ddef..76cc373 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist >0 
integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { diff --git a/test_calipmatch.do b/test_calipmatch.do index 3abcdb3..29c7ebb 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -89,7 +89,10 @@ program define test_calipmatch end +*------------------------------------------------------------------------------- *** One caliper matching variable +*------------------------------------------------------------------------------- + clear set seed 4585239 @@ -102,34 +105,57 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -* if statement that matches no observations -rcof `"test_calipmatch if income_percentile>100, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// - == 2000 +*------------------------------------------------------------------------------- +* Syntax +*------------------------------------------------------------------------------- -***NEW TEST * maximum matches is positive, but not an integer +* string case variable +drop case +gen case=cond(_n<=20,"case","ctrl") +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) "' /// + == 109 +drop case +gen byte case=(_n<=20) + +* maximum matches is positive, but not an integer rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// == 198 -/* -*Uncomment this test if the change suggested in Update 2 is implemented. 
-***NEW TEST * maximum matches is a negative integer -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// +* maximum matches is a negative integer +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(-1) calipermatch(income_percentile) caliperwidth(5)"' /// == 125 -*/ -***NEW TEST * caliper variable is ambiguous +* multiple maximum matches specified +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1 5) calipermatch(income_percentile) caliperwidth(5)"' /// + == 125 + +* caliper variable is ambiguous gen byte income_percentile2=ceil(rnormal() * 100) rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_perc) caliperwidth(5)"' /// == 111 drop income_percentile2 -***NEW TEST * caliper variable does not exist +* caliper variable does not exist rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(nonsense) caliperwidth(5)"' /// == 111 -***NEW TEST * caliper width is negative +* caliper width is negative rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(-5)"' /// == 125 + +*------------------------------------------------------------------------------- +* Necessary Conditions and Set-up for Matching +*------------------------------------------------------------------------------- + +* generate a variable that already exists +gen matchgroup=. 
+rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// + == 110 +drop matchgroup + +* if statement that matches no observations +rcof `"test_calipmatch if income_percentile>100, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// + == 2000 * no controls replace case=1 @@ -139,13 +165,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in * no cases replace case=0 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// - == 2001 - -* generate a variable that already exists -gen matchgroup=. -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// - == 110 -drop matchgroup + == 2001 * case/control variable not always 0 or 1 in sample replace case=(_n<=20) @@ -157,7 +177,24 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in test_calipmatch in 2/200, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) keep case income_percentile +*------------------------------------------------------------------------------- +* Performance +*------------------------------------------------------------------------------- + +* perfect match exists for each case +clear +set obs 100 +gen byte income_percentile = _n +expand 2, gen(case) +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) +forvalues m=1/100 { + assert matchgroup[`m'] == matchgroup[`m'+100] +} + + +*------------------------------------------------------------------------------- *** One caliper matching variable and one exact matching variable +*------------------------------------------------------------------------------- gen byte sex=round(runiform()) replace case=(_n<=20) @@ -167,16 +204,39 @@ test_calipmatch, gen(matchgroup) case(case) 
maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) keep case income_percentile sex -***NEW TEST * exact variable is ambiguous +*------------------------------------------------------------------------------- +* Syntax +*------------------------------------------------------------------------------- + +* exact variable is ambiguous gen byte sex2=round(runiform()) rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(se)"' /// == 111 drop sex2 -***NEW TEST * exact variable does not exist +* exact variable does not exist rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(nonsense)"' /// == 111 +* float exact matching variable +recast float sex +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// + == 198 + +* string exact matching variable +rename sex sex_numeric +gen sex=cond(sex_numeric==0,"M","F") +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// + == 198 + +drop sex +rename sex_numeric sex +recast byte sex + +*------------------------------------------------------------------------------- +* Necessary Conditions and Set-up for Matching +*------------------------------------------------------------------------------- + * no controls among one matching group replace case=1 if sex==1 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// @@ -196,31 +256,10 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// assert matchgroup==. 
keep case income_percentile sex -* string case variable -drop case -gen case=cond(_n<=20,"case","ctrl") -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// - == 109 - -drop case -gen byte case=(_n<=20) - -* float exact matching variable -recast float sex -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// - == 198 - -* string exact matching variable -rename sex sex_numeric -gen sex=cond(sex_numeric==0,"M","F") -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// - == 198 - -drop sex -rename sex_numeric sex -recast byte sex +*------------------------------------------------------------------------------- *** Many caliper and exact matching variables, m:1 match +*------------------------------------------------------------------------------- clear set obs 50000 @@ -237,6 +276,10 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(5) /// exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5) keep case sex age self_emp prov income_percentile +*------------------------------------------------------------------------------- +* Syntax +*------------------------------------------------------------------------------- + * Not enough caliper widths rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3)"' /// == 122 @@ -245,80 +288,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 -<<<<<<< HEAD -*** NEW TESTS *** Performance Test - perfect match in the controls -* One caliper, perfect match -clear -set seed 
4585239 - -set obs 200 -gen byte case=(_n<=20) -gen byte income_percentile_ex =max(21,ceil(runiform() * 100)) - -forvalues m=1/20 { - qui replace income_percentile_ex = `m' in `m' - local t = `m'+20 - qui replace income_percentile_ex = `m' in `t' -} - -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile_ex) caliperwidth(5) -forvalues m=1/20 { - assert matchgroup[`m'] == matchgroup[`m'+20] -} -keep case income_percentile_ex - -* One caliper and one exact matching variable - -gen byte sex_ex=round(runiform()) -replace case=(_n<=20) -forvalues m=1/20 { - qui replace sex_ex = mod(`m',2) in `m' - local t = `m'+20 - qui replace sex_ex = mod(`t',2) in `t' -} - -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// - calipermatch(income_percentile_ex) caliperwidth(5) exactmatch(sex_ex) -forvalues m=1/20 { - assert matchgroup[`m'] == matchgroup[`m'+20] -} - -* Many caliper and many exact matching variables, m:1 match - -clear -set obs 50000 - -gen byte case=(_n<=1000) -gen byte sex_ex=round(runiform()) -gen byte age_ex = 44 + ceil(runiform()*17) -gen byte self_emp_ex = (runiform()<0.1) -gen byte prov_ex = ceil(runiform()*9) -gen byte cal_val_ex=max(10001, ceil(runiform() * 100)) - -forvalues m=1/1000 { - forvalues t=0/4 { - local s = `m' + `t'*1000 - qui replace sex_ex = mod(`m',2) in `s' - qui replace age_ex = mod(`m',30)+44 in `s' - qui replace self_emp_ex = mod(`m',2) in `s' - qui replace prov_ex = mod(`m',2) in `s' - qui replace cal_val_ex = mod(`m',1000) in `s' - } -} - -test_calipmatch, gen(matchgroup) case(case) maxmatches(4) /// - exactmatch(sex_ex self_emp_ex prov_ex) calipermatch(age_ex cal_val_ex) caliperwidth(3 5) - - -forvalues m=1/1000 { - forvalues t=1/4 { - local s = `m' + `t'*1000 - assert matchgroup[`m'] == matchgroup[`s'] - } -} -======= *------------------------------------------------------------------------------- di "Successfully completed all tests." 
->>>>>>> develop From b810f349d111145ad2a4d5250d37b8108fb3ed46 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Wed, 12 Oct 2022 22:48:13 -0400 Subject: [PATCH 28/50] Corrected multiple maximum matches test --- test_calipmatch.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 29c7ebb..1e9c787 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -127,7 +127,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(-1) calipermatch(i * multiple maximum matches specified rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1 5) calipermatch(income_percentile) caliperwidth(5)"' /// - == 125 + == 123 * caliper variable is ambiguous gen byte income_percentile2=ceil(rnormal() * 100) From a18f051eebfa247726bac9352d24ce0e90a2b809 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Wed, 12 Oct 2022 22:50:20 -0400 Subject: [PATCH 29/50] Corrected multiple maximum matches test --- test_calipmatch.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 1e9c787..586b0ba 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -127,7 +127,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(-1) calipermatch(i * multiple maximum matches specified rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1 5) calipermatch(income_percentile) caliperwidth(5)"' /// - == 123 + == 198 * caliper variable is ambiguous gen byte income_percentile2=ceil(rnormal() * 100) From 871959235be93bd8d9fd31ca2d35c99d199675dd Mon Sep 17 00:00:00 2001 From: jethaaly Date: Wed, 12 Oct 2022 22:52:09 -0400 Subject: [PATCH 30/50] Corrected perfect match test --- test_calipmatch.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 586b0ba..21c8863 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -190,7 +190,7 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(1) 
calipermatch(income_pe forvalues m=1/100 { assert matchgroup[`m'] == matchgroup[`m'+100] } - +keep case income_percentile *------------------------------------------------------------------------------- *** One caliper matching variable and one exact matching variable From ad3027c413956bf74c2839d85e3a30df4776c565 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Thu, 13 Oct 2022 01:05:27 -0400 Subject: [PATCH 31/50] Improved perfect match performance test --- test_calipmatch.do | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 21c8863..98db8bc 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -184,12 +184,15 @@ keep case income_percentile * perfect match exists for each case clear set obs 100 -gen byte income_percentile = _n +gen byte income_percentile_ex = _n expand 2, gen(case) -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) -forvalues m=1/100 { - assert matchgroup[`m'] == matchgroup[`m'+100] -} +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile_ex) caliperwidth(5) +gen caseval = income_percentile_ex if case ==1 +egen matchval = mean(caseval), by(matchgroup) +gen squared_valdiff = (income_percentile_ex - matchval)^2 +sum squared_valdiff, meanonly +di r(max) +assert r(max) == 0 keep case income_percentile *------------------------------------------------------------------------------- From 9b747894dc22cc115ff3862807c0cdc7d416362d Mon Sep 17 00:00:00 2001 From: jethaaly Date: Thu, 13 Oct 2022 01:18:13 -0400 Subject: [PATCH 32/50] Edit organization and formatting of tests --- test_calipmatch.do | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 98db8bc..2c41777 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -89,9 +89,9 @@ program define test_calipmatch end 
-*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- *** One caliper matching variable -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- clear set seed 4585239 @@ -105,9 +105,9 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Syntax -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * string case variable drop case @@ -143,9 +143,9 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(no rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(-5)"' /// == 125 -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Necessary Conditions and Set-up for Matching -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * generate a variable that already exists gen matchgroup=. 
@@ -177,9 +177,9 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in test_calipmatch in 2/200, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Performance -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * perfect match exists for each case clear @@ -195,9 +195,9 @@ di r(max) assert r(max) == 0 keep case income_percentile -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- *** One caliper matching variable and one exact matching variable -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- gen byte sex=round(runiform()) replace case=(_n<=20) @@ -207,9 +207,9 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) keep case income_percentile sex -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Syntax -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * exact variable is ambiguous gen byte sex2=round(runiform()) @@ -236,9 +236,9 @@ drop sex rename sex_numeric sex recast byte sex -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Necessary Conditions and Set-up for Matching 
-*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * no controls among one matching group replace case=1 if sex==1 @@ -260,9 +260,9 @@ assert matchgroup==. keep case income_percentile sex -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- *** Many caliper and exact matching variables, m:1 match -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- clear set obs 50000 @@ -279,9 +279,9 @@ test_calipmatch, gen(matchgroup) case(case) maxmatches(5) /// exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5) keep case sex age self_emp prov income_percentile -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Syntax -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Not enough caliper widths rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3)"' /// @@ -292,6 +292,6 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex == 123 -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- di "Successfully completed all tests." 
From 6244634706b93c610d3656de232595801901dbb7 Mon Sep 17 00:00:00 2001 From: jethaaly Date: Thu, 13 Oct 2022 04:05:21 -0400 Subject: [PATCH 33/50] Correction to one caliper, one exact matching testing --- test_calipmatch.do | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 0c71799..b588b64 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -203,14 +203,21 @@ gen squared_valdiff = (income_percentile_ex - matchval)^2 sum squared_valdiff, meanonly di r(max) assert r(max) == 0 -keep case income_percentile + *---------------------------------------------------------------------------- *** One caliper matching variable and one exact matching variable *---------------------------------------------------------------------------- +clear +set seed 4585239 +set sortseed 789045789 + +set obs 200 +gen byte case=(_n<=20) +gen byte income_percentile=ceil(runiform() * 100) + gen byte sex=round(runiform()) -replace case=(_n<=20) * Valid test test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// From a069d74651a76f28d9b98a16cfdae9c7a2219c97 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 14:04:31 -0400 Subject: [PATCH 34/50] Verify failed test: revert syntax improvement --- calipmatch.ado | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/calipmatch.ado b/calipmatch.ado index 76cc373..395db5f 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,7 +12,8 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + * Verify there are same number 
of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { From 6d2b3902913d1467dda6425a4ee010fd128a984c Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 14:09:02 -0400 Subject: [PATCH 35/50] Revert "Verify failed test: revert syntax improvement" This reverts commit a069d74651a76f28d9b98a16cfdae9c7a2219c97. --- calipmatch.ado | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/calipmatch.ado b/calipmatch.ado index 395db5f..76cc373 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -12,8 +12,7 @@ human-readable summary can be accessed at http://creativecommons.org/publicdomai program define calipmatch, sortpreserve rclass version 13.0 - syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] - + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] * Verify there are same number of caliper vars as caliper widths if (`: word count `calipermatch'' != `: word count `caliperwidth'') { From 57613d3122331f276cdfb80bfa23ad2639abbae4 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 14:24:51 -0400 Subject: [PATCH 36/50] Ensure that tests run calipmatch w/ verbatim args --- test_calipmatch.do | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index b588b64..94f108d 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -1,14 +1,16 @@ cscript "calipmatch" adofile calipmatch program define test_calipmatch - - syntax [if] [in], GENerate(name) CASEvar(varname) MAXmatches(integer) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] - calipmatch `if' `in', generate(`generate') casevar(`casevar') maxmatches(`maxmatches') calipermatch(`calipermatch') /// - 
caliperwidth(`caliperwidth') exactmatch(`exactmatch') - + * Run calipmatch with verbatim arguments + calipmatch `0' + + * Testing after successful run if (_rc==0) { + * Assign arguments to locals using the same syntax as calipmatch + syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] + * Store returned objects local cases_total=r(cases_total) local cases_matched=r(cases_matched) From ca8d0334d706cbd295bb4e07f06bd2a70f4564ce Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 14:31:53 -0400 Subject: [PATCH 37/50] Add whitespace to test_calipmatch commands --- test_calipmatch.do | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 94f108d..fd4653c 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -12,14 +12,14 @@ program define test_calipmatch syntax [if] [in], GENerate(name) CASEvar(varname numeric) MAXmatches(numlist integer >0 max=1) CALIPERMatch(varlist numeric) CALIPERWidth(numlist >0) [EXACTmatch(varlist)] * Store returned objects - local cases_total=r(cases_total) - local cases_matched=r(cases_matched) - local match_rate=r(match_rate) - matrix matches=r(matches) + local cases_total = r(cases_total) + local cases_matched = r(cases_matched) + local match_rate = r(match_rate) + matrix matches = r(matches) * Exactly one case per matchgroup - egen casecount=sum(`casevar'), by(`generate') - cap assert casecount==1 if !mi(`generate') + egen casecount = sum(`casevar'), by(`generate') + cap assert casecount == 1 if !mi(`generate') if (_rc!=0) { di as error "More than one case per matchgroup" exit 9 @@ -31,16 +31,16 @@ program define test_calipmatch else assert `cases_matched'==0 * All matched obs are within caliper width - local c=0 + local c = 0 foreach var of varlist `calipermatch' { local ++c local width : word `c' of 
`caliperwidth' - qui gen caseval=`var' if `casevar'==1 & !mi(`generate') - qui egen matchval=mean(caseval), by(`generate') + qui gen caseval = `var' if `casevar'==1 & !mi(`generate') + qui egen matchval = mean(caseval), by(`generate') - qui gen valdiff=`var'-matchval + qui gen valdiff = `var' - matchval sum valdiff, meanonly if r(N)>0 { assert r(min)>=-`width' @@ -55,10 +55,10 @@ program define test_calipmatch if ("`exactmatch'"!="") { foreach var of varlist `exactmatch' { - qui gen caseval=`var' if `casevar'==1 & !mi(`generate') - qui egen matchval=mean(caseval), by(`generate') + qui gen caseval = `var' if `casevar'==1 & !mi(`generate') + qui egen matchval = mean(caseval), by(`generate') - qui gen valdiff=`var'-matchval + qui gen valdiff = `var' - matchval sum valdiff, meanonly if r(N)>0 { assert r(min)==0 @@ -80,10 +80,10 @@ program define test_calipmatch assert r(sum)==`cases_matched' * Tabulation of number of controls matched to each case reported correctly - qui gen control=1-`casevar' - qui egen matched_controls=sum(control), by(`generate') - qui replace matched_controls=0 if mi(`generate') - qui replace matched_controls=. if `casevar'!=1 + qui gen control = 1 - `casevar' + qui egen matched_controls = sum(control), by(`generate') + qui replace matched_controls = 0 if mi(`generate') + qui replace matched_controls = . if `casevar'!=1 forvalues m=0/`maxmatches' { qui count if matched_controls==`m' From 497a76c0d9e799fb1af6c2f5905ef8391e85685b Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 14:34:56 -0400 Subject: [PATCH 38/50] Correct specified return code The wrong return code was being tested for. The error was being caused by the syntax statement before -calipmatch-, rather than -calipmatch- itself. 
This changed in 57613d3 --- test_calipmatch.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index fd4653c..97357c6 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -131,7 +131,7 @@ gen byte case=(_n<=20) * maximum matches is positive, but not an integer rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// - == 198 + == 126 * maximum matches is a negative integer rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(-1) calipermatch(income_percentile) caliperwidth(5)"' /// From 0feb7cd84a0c3899dd33bafdbb1167a959da4ce3 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 14:36:22 -0400 Subject: [PATCH 39/50] Fix another incorrect return code The wrong return code was being tested for. The error was being caused by the syntax statement before -calipmatch-, rather than -calipmatch- itself. This changed in 57613d3 --- test_calipmatch.do | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index 97357c6..64f3925 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -139,7 +139,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(-1) calipermatch(i * multiple maximum matches specified rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1 5) calipermatch(income_percentile) caliperwidth(5)"' /// - == 198 + == 123 * caliper variable is ambiguous gen byte income_percentile2=ceil(rnormal() * 100) From 47f47232083425106af7ce956e7cc382ff2e1296 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 16:55:06 -0400 Subject: [PATCH 40/50] Refactor tests - Update comments - Recategorize / rename categories - Simplify tests for 'perfect match per case' - Set seeds only once --- test_calipmatch.do | 146 ++++++++++++++++++++++++++------------------- 1 file changed, 83 insertions(+), 63 deletions(-) diff --git a/test_calipmatch.do 
b/test_calipmatch.do index 64f3925..1baf1b9 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -99,35 +99,29 @@ program define test_calipmatch end -*---------------------------------------------------------------------------- -*** One caliper matching variable -*---------------------------------------------------------------------------- +*=============================================================================== +* Create dataset: one caliper matching variable +*=============================================================================== clear set seed 4585239 set sortseed 789045789 - set obs 200 gen byte case=(_n<=20) gen byte income_percentile=ceil(runiform() * 100) -* Valid test +*------------------------------------------------------------------------------- +* Valid inputs +*------------------------------------------------------------------------------- + test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -*---------------------------------------------------------------------------- -* Syntax -*---------------------------------------------------------------------------- - -* string case variable -drop case -gen case=cond(_n<=20,"case","ctrl") -rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) "' /// - == 109 -drop case -gen byte case=(_n<=20) +*------------------------------------------------------------------------------- +* Invalid syntax +*------------------------------------------------------------------------------- * maximum matches is positive, but not an integer rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// @@ -154,10 +148,18 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(no * caliper width is negative rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) 
calipermatch(income_percentile) caliperwidth(-5)"' /// == 125 - -*---------------------------------------------------------------------------- -* Necessary Conditions and Set-up for Matching -*---------------------------------------------------------------------------- + +*------------------------------------------------------------------------------- +* Invalid data +*------------------------------------------------------------------------------- + +* string case variable +drop case +gen case=cond(_n<=20,"case","ctrl") +rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) "' /// + == 109 +drop case +gen byte case=(_n<=20) * generate a variable that already exists gen matchgroup=. @@ -177,7 +179,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in * no cases replace case=0 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// - == 2001 + == 2001 * case/control variable not always 0 or 1 in sample replace case=(_n<=20) @@ -189,46 +191,52 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in test_calipmatch in 2/200, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -*---------------------------------------------------------------------------- -* Performance -*---------------------------------------------------------------------------- +*=============================================================================== +* Create dataset: one caliper matching variable, with a perfect match per case +*=============================================================================== -* perfect match exists for each case -clear +clear set obs 100 -gen byte income_percentile_ex = _n +gen byte income_percentile = _n expand 2, gen(case) -test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile_ex) 
caliperwidth(5) -gen caseval = income_percentile_ex if case ==1 -egen matchval = mean(caseval), by(matchgroup) -gen squared_valdiff = (income_percentile_ex - matchval)^2 -sum squared_valdiff, meanonly -di r(max) -assert r(max) == 0 +*------------------------------------------------------------------------------- +* Valid inputs, test performance of matching algorithm +*------------------------------------------------------------------------------- -*---------------------------------------------------------------------------- -*** One caliper matching variable and one exact matching variable -*---------------------------------------------------------------------------- +* perfect match is found for each case, despite other valid matches +test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) -clear -set seed 4585239 -set sortseed 789045789 +assert !mi(matchgroup) +egen n = count(income_percentile), by(matchgroup) +egen sd = sd(income_percentile), by(matchgroup) + +assert n == 2 +assert sd == 0 +keep case income_percentile + +*=============================================================================== +* Create dataset: one caliper matching variable and one exact matching variable +*=============================================================================== + +clear set obs 200 -gen byte case=(_n<=20) -gen byte income_percentile=ceil(runiform() * 100) +gen byte case = (_n<=20) +gen byte income_percentile = ceil(runiform() * 100) +gen byte sex = round(runiform()) -gen byte sex=round(runiform()) +*------------------------------------------------------------------------------- +* Valid inputs +*------------------------------------------------------------------------------- -* Valid test test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) keep case income_percentile sex 
-*---------------------------------------------------------------------------- -* Syntax -*---------------------------------------------------------------------------- +*------------------------------------------------------------------------------- +* Invalid syntax +*------------------------------------------------------------------------------- * exact variable is ambiguous gen byte sex2=round(runiform()) @@ -240,6 +248,10 @@ drop sex2 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(nonsense)"' /// == 111 +*------------------------------------------------------------------------------- +* Invalid data +*------------------------------------------------------------------------------- + * float exact matching variable recast float sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// @@ -256,51 +268,59 @@ rename sex_numeric sex recast byte sex *---------------------------------------------------------------------------- -* Necessary Conditions and Set-up for Matching +* Edge cases *---------------------------------------------------------------------------- -* no controls among one matching group +* no controls among one matching group -> no matches in that group replace case=1 if sex==1 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) + +assert mi(matchgroup) if sex==1 keep case income_percentile sex - -* no cases among one matching group + +* no cases among one matching group -> no matches in that group replace case=0 if sex==1 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) + +assert mi(matchgroup) if sex==1 keep case income_percentile sex -* no matching groups with both cases and controls +* no matching groups with both cases and controls -> no 
matches in any group replace case=1 if sex==0 test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) -assert matchgroup==. + +assert mi(matchgroup) keep case income_percentile sex -*---------------------------------------------------------------------------- -*** Many caliper and exact matching variables, m:1 match -*---------------------------------------------------------------------------- +*=============================================================================== +* Create dataset: many caliper variables and many exact matching variables +*=============================================================================== clear set obs 50000 gen byte case=(_n<=5000) gen byte sex=round(runiform()) -gen byte age = 44 + ceil(runiform()*17) gen byte self_emp = (runiform()<0.1) -gen byte prov = ceil(runiform()*9) +gen byte prov = ceil(runiform()*10) +gen byte age = 44 + ceil(runiform()*17) gen byte income_percentile=ceil(runiform() * 100) -* Valid test +*------------------------------------------------------------------------------- +* Valid inputs +*------------------------------------------------------------------------------- + test_calipmatch, gen(matchgroup) case(case) maxmatches(5) /// exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5) -keep case sex age self_emp prov income_percentile +keep case sex self_emp prov age income_percentile -*---------------------------------------------------------------------------- -* Syntax -*---------------------------------------------------------------------------- +*------------------------------------------------------------------------------- +* Invalid syntax +*------------------------------------------------------------------------------- * Not enough caliper widths rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3)"' /// 
From 25e3e35e5b58c3cbcab5cdfb03cad6017f5c0b57 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 22:53:17 -0400 Subject: [PATCH 41/50] GHA: auto-update calipmatch.sthlp in README (#3) * Automatically update embedded sthlp in README * Bugfix detached HEAD ``` [detached HEAD e8fbcab] README: update embedded calipmatch.sthlp 1 file changed, 144 deletions(-) fatal: You are not currently on a branch. To push the history leading to the current (detached HEAD) state now, use git push origin HEAD: ``` * README: update embedded calipmatch.sthlp * Bugfix replacement of sthlp in README * README: update embedded calipmatch.sthlp * Whitespace changes in code * Bugfix: detection of changes * GHA: Update names of steps Co-authored-by: OppInsights-Bot --- .github/workflows/readme_sthlp.yml | 72 ++++++++++++++++++++++++++++++ README.md | 2 + test_calipmatch.do | 6 +-- 3 files changed, 76 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/readme_sthlp.yml diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml new file mode 100644 index 0000000..9828848 --- /dev/null +++ b/.github/workflows/readme_sthlp.yml @@ -0,0 +1,72 @@ +name: readme_sthlp + +on: + pull_request: + types: [opened, synchronize, reopened] + paths: + - 'calipmatch.sthlp' + workflow_dispatch: + +jobs: + readme_sthlp: + runs-on: ubuntu-latest + timeout-minutes: 30 # change max time from default 6hr + + steps: + - name: Install Stata + run: | + # Install Stata + mkdir -p /tmp/statafiles + curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz + cd /tmp/statafiles + tar -zxf ./Stata16Linux64.tar.gz + sudo mkdir -p /usr/local/stata16 + cd /usr/local/stata16 + + # The following command returns 1 even though it's ok + set +e + sudo sh -c 'yes | /tmp/statafiles/install' + set -e + + cd /usr/local/bin + sudo ln -s /usr/local/stata16/stata-mp . 
+ sudo ln -s /usr/local/stata16/xstata-mp . + sudo curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic + rm -r /tmp/statafiles + cd /tmp + env: + OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + + - name: Check out code repository (main branch) + uses: actions/checkout@v2 + if: github.ref == 'refs/heads/main' + + - name: Check out code repository (PR branch) + uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.ref }} + if: github.ref != 'refs/heads/main' + + - name: Convert Stata help file to HTML + run: stata-mp log html calipmatch.sthlp calipmatch.html + + - name: Place Stata help file in README.md + run: | + sed -z -i 's/.*/\n\n/' README.md + sed -i '// r calipmatch.html' README.md + rm calipmatch.html + + - name: Check if there are changes + run: | + set +e + test -z "$(git status --porcelain)" + echo "README_UPDATED=$?" >> $GITHUB_ENV + + - name: Push updated README to Github + run: | + git config user.name OppInsights-Bot + git config user.email info@opportunityinsights.org + git add README.md + git commit -m "README: update embedded calipmatch.sthlp" + git push + if: env.README_UPDATED == 1 diff --git a/README.md b/README.md index 2845946..469c535 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,7 @@ This documentation was converted automatically from the Stata help file by runni The help file looks best when viewed in Stata using `help calipmatch`. +

 Title
 

@@ -140,3 +141,4 @@ The help file looks best when viewed in Stata using `help calipmatch`. agarland@hsc.mb.ca

+ diff --git a/test_calipmatch.do b/test_calipmatch.do index 1baf1b9..e110fc2 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -93,9 +93,7 @@ program define test_calipmatch exit 9 } } - } - end @@ -175,7 +173,7 @@ rcof `"test_calipmatch if income_percentile>100, gen(matchgroup) case(case) maxm replace case=1 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// == 2001 - + * no cases replace case=0 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5)"' /// @@ -256,7 +254,7 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(in recast float sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(sex)"' /// == 198 - + * string exact matching variable rename sex sex_numeric gen sex=cond(sex_numeric==0,"M","F") From 08b77986d7c1fb34a7558ef039c18ef720c08b7e Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 23:01:16 -0400 Subject: [PATCH 42/50] Refactor: shared Stata installation bash script --- .github/workflows/readme_sthlp.yml | 21 +-------------------- .github/workflows/stata_tests.yml | 21 +-------------------- automation/install_stata.sh | 21 +++++++++++++++++++++ 3 files changed, 23 insertions(+), 40 deletions(-) create mode 100755 automation/install_stata.sh diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml index 9828848..d459f99 100644 --- a/.github/workflows/readme_sthlp.yml +++ b/.github/workflows/readme_sthlp.yml @@ -14,26 +14,7 @@ jobs: steps: - name: Install Stata - run: | - # Install Stata - mkdir -p /tmp/statafiles - curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz - cd /tmp/statafiles - tar -zxf ./Stata16Linux64.tar.gz - sudo mkdir -p /usr/local/stata16 - cd /usr/local/stata16 
- - # The following command returns 1 even though it's ok - set +e - sudo sh -c 'yes | /tmp/statafiles/install' - set -e - - cd /usr/local/bin - sudo ln -s /usr/local/stata16/stata-mp . - sudo ln -s /usr/local/stata16/xstata-mp . - sudo curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic - rm -r /tmp/statafiles - cd /tmp + run: ./automation/install_stata.sh env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 6fce63b..7707682 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -12,26 +12,7 @@ jobs: steps: - name: Install Stata - run: | - # Install Stata - mkdir -p /tmp/statafiles - curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz - cd /tmp/statafiles - tar -zxf ./Stata16Linux64.tar.gz - sudo mkdir -p /usr/local/stata16 - cd /usr/local/stata16 - - # The following command returns 1 even though it's ok - set +e - sudo sh -c 'yes | /tmp/statafiles/install' - set -e - - cd /usr/local/bin - sudo ln -s /usr/local/stata16/stata-mp . - sudo ln -s /usr/local/stata16/xstata-mp . 
- sudo curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic - rm -r /tmp/statafiles - cd /tmp + run: ./automation/install_stata.sh env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} diff --git a/automation/install_stata.sh b/automation/install_stata.sh new file mode 100755 index 0000000..ba41450 --- /dev/null +++ b/automation/install_stata.sh @@ -0,0 +1,21 @@ +#!/bin/bash + +# Install Stata +mkdir -p /tmp/statafiles +curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz +cd /tmp/statafiles +tar -zxf ./Stata16Linux64.tar.gz +sudo mkdir -p /usr/local/stata16 +cd /usr/local/stata16 + +# The following command returns 1 even though it's ok +set +e +sudo sh -c 'yes | /tmp/statafiles/install' +set -e + +cd /usr/local/bin +sudo ln -s /usr/local/stata16/stata-mp . +sudo ln -s /usr/local/stata16/xstata-mp . +sudo curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic +rm -r /tmp/statafiles +cd /tmp From a89be2a41da91264c191975185fdb91cc7a030a2 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 23:04:26 -0400 Subject: [PATCH 43/50] GHA: Check out repo before installing Stata --- .github/workflows/readme_sthlp.yml | 20 ++++++++++++++++---- .github/workflows/stata_tests.yml | 12 ++++++++++-- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml index d459f99..57c1f57 100644 --- a/.github/workflows/readme_sthlp.yml +++ b/.github/workflows/readme_sthlp.yml @@ -13,10 +13,9 @@ jobs: timeout-minutes: 30 # change max time from default 6hr steps: - - name: Install Stata - run: ./automation/install_stata.sh - env: - OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + ####################### + # Configure + ####################### - name: Check out code repository (main branch) uses: 
actions/checkout@v2 @@ -28,6 +27,15 @@ jobs: ref: ${{ github.event.pull_request.head.ref }} if: github.ref != 'refs/heads/main' + - name: Install Stata + run: ./automation/install_stata.sh + env: + OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + + ####################### + # Build + ####################### + - name: Convert Stata help file to HTML run: stata-mp log html calipmatch.sthlp calipmatch.html @@ -37,6 +45,10 @@ jobs: sed -i '// r calipmatch.html' README.md rm calipmatch.html + ####################### + # Push + ####################### + - name: Check if there are changes run: | set +e diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 7707682..4ac9309 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -11,13 +11,21 @@ jobs: timeout-minutes: 30 # change max time from default 6hr steps: + ####################### + # Configure + ####################### + + - name: Check out code repository + uses: actions/checkout@v2 + - name: Install Stata run: ./automation/install_stata.sh env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} - - name: Check out code repository - uses: actions/checkout@v2 + ####################### + # Build + ####################### - name: Run tests run: | From 43950f7fc4d6a00cc8a119d68be63e17d1fb931c Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 19 Oct 2022 23:06:45 -0400 Subject: [PATCH 44/50] Update length of comment title separators --- test_calipmatch.do | 60 +++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/test_calipmatch.do b/test_calipmatch.do index e110fc2..aa0d994 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -97,9 +97,9 @@ program define test_calipmatch end -*=============================================================================== -* Create dataset: one caliper matching variable -*=============================================================================== 
+*============================================================================ +* New dataset: one caliper matching variable +*============================================================================ clear set seed 4585239 @@ -109,17 +109,17 @@ set obs 200 gen byte case=(_n<=20) gen byte income_percentile=ceil(runiform() * 100) -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Valid inputs -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Invalid syntax -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * maximum matches is positive, but not an integer rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(.3) calipermatch(income_percentile) caliperwidth(5)"' /// @@ -147,9 +147,9 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(no rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(-5)"' /// == 125 -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Invalid data -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * string case variable drop case @@ -189,18 +189,18 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) 
maxmatches(1) calipermatch(in test_calipmatch in 2/200, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) keep case income_percentile -*=============================================================================== -* Create dataset: one caliper matching variable, with a perfect match per case -*=============================================================================== +*============================================================================ +* New dataset: one caliper matching variable, with a perfect match per case +*============================================================================ clear set obs 100 gen byte income_percentile = _n expand 2, gen(case) -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Valid inputs, test performance of matching algorithm -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * perfect match is found for each case, despite other valid matches test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) @@ -214,9 +214,9 @@ assert sd == 0 keep case income_percentile -*=============================================================================== -* Create dataset: one caliper matching variable and one exact matching variable -*=============================================================================== +*============================================================================ +* New dataset: one caliper matching variable and one exact matching variable +*============================================================================ clear set obs 200 @@ -224,17 +224,17 @@ gen byte case = (_n<=20) gen byte income_percentile = ceil(runiform() * 100) gen byte sex = round(runiform()) 
-*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Valid inputs -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// calipermatch(income_percentile) caliperwidth(5) exactmatch(sex) keep case income_percentile sex -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Invalid syntax -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * exact variable is ambiguous gen byte sex2=round(runiform()) @@ -246,9 +246,9 @@ drop sex2 rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(1) calipermatch(income_percentile) caliperwidth(5) exactmatch(nonsense)"' /// == 111 -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Invalid data -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * float exact matching variable recast float sex @@ -294,9 +294,9 @@ assert mi(matchgroup) keep case income_percentile sex -*=============================================================================== -* Create dataset: many caliper variables and many exact matching variables -*=============================================================================== +*============================================================================ +* New dataset: many caliper variables and many exact matching variables 
+*============================================================================ clear set obs 50000 @@ -308,17 +308,17 @@ gen byte prov = ceil(runiform()*10) gen byte age = 44 + ceil(runiform()*17) gen byte income_percentile=ceil(runiform() * 100) -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Valid inputs -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- test_calipmatch, gen(matchgroup) case(case) maxmatches(5) /// exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5) keep case sex self_emp prov age income_percentile -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Invalid syntax -*------------------------------------------------------------------------------- +*---------------------------------------------------------------------------- * Not enough caliper widths rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3)"' /// From 74e6b2a1c1c76e24e77f9d675a104f024890bb5f Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Tue, 25 Oct 2022 14:11:05 -0400 Subject: [PATCH 45/50] Enable Stata 17 installs (#5) * Upgrade to Stata 17 * Make stata version configurable by environment variable * Install Stata 16 instead * Use Stata 16 for all workflows --- .github/workflows/readme_sthlp.yml | 1 + .github/workflows/stata_tests.yml | 1 + automation/install_stata.sh | 23 ++++++++++++++++------- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml index 57c1f57..427da16 100644 --- a/.github/workflows/readme_sthlp.yml +++ 
b/.github/workflows/readme_sthlp.yml @@ -31,6 +31,7 @@ jobs: run: ./automation/install_stata.sh env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + STATA_VERSION: 16 ####################### # Build diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index 4ac9309..bb14417 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -22,6 +22,7 @@ jobs: run: ./automation/install_stata.sh env: OI_HTTPS_PW: ${{ secrets.OI_HTTPS_PW }} + STATA_VERSION: 16 ####################### # Build diff --git a/automation/install_stata.sh b/automation/install_stata.sh index ba41450..d40b6b8 100755 --- a/automation/install_stata.sh +++ b/automation/install_stata.sh @@ -1,12 +1,21 @@ #!/bin/bash +# Define parameters +if [ -z "$STATA_VERSION" ]; then + STATA_VERSION=17 +fi +url_installer=https://d2bx6aas1fcmzl.cloudfront.net/stata_install/Stata${STATA_VERSION}Linux64.tar.gz +url_license=https://d2bx6aas1fcmzl.cloudfront.net/stata_install/stata.lic +download_username=oi +download_password=${OI_HTTPS_PW} + # Install Stata mkdir -p /tmp/statafiles -curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/Stata16Linux64.tar.gz --output /tmp/statafiles/Stata16Linux64.tar.gz +curl -u ${download_username}:${download_password} ${url_installer} --output /tmp/statafiles/Stata${STATA_VERSION}Linux64.tar.gz cd /tmp/statafiles -tar -zxf ./Stata16Linux64.tar.gz -sudo mkdir -p /usr/local/stata16 -cd /usr/local/stata16 +tar -zxf ./Stata${STATA_VERSION}Linux64.tar.gz +sudo mkdir -p /usr/local/stata${STATA_VERSION} +cd /usr/local/stata${STATA_VERSION} # The following command returns 1 even though it's ok set +e @@ -14,8 +23,8 @@ sudo sh -c 'yes | /tmp/statafiles/install' set -e cd /usr/local/bin -sudo ln -s /usr/local/stata16/stata-mp . -sudo ln -s /usr/local/stata16/xstata-mp . 
-sudo curl -u oi:${OI_HTTPS_PW} https://d2bx6aas1fcmzl.cloudfront.net/stata16/stata.lic --output /usr/local/stata16/stata.lic +sudo ln -s /usr/local/stata${STATA_VERSION}/stata-mp . +sudo ln -s /usr/local/stata${STATA_VERSION}/xstata-mp . +sudo curl -u ${download_username}:${download_password} ${url_license} --output /usr/local/stata${STATA_VERSION}/stata.lic rm -r /tmp/statafiles cd /tmp From cf1bb89c80adac3b2a9eb1989c707f6205579f9a Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 26 Oct 2022 10:27:23 -0400 Subject: [PATCH 46/50] GHA: check if version number has incremented, auto-increment dates (#4) * GHA action version upgrades and indentation fix * GHA: check if version number in PR has incremented * Fix missing syntax in GHA * GHA: bugfix paths * Fix nested variable names * Assign value to variable with name containing var Ref: https://stackoverflow.com/questions/13716607/creating-a-string-variable-name-from-the-value-of-another-string * Allow single-digit day in date * WIP: update last-updated date to today * WIP: try removing second match parameter * WIP: bugfix sed * WIP: remove all match parameters from substitution * WIP: sed substitution on specific line * WIP: simplify regular expression * Modify date replacement * WIP debug date and add package date update * Fix calipmatch.pkg name * Fix package date name * Fix package file name * Separate code to update pkg date * Simplify date update in pkg file * Push date updates to repo * Bugfix: push from the PR directory * Update 'last updated' dates * Add pkg file as trigger for version_increment GHA * Bugfix sed syntax -iE was treating 'E' as the suffix for a backup file also, we no longer need -E because we're not using regex extended syntax * Tidy step names Co-authored-by: jethaaly Co-authored-by: OppInsights-Bot --- .github/workflows/readme_sthlp.yml | 6 +- .github/workflows/stata_tests.yml | 2 +- .github/workflows/version_increment.yml | 143 ++++++++++++++++++++++++ calipmatch.ado | 2 +- 
calipmatch.pkg | 2 +- calipmatch.sthlp | 2 +- 6 files changed, 150 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/version_increment.yml diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml index 427da16..b6a9b59 100644 --- a/.github/workflows/readme_sthlp.yml +++ b/.github/workflows/readme_sthlp.yml @@ -4,7 +4,7 @@ on: pull_request: types: [opened, synchronize, reopened] paths: - - 'calipmatch.sthlp' + - 'calipmatch.sthlp' workflow_dispatch: jobs: @@ -18,11 +18,11 @@ jobs: ####################### - name: Check out code repository (main branch) - uses: actions/checkout@v2 + uses: actions/checkout@v3 if: github.ref == 'refs/heads/main' - name: Check out code repository (PR branch) - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.ref }} if: github.ref != 'refs/heads/main' diff --git a/.github/workflows/stata_tests.yml b/.github/workflows/stata_tests.yml index bb14417..d64f4e0 100644 --- a/.github/workflows/stata_tests.yml +++ b/.github/workflows/stata_tests.yml @@ -16,7 +16,7 @@ jobs: ####################### - name: Check out code repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install Stata run: ./automation/install_stata.sh diff --git a/.github/workflows/version_increment.yml b/.github/workflows/version_increment.yml new file mode 100644 index 0000000..c7916ef --- /dev/null +++ b/.github/workflows/version_increment.yml @@ -0,0 +1,143 @@ +name: version_increment + +on: + pull_request: + types: [opened, synchronize, reopened] + branches: + - main + paths: + - 'calipmatch.ado' + - 'calipmatch.sthlp' + - 'calipmatch.pkg' + workflow_dispatch: + +jobs: + version_increment: + runs-on: ubuntu-latest + timeout-minutes: 30 # change max time from default 6hr + + steps: + ####################### + # Configure + ####################### + + - name: Check out repository, PR branch + uses: actions/checkout@v3 + with: + ref: ${{ 
github.event.pull_request.head.ref }} + path: pr + + - name: Check out repository, main branch + uses: actions/checkout@v3 + with: + ref: main + path: main + + - name: Obtain 'version number' and 'changed date' from ado-file and help-file + run: | + for b in "pr" "main"; do + cd ${b} + echo "${b}_ado_version=$(head -n 1 calipmatch.ado | sed -nE 's/.*version ([0-9]+\.[0-9]+\.[0-9]+).*/\1/p')" >> $GITHUB_ENV + echo "${b}_ado_date=$(head -n 1 calipmatch.ado | sed -nE 's/.*version [0-9]+\.[0-9]+\.[0-9]+ +([0-3]?[0-9][a-z][a-z][a-z][0-9][0-9][0-9][0-9]).*/\1/p')" >> $GITHUB_ENV + + echo "${b}_help_version=$(sed '2q;d' calipmatch.sthlp | sed -nE 's/.*version ([0-9]+\.[0-9]+\.[0-9]+).*/\1/p')" >> $GITHUB_ENV + echo "${b}_help_date=$(sed '2q;d' calipmatch.sthlp | sed -nE 's/.*version [0-9]+\.[0-9]+\.[0-9]+ +([0-3]?[0-9][a-z][a-z][a-z][0-9][0-9][0-9][0-9]).*/\1/p')" >> $GITHUB_ENV + cd .. + done + + ####################### + # Version numbers + ####################### + + - name: Validate ado-file version numbers exist + run: | + if [[ -z ${pr_ado_version} ]]; then + echo "PR branch: version number not detected in calipmatch.ado" + exit 1 + fi + if [[ -z ${main_ado_version} ]]; then + echo "main branch: version number not detected in calipmatch.ado" + exit 1 + fi + + - name: Validate help-file version numbers exist + run: | + if [[ -z ${pr_help_version} ]]; then + echo "PR branch: version number not detected in calipmatch.sthlp" + exit 1 + fi + if [[ -z ${main_help_version} ]]; then + echo "main branch: version number not detected in calipmatch.sthlp" + exit 1 + fi + + - name: Validate ado-file version numbers are incremented + run: | + for b in "pr" "main"; do + v=${b}_ado_version + printf -v "${b}_major" "%s" "$(echo "${!v}" | sed -nE 's/([0-9]+)\.[0-9]+\.[0-9]+/\1/p')" + printf -v "${b}_minor" "%s" "$(echo "${!v}" | sed -nE 's/[0-9]+\.([0-9]+)\.[0-9]+/\1/p')" + printf -v "${b}_patch" "%s" "$(echo "${!v}" | sed -nE 's/[0-9]+\.[0-9]+\.([0-9]+)/\1/p')" + done + if [[ ! 
$pr_major -gt $main_major ]] && [[ ! $pr_minor -gt $main_minor ]] && [[ ! $pr_patch -gt $main_patch ]]; then + echo "version number not incremented, pr=${pr_ado_version} main=${main_ado_version}" + exit 1 + fi + + - name: Validate help-file version numbers match ado-file version numbers + run: | + if [[ "$pr_ado_version" != "$pr_help_version" ]]; then + echo "version numbers in calipmatch.ado and calipmatch.sthlp do not match, ado=${pr_ado_version} sthlp=${pr_help_version}" + exit 1 + fi + if [[ "$main_ado_version" != "$main_help_version" ]]; then + echo 'error in main branch:' + echo "version numbers in calipmatch.ado and calipmatch.sthlp do not match, ado=${main_ado_version} sthlp=${main_help_version}" + exit 1 + fi + + ####################### + # Date last updated + ####################### + + - name: Update date in ado-file and help-file + run: | + today=$(TZ=America/New_York date +%-d%b%Y | tr A-Z a-z) + if [[ "$pr_ado_date" != "$today" ]]; then + sed -i "1s/${pr_ado_date}/${today}/" calipmatch.ado + fi + if [[ "$pr_help_date" != "$today" ]]; then + sed -i "2s/${pr_help_date}/${today}/" calipmatch.sthlp + fi + working-directory: pr + + - name: Update date in package-file + run: | + if grep -q '^d Distribution-Date: [0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]' calipmatch.pkg; then + sed -i "s/^d Distribution-Date: [0-9][0-9][0-9][0-9][0-1][0-9][0-3][0-9]/d Distribution-Date: $(TZ=America/New_York date +%Y%m%d)/" calipmatch.pkg + else + echo "PR branch: Distribution-Date not detected in calipmatch.pkg" + exit 1 + fi + working-directory: pr + + ####################### + # Push + ####################### + + - name: Check if there are changes + run: | + set +e + test -z "$(git status --porcelain)" + echo "FILES_UPDATED=$?"
>> $GITHUB_ENV + working-directory: pr + + - name: Push updates to Github + run: | + git config user.name OppInsights-Bot + git config user.email info@opportunityinsights.org + git add calipmatch.ado calipmatch.sthlp calipmatch.pkg + git commit -m "Update 'last updated' dates" + git push + if: env.FILES_UPDATED == 1 + working-directory: pr diff --git a/calipmatch.ado b/calipmatch.ado index 76cc373..39351f6 100644 --- a/calipmatch.ado +++ b/calipmatch.ado @@ -1,4 +1,4 @@ -*! version 1.1.0 10jun2022 Michael Stepner and Allan Garland, software@michaelstepner.com +*! version 1.1.0 26oct2022 Michael Stepner and Allan Garland, software@michaelstepner.com /* CC0 license information: To the extent possible under law, the author has dedicated all copyright and related and neighboring rights diff --git a/calipmatch.pkg b/calipmatch.pkg index efa99ac..ab6447a 100644 --- a/calipmatch.pkg +++ b/calipmatch.pkg @@ -14,7 +14,7 @@ d KW: caliper d d Requires: Stata version 13 d -d Distribution-Date: 20220610 +d Distribution-Date: 20221026 d d Author: Michael Stepner d Support: email software@michaelstepner.com diff --git a/calipmatch.sthlp b/calipmatch.sthlp index ea88bcd..7274064 100644 --- a/calipmatch.sthlp +++ b/calipmatch.sthlp @@ -1,5 +1,5 @@ {smcl} -{* *! version 1.1.0 10jun2022}{...} +{* *! 
version 1.1.0 26oct2022}{...} {viewerjumpto "Syntax" "calipmatch##syntax"}{...} {viewerjumpto "Description" "calipmatch##description"}{...} {viewerjumpto "Options" "calipmatch##options"}{...} From 99326d658e6fad31690bae12cb55e83c1f4a667a Mon Sep 17 00:00:00 2001 From: jethaaly <113864374+jethaaly@users.noreply.github.com> Date: Wed, 26 Oct 2022 20:21:29 -0400 Subject: [PATCH 47/50] Test distance metric: minimize SSE (#6) * Add test: minimize sum of squares * Fix test: minimize sum of squares * Style/formatting changes Co-authored-by: Michael Stepner --- test_calipmatch.do | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/test_calipmatch.do b/test_calipmatch.do index aa0d994..5151395 100644 --- a/test_calipmatch.do +++ b/test_calipmatch.do @@ -328,6 +328,46 @@ rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex rcof `"test_calipmatch, gen(matchgroup) case(case) maxmatches(5) exactmatch(sex self_emp prov) calipermatch(age income_percentile) caliperwidth(3 5 5)"' /// == 123 +*============================================================================ +* New dataset: two caliper matching variables, with different optimal +* matches under different distance metrics +*============================================================================ + +clear +set obs 5 + +gen byte case = 0 +replace case = 1 in 1 + +gen byte income_percentile = 40 +replace income_percentile = 47 in 2 +replace income_percentile = 52 in 3 +replace income_percentile = 41 in 4 +replace income_percentile = 55 in 5 + +gen byte age = 40 +replace age = 47 in 2 +replace age = 55 in 4 + +gen float sse = (income_percentile - income_percentile[1])^2 + (age - age[1])^2 + +list + +*---------------------------------------------------------------------------- +* Valid inputs, test performance of matching algorithm +*---------------------------------------------------------------------------- + +* matches minimize sum of squares 
+test_calipmatch, gen(matchgroup) case(case) maxmatches(1) /// + calipermatch(income_percentile age) caliperwidth(100 100) + +sum sse if case==0, meanonly +assert cond(_n==2, sse==r(min), sse!=r(min)) // test that obs 2 is global min + +assert matchgroup == 1 in 2 // test that obs 2 is matched +assert matchgroup == . in 3/5 + +keep case income_percentile age *---------------------------------------------------------------------------- From 849d2210a7ba38d2f7b9015c569e52569e22687a Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 26 Oct 2022 20:24:44 -0400 Subject: [PATCH 48/50] Tweak README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 469c535..850c095 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ ## Installation -This beta version of **calipmatch** can be installed using: +This development version of **calipmatch** can be installed using: ``` net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/develop) @@ -8,7 +8,7 @@ net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/de ## Stata help file -This documentation was converted automatically from the Stata help file by running `log html calipmatch.sthlp calipmatch.md` in Stata. +This documentation was converted automatically from the Stata help file by running `log html calipmatch.sthlp calipmatch.html` in Stata. The help file looks best when viewed in Stata using `help calipmatch`. 
From 5e09772b47799c6f2a4728f059f309513a5efd68 Mon Sep 17 00:00:00 2001 From: Michael Stepner Date: Wed, 26 Oct 2022 21:20:46 -0400 Subject: [PATCH 49/50] GHA: auto update README installation instructions (#7) * GHA: auto update README installation instructions * README: update installation instructions * Bugfix escaping in multiline string * Actually insert README installation instructions * README: update installation instructions Co-authored-by: OppInsights-Bot --- .github/workflows/readme_installation.yml | 86 +++++++++++++++++++++++ .github/workflows/readme_sthlp.yml | 1 + .github/workflows/version_increment.yml | 1 + README.md | 8 ++- 4 files changed, 93 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/readme_installation.yml diff --git a/.github/workflows/readme_installation.yml b/.github/workflows/readme_installation.yml new file mode 100644 index 0000000..bde2fc9 --- /dev/null +++ b/.github/workflows/readme_installation.yml @@ -0,0 +1,86 @@ +name: readme_installation + +on: + pull_request: + types: [opened, synchronize, reopened, closed] + branches: + - main + +jobs: + readme_installation: + runs-on: ubuntu-latest + timeout-minutes: 30 # change max time from default 6hr + + steps: + ####################### + # Configure + ####################### + + - name: Check out code repository (main branch) + uses: actions/checkout@v3 + with: + ref: ${{ github.base_ref }} + if: github.event.pull_request.merged == true + + - name: Check out code repository (head branch) + uses: actions/checkout@v3 + with: + ref: ${{ github.head_ref }} + if: github.event.pull_request.merged != true + + ####################### + # Build + ####################### + + - name: Write installation instructions (main branch) + run: | + cat > readme_installation_instructions.md <<"EOL" + Install **calipmatch** in Stata from the SSC repository: + ``` + ssc install calipmatch + ``` + + Or you can install **calipmatch** in Stata directly from this Github repository: + ``` + net 
install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/main) + ``` + EOL + if: github.event.pull_request.merged == true + + - name: Write installation instructions (head branch) + run: | + cat > readme_installation_instructions.md <<"EOL" + This development version of **calipmatch** can be installed using: + ``` + net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/${GITHUB_HEAD_REF}) + ``` + EOL + + sed -i "s/\${GITHUB_HEAD_REF}/${GITHUB_HEAD_REF}/" readme_installation_instructions.md + if: github.event.pull_request.merged != true + + - name: Place installation instructions in README.md + run: | + sed -z -i 's/.*/\n\n/' README.md + sed -i '// r readme_installation_instructions.md' README.md + rm readme_installation_instructions.md + + ####################### + # Push + ####################### + + - name: Check if there are changes + run: | + set +e + test -z "$(git status --porcelain)" + echo "README_UPDATED=$?" >> $GITHUB_ENV + + - name: Push updated README to Github + run: | + git config user.name OppInsights-Bot + git config user.email info@opportunityinsights.org + git pull --ff-only + git add README.md + git commit -m "README: update installation instructions" + git push + if: env.README_UPDATED == 1 diff --git a/.github/workflows/readme_sthlp.yml b/.github/workflows/readme_sthlp.yml index b6a9b59..c6366ac 100644 --- a/.github/workflows/readme_sthlp.yml +++ b/.github/workflows/readme_sthlp.yml @@ -60,6 +60,7 @@ jobs: run: | git config user.name OppInsights-Bot git config user.email info@opportunityinsights.org + git pull --ff-only git add README.md git commit -m "README: update embedded calipmatch.sthlp" git push diff --git a/.github/workflows/version_increment.yml b/.github/workflows/version_increment.yml index c7916ef..a95feda 100644 --- a/.github/workflows/version_increment.yml +++ b/.github/workflows/version_increment.yml @@ -136,6 +136,7 @@ jobs: run: | git config user.name OppInsights-Bot git config 
user.email info@opportunityinsights.org + git pull --ff-only git add calipmatch.ado calipmatch.sthlp calipmatch.pkg git commit -m "Update 'last updated' dates" git push diff --git a/README.md b/README.md index 850c095..e6f7784 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,18 @@ ## Installation + This development version of **calipmatch** can be installed using: - ``` -net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/develop) +net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/readme_install_instructions) ``` + + ## Stata help file This documentation was converted automatically from the Stata help file by running `log html calipmatch.sthlp calipmatch.html` in Stata. -The help file looks best when viewed in Stata using `help calipmatch`. +The help file can be explored interactively in Stata using `help calipmatch`.

From 6f15e3a4951925ccc8d19c83c992d6c9863aebd3 Mon Sep 17 00:00:00 2001
From: OppInsights-Bot 
Date: Thu, 27 Oct 2022 01:21:02 +0000
Subject: [PATCH 50/50] README: update installation instructions

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index e6f7784..e08003d 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 
 This development version of **calipmatch** can be installed using:
 ```
-net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/readme_install_instructions)
+net install calipmatch, from(https://github.com/michaelstepner/calipmatch/raw/develop)
 ```