* Merging PSID Data: SAS Example - Method 1;
*---------------------------------------------------------------------*
| |
| This example program demonstrates a relatively simple method for |
| merging PSID data. It uses data from 3 different years, subset- |
| ting criteria, and the compress and tagsort options. |
| |
| When working with PSID data, the amount of available system disk |
| space and memory is often an important consideration. The follow- |
| ing options can be used to enhance system performance and control |
| the amount of disk space and memory used: |
| |
| 1) LENGTH statements specify the number of bytes used for storing |
| variables and can significantly reduce the size of a data set. |
| The precision of a numeric variable is dependent on its length. |
| Length specifications for numeric variables are host-specific. |
| Do not shorten length specifications for numeric variables con- |
| taining fractions. |
| |
| 2) The COMPRESS= data set or system option can decrease the size |
| of a data set and reduce the number of input/output operations. |
| |
| 3) The SORTSIZE= option specifies the maximum amount of memory |
| available to PROC SORT. Sortsize parameters are host-specific. |
| |
| 4) The TAGSORT option can be used with PROC SORT to reduce the |
| amount of temporary disk space used. When the total length of |
| BY variables is small compared with the record length, temp- |
| orary disk space is reduced significantly, but processing |
| time can be much higher. |
| |
| 5) The BUFNO= data set or system option specifies the number of |
| page buffers to use when reading from or writing to a SAS data |
| set. Increasing the number of available buffers uses more |
| memory while reducing the number of input/output operations. |
| |
| 6) The BUFSIZE= data set or system option specifies the permanent |
| page size for a SAS data set. Increasing the page size uses |
| more memory while reducing the number of input/output operat- |
| ions. Using a buffer size larger than necessary is inefficient. |
| |
*---------------------------------------------------------------------;
* Read in cross-year individual file and select variables
from 1990-1992 needed for analysis.
The three yearly interview numbers leave this step renamed to
ID90, ID91 and ID92, which are the BY variables used when the
family files are merged on later in the program;
LIBNAME PSID "[FOLDER NAME]";

DATA IND90_92(COMPRESS=YES RENAME=(ER30642=ID90 ER30689=ID91
                                   ER30733=ID92));
  INFILE "[PATH]\IND2007ER.txt" LRECL = 3118 ;

  * Column input - each variable is read from fixed positions of the
  record. The trailing .3 on ER30805 supplies three implied decimals;
  INPUT
    ER30001 2 - 5
    ER30002 6 - 8
    ER30642 1528 - 1532
    ER30643 1533 - 1534
    ER30644 1535 - 1536
    ER30645 1537 - 1539
    ER30653 1555
    ER30657 1563 - 1564
    ER30659 1566 - 1571
    ER30689 1647 - 1650
    ER30690 1651 - 1652
    ER30691 1653 - 1654
    ER30692 1655 - 1657
    ER30699 1672
    ER30703 1680 - 1681
    ER30705 1683 - 1688
    ER30707 1690 - 1695
    ER30733 1764 - 1767
    ER30734 1768 - 1769
    ER30735 1770 - 1771
    ER30736 1772 - 1774
    ER30744 1790
    ER30748 1798 - 1799
    ER30750 1801 - 1806
    ER30752 1808 - 1813
    ER30805 1907 - 1913 .3
    ER32000 2060
    ER32022 2113 - 2114
    ER32049 2189
  ;

  * ER30733 is the 1992 interview number (it is renamed to ID92 above),
  so its label carries 1992 rather than repeating the 1991 label;
  LABEL
    ER30001="1968 INTERVIEW NUMBER 68"
    ER30002="PERSON NUMBER 68"
    ER30642="1990 INTERVIEW NUMBER"
    ER30643="SEQUENCE NUMBER 90"
    ER30644="RELATIONSHIP TO HEAD 90"
    ER30645="AGE OF INDIVIDUAL 90"
    ER30653="EMPLOYMENT STAT 90"
    ER30657="COMPLETED EDUCATION 90"
    ER30659="TOT TXBL INCOME 90"
    ER30689="1991 INTERVIEW NUMBER"
    ER30690="SEQUENCE NUMBER 91"
    ER30691="RELATIONSHIP TO HEAD 91"
    ER30692="AGE OF INDIVIDUAL 91"
    ER30699="EMPLOYMENT STAT 91"
    ER30703="COMPLETED EDUCATION 91"
    ER30705="TOT LABOR INCOME 91"
    ER30707="TOT ASSET INCOME 91"
    ER30733="1992 INTERVIEW NUMBER"
    ER30734="SEQUENCE NUMBER 92"
    ER30735="RELATIONSHIP TO HEAD 92"
    ER30736="AGE OF INDIVIDUAL 92"
    ER30744="EMPLOYMENT STAT 92"
    ER30748="COMPLETED EDUCATION 92"
    ER30750="TOT LABOR INCOME 92"
    ER30752="TOT ASSET INCOME 92"
    ER30805="COMBINED IND WEIGHT 92"
    ER32000="SEX OF INDIVIDUAL"
    ER32022="# BIRTHS OF THIS IND"
    ER32049="LAST KNOWN MARITAL STAT"
  ;

  * Recode sentinel codes to SAS missing. NOTE(review) - 99, 98 and 8
  are presumably the NA/DK codes of these items - confirm each one
  against the PSID codebook;
  IF ER30645=99 THEN ER30645=.;
  IF ER30657=99 THEN ER30657=.;
  IF ER30692=99 THEN ER30692=.;
  IF ER30703=99 THEN ER30703=.;
  IF ER30736=99 THEN ER30736=.;
  IF ER30748=99 THEN ER30748=.;
  IF ER32022=98 THEN ER32022=.;
  IF ER32049=8 THEN ER32049=.;

  * Select those who were ever heads or wives/"wives" between 1990 and
  1992. A record is kept when, in at least one of the three years, the
  sequence number is 1 with relationship code 10, or the sequence
  number is 2 with relationship code 20 or 22;
  IF (ER30643 EQ 01 AND ER30644 EQ 10) OR
     (ER30643 EQ 02 AND ER30644 IN (20, 22)) OR
     (ER30690 EQ 01 AND ER30691 EQ 10) OR
     (ER30690 EQ 02 AND ER30691 IN (20, 22)) OR
     (ER30734 EQ 01 AND ER30735 EQ 10) OR
     (ER30734 EQ 02 AND ER30735 IN (20, 22));
RUN ;
* Build FAM90 from the 1990 family file, reading only the columns
needed for analysis. The interview number V17702 leaves the step
under the name ID90;
DATA FAM90(RENAME=(V17702=ID90) COMPRESS=YES);
  INFILE "[PATH]\FAM1990.txt" LRECL=2312;

  * Fixed-column read - the .2 suffix supplies two implied decimals;
  INPUT V17702 4-8
        V17836 283-288
        V18262 1167-1169
        V18564 1633-1635
        V18814 2018
        V18878 2160-2165
        V18887 2206-2212 .2
        V18888 2213-2219 .2
  ;

  LABEL V17702 = "1990 INTERVIEW NUMBER"
        V17836 = "WIFE 89 LABOR/WAGE"
        V18262 = "C9-10 OCC-LAST JOB (H-U)"
        V18564 = "E9-10 OCC-LAST JOB (W-U)"
        V18814 = "M32 RACE OF HEAD (1 MEN)"
        V18878 = "TOTAL HEAD LABOR Y 89"
        V18887 = "HEAD 89 AVG HRLY EARNING"
        V18888 = "WIFE 89 AVG HRLY EARNING"
  ;

  * Store the codes 999 (occupation items) and 9 (race item) as
  SAS missing values;
  IF V18262 EQ 999 THEN V18262 = .;
  IF V18564 EQ 999 THEN V18564 = .;
  IF V18814 EQ 9 THEN V18814 = .;
RUN;
* Put both inputs in ID90 order, which the BY-merge below requires.
TAGSORT trades extra processing time for less temporary disk space;
PROC SORT TAGSORT DATA=IND90_92;
  BY ID90;
RUN;
PROC SORT TAGSORT DATA=FAM90;
  BY ID90;
RUN;

* Attach the 1990 family variables to the selected individuals.
Every IND90_92 record is kept, and family records that match no
selected individual are discarded;
DATA PSID.FAM_IND(COMPRESS=YES);
  MERGE FAM90
        IND90_92(IN=FROM_IND);
  BY ID90;
  IF NOT FROM_IND THEN DELETE;
RUN;
* Build FAM91 from the 1991 family file, reading only the columns
needed for analysis. The interview number V19002 leaves the step
under the name ID91;
DATA FAM91(RENAME=(V19002=ID91) COMPRESS=YES);
  INFILE "[PATH]\FAM1991.txt" LRECL = 2314 ;

  * Fixed-column read - the .2 suffix supplies two implied decimals;
  INPUT V19002 2-5
        V19136 281-286
        V19562 1165-1167
        V19864 1631-1633
        V20114 2016
        V20178 2158-2163
        V20187 2204-2210 .2
        V20188 2211-2217 .2
  ;

  LABEL V19002 = "1991 INTERVIEW NUMBER"
        V19136 = "WIFE 90 LABOR/WAGE"
        V19562 = "C9-10 OCC-LAST JOB (H-U)"
        V19864 = "E9-10 OCC-LAST JOB (W-U)"
        V20114 = "L32 RACE OF HEAD (1 MEN)"
        V20178 = "TOTAL HEAD LABOR Y 90"
        V20187 = "HEAD 90 AVG HRLY EARNING"
        V20188 = "WIFE 90 AVG HRLY EARNING"
  ;

  * Store the codes 999 (occupation items) and 9 (race item) as
  SAS missing values;
  IF V19562 EQ 999 THEN V19562 = .;
  IF V19864 EQ 999 THEN V19864 = .;
  IF V20114 EQ 9 THEN V20114 = .;
RUN;
* Re-order the running merged file and the 1991 family file by ID91,
which the BY-merge below requires. PSID.FAM_IND is sorted in place;
PROC SORT TAGSORT DATA=PSID.FAM_IND;
  BY ID91;
RUN;
PROC SORT TAGSORT DATA=FAM91;
  BY ID91;
RUN;

* Add the 1991 family variables to PSID.FAM_IND, replacing that data
set with the result. Every existing PSID.FAM_IND record is kept, and
family records that match none of them are discarded;
DATA PSID.FAM_IND(COMPRESS=YES);
  MERGE FAM91
        PSID.FAM_IND(IN=FROM_IND);
  BY ID91;
  IF NOT FROM_IND THEN DELETE;
RUN;
* Build FAM92 from the 1992 family file, reading only the columns
needed for analysis. The interview number V20302 leaves the step
under the name ID92;
DATA FAM92(RENAME=(V20302=ID92) COMPRESS=YES);
  INFILE "[PATH]\FAM1992.txt" LRECL = 2294 ;

  * Fixed-column read - the .2 suffix supplies two implied decimals;
  INPUT V20302 4-7
        V20436 283-288
        V20862 1174-1176
        V21164 1640-1642
        V21420 2031
        V21484 2137-2142
        V21493 2183-2189 .2
        V21494 2190-2196 .2
  ;

  LABEL V20302 = "1992 INTERVIEW NUMBER"
        V20436 = "WIFE 91 LABOR/WAGE"
        V20862 = "C9-10 OCC-LAST JOB (H-U)"
        V21164 = "E9-10 OCC-LAST JOB (W-U)"
        V21420 = "M32 RACE OF HEAD (1 MEN)"
        V21484 = "TOTAL HEAD LABOR Y 91"
        V21493 = "HEAD 91 AVG HRLY EARNING"
        V21494 = "WIFE 91 AVG HRLY EARNING"
  ;

  * Store the codes 999 (occupation items) and 9 (race item) as
  SAS missing values;
  IF V20862 EQ 999 THEN V20862 = .;
  IF V21164 EQ 999 THEN V21164 = .;
  IF V21420 EQ 9 THEN V21420 = .;
RUN;
* Re-order the running merged file and the 1992 family file by ID92,
which the BY-merge below requires. PSID.FAM_IND is sorted in place;
PROC SORT TAGSORT DATA=PSID.FAM_IND;
  BY ID92;
RUN;
PROC SORT TAGSORT DATA=FAM92;
  BY ID92;
RUN;

* Add the 1992 family variables to PSID.FAM_IND, replacing that data
set with the result. Every existing PSID.FAM_IND record is kept, and
family records that match none of them are discarded;
DATA PSID.FAM_IND(COMPRESS=YES);
  MERGE FAM92
        PSID.FAM_IND(IN=FROM_IND);
  BY ID92;
  IF NOT FROM_IND THEN DELETE;
RUN;
|
|