Chapter 5. ghimport.sas

Table of Contents
The macro itself
A sample SAS Software script with ghimport.sas

ghimport.sas is used to import an IBD file from Genehunter 2 and phenotypic data into a SAS Software dataset.

The macro itself

The best documentation for using ghimport.sas are the comments at the top of the macro.

	  /*******************************************************************
%ghimport 0.09

Copyright (c) 2000 Jeff Lessem, Institute for Behavioral Genetics,
    University of Colorado; and Stacey Cherny, Wellcome Trust Centre
    for Human Genetics, University of Oxford

See http://ibgwww.Colorado.EDU/~lessem/software/qms2.html#license for
the terms under which this software may be used.

Import a GENEHUNTER ibd file into a SAS Software dataset.

Usage: 
       ibdfile=	ibd file to analyze.  This is the file that is output
		by "pre-sas.sh".  REQUIRED

       phenfile= The file used to read phenotype scores from.  REQUIRED

       markers= The number of loci at which you have markers.  This is
                needed so that when reading in the phenotype file the
                macro knows how many columns to skip before reading
                the phenotype values.  REQUIRED
    
       missing= The number used to represent missing phenotype
		values.  This will be converted to the SAS Software
		standard "." notation.  This will default to "." if
		not specified.  OPTIONAL, DEFAULT="."

       asis=  Set to 1 to use the data exactly as read in, no
               selection on the affection status or double entry is
               performed.  This assumes that the first sib is the
               proband.  This may produce a datafile which is
               statistically inapropriate for some analysis.  0
               selects probands based on the affection status code and
               performs double entry when there are multiple probands
               in a family.  The default of reading the data in with
               selection and double entry is that the QMS2 macro was
               primarily written to perform the DF model, so it is
               setup by default to be correct for a DF analysis.
               OPTIONAL, DEFAULT=0

       outdata= The dataset to save the imported data into.  This
		defaults to "data.ibdphen" (the temporary dataset
		named "ibdphen").  OPTIONAL, DEFAULT="ibdphen"

For example to call this from within a SAS Software script the
following commands would work:

%ghimport(ibdfile=dump-sas.ibd,
	   phenfile=pheno.dat,
           markers=10,
	   missing=99.000,
           asis=0,
	   outdata=mydata.wave1);

or

%ghimport(ibdfile=dump-sas.ibd,
	   phenfile=pheno.dat,
           markers=10);

*********************************************************************/

%macro ghimport(
    ibdfile=macro-error,
    phenfile=macro-error,
    markers=macro-error,
    missing=.,
    asis=0,
    outdata=ibdphen);

data ibd; /* the dataset ibd will contain the ibd probabilities */
    infile "&ibdfile";

/* The columns are assigned to the following variables, where pos is
    the position on the chromosome, pedigree is the ID number of the
    family, indnum1 is the ID number of the first family member of the
    pairing and indnum2 is the ID number of the second member of the
    pairing, ibd0 ibd1 ibd2 represent the probability of the
    pairing having ibd 0, .5 or 1 at the pos(ition).  This routine
    could be modified to import ibd files from other programs than
    GENEHUNTER by changing the order of the variables read and adding
    or removing variables as necessary.
    */
   input pos pedigree indnum1 indnum2 prior0 prior1 prior2 ibd0 ibd1 ibd2 ;
run;

data phen; /* the dataset phen will contain the phenotype scores */
    infile "&phenfile";
    /* The phenotypes are read from a GENEHUNTER input file.  The
        variable "proband" designates the affection status with
        0=unknown 1=unaffected 2=affected.  For the purposes of this
        macro 0,1=not a proband; 2=proband */
    input pedigree indnum1 mothnum fathnum sex1 proband1
        x1-x%eval(&markers*2) phen1;
    drop x1-x%eval(&markers*2) mothnum fathnum;
    if phen1 ne &missing;
run;

/* sort the datasets in preparation to merge them */
proc sort data=ibd;
    by pedigree indnum1;
proc sort data=phen;
    by pedigree indnum1;

/* merge the phenotype of the first individual into the ibd file */
data ibdphen;
    merge ibd phen ;
    by pedigree indnum1;
run;

/* rename the variables in the pheno dataset to be set for individual
    2 */
proc datasets library=work;
    modify phen;
    rename sex1=sex2 phen1=phen2 indnum1=indnum2 proband1=proband2;
run;

/* sort ibdphen by indnum2 for next merge */

proc sort data=ibdphen;
    by pedigree indnum2;

data ibdphen;
    merge ibdphen phen;
    by pedigree indnum2;
/* drop the parents and lines with missing individuals */
/* no assumption of the parents id number, use the prior probability
    score */
        if indnum1 ne . and indnum2 ne . and
        (prior0 eq .25 and prior0 ne .) and
        (prior1 eq .5 and prior1 ne .) and
        (prior2 eq .25 and prior2 ne .);
    df=1; /* all forward pairings are an independent observation */
    dbl=0; /* is this the original or the double entered version? */
        drop prior0 prior1 prior2;
run;

/* Double enter the data */
data backward;
    set ibdphen;
    /* save into dummy variables */
    indnumx=indnum1;
    sexx=sex1;
    phenx=phen1;
    probandx=proband1;
    /* move 1 to 2 */
    indnum1=indnum2;
    sex1=sex2;
    phen1=phen2;
    proband1=proband2;
    /* move dummies back to 1 */
    indnum2=indnumx;
    sex2=sexx;
    phen2=phenx;
    proband2=probandx;
    /* drop dummy variables */
    drop indnumx sexx phenx probandx;
    /* double entered cases do not add new information */
    df=0;
    /* set double entered to true */
    dbl=1;
run;

data ibdphen;
/* Combine the single and the double entered datasets */
    set ibdphen backward;
/* compute pihat from the ibd probabilities */
    pihat = .5 * ibd1 + ibd2 ;
run;

proc sort data=ibdphen;
    by pos;

/* This part handles selection */
%if &asis=0 %then %do; /* do selection */
        data &outdata;
           set ibdphen;
/* This removes non-proband predictor phenotypes */
               if proband1 eq 2;
/* If the first instance of a pairing has been dropped then add back
    a degree of freedom */
                   if proband1 eq 2 and (proband2 eq 0 or proband2 eq
                       1) and df eq 0 then df=1;
run;
%end; /* selection */
    %else %do; /* No selection */
        data &outdata;
            set ibdphen;
            if dbl ne 1;
run;
%end; /* No selection */

%mend ghimport; /* ghimport */