Sergei
|
Dear Graham,
This example code revealed a bug in how the HyPhy core handles model mixtures when one of the components had category variables, and the other did not.
I greatly appreciate your bringing this to my attention. The problem has now been fixed (as of Aug 9th 2006) builds, and the slightly modified example code from your example (pasted here), now gives very similar results with two parameterizations. I realized that my earlier example was not scaling the mean of the mixture to be one correctly, and I also adjusted the number of rate categories to be the same in the gamma component in both cases.
[code] DataSet nucleotideSequences = ReadDataFile ("../data/p51.nex"); DataSetFilter filteredData = CreateFilter ( nucleotideSequences, 1 ); /* Get observed nucleotide frequencies */ HarvestFrequencies ( observedFreqs, filteredData, 1, 1, 1 ); /******************************************************************** Gamma+Invariant LF using category variable ********************************************************************/ fprintf ( stdout, "Gamma+Invariant using category variable\n\n" ); global alpha = 1.0; alpha:>0.01; alpha:<100; /* shape parameter */ global beta = 1.0; beta:>0.01; beta:<100; /* beta paramter */ global TV_TS; TV_TS:>0; /* transversion/transition rate ratio */ global ncats = 5; /* number of rate categories */ global IP = 1/ncats; IP :< 1-(1e-10); /* weight of invariant category */ beta:=alpha*(1-IP); /* mean of rate dist:=1 => (1-IP)*(alpha/beta)+IP*0 := 1 */ catFreqMatrix = { ncats, 1 }; catFreqMatrix[0] := IP; for (h=1; h<ncats; h=h+1) { catFreqMatrix[h] := (1-IP)/(ncats__-1); } category c = ( ncats, /* number of rates */ catFreqMatrix, /* weights */ MEAN, /* sampling method */ (1-IP)*GammaDist(_x_,alpha,beta)*(_x_>0), /* density */ (1-IP)*CGammaDist(_x_,alpha,beta)*(_x_>0)+IP, /* CDF */ 0, /* left bound */ 1e5, /* right bound */ (1-IP)*CGammaDist(_x_,alpha+1,beta)*alpha/beta*(_x_>0) /* mean cumulative function */ ); HKY85RateMatrixGamma = {{* ,c*t*TV_TS,c*t ,c*t*TV_TS} {c*t*TV_TS,* ,c*t*TV_TS,c*t } {c*t ,c*t*TV_TS,* ,c*t*TV_TS} {c*t*TV_TS,c*t ,c*t*TV_TS,* }}; Model HKY85Gamma = ( HKY85RateMatrixGamma, observedFreqs ); Tree givenTree = DATAFILE_TREE; LikelihoodFunction likeFunc = ( filteredData, givenTree ); Optimize ( paramValues, likeFunc ); fprintf ( stdout, likeFunc, "\n\n" ); /******************************************************************** Gamma+Invariant LF using model mixture ********************************************************************/ fprintf ( stdout, "Gamma+Invariant using model mixture\n\n" ); category c = ( ncats-1, /* number of rates */ EQUAL, /* weights */ MEAN, /* sampling method */ GammaDist(_x_,alpha,beta), /* density */ CGammaDist(_x_,alpha,beta), /* CDF */ 0, /* left bound */ 1e5, /* right bound */ CGammaDist(_x_,alpha+1,beta)*alpha/beta /* mean cumulative function */ ); HKY85RateMatrixGamma = {{* ,c*t*TV_TS,c*t ,c*t*TV_TS} {c*t*TV_TS,* ,c*t*TV_TS,c*t } {c*t ,c*t*TV_TS,* ,c*t*TV_TS} {c*t*TV_TS,c*t ,c*t*TV_TS,* }}; Model HKY85Gamma = ( HKY85RateMatrixGamma, observedFreqs ); Tree givenTree1 = DATAFILE_TREE; RateMatrixInvariant = {{0,0,0,0} {0,0,0,0} {0,0,0,0} {0,0,0,0}}; Model Invariant = ( RateMatrixInvariant, observedFreqs ); Tree givenTree2 = DATAFILE_TREE; LikelihoodFunction likeFunc = ( filteredData, givenTree1, filteredData, givenTree2, "Log(SITE_LIKELIHOOD[0]*(1-IP)+SITE_LIKELIHOOD[1]*IP)" ); Optimize ( paramValues, likeFunc ); fprintf ( stdout, likeFunc, "\n\n" ); [/code]
Output [code] amma+Invariant using category variable
Log Likelihood = -3285.32599393655; Shared Parameters: IP=0.438837 alpha=0.791996 TV_TS=0.0980606 beta=alpha*(1-IP)=0.444439
Tree givenTree=((((D_CD_83_ELI_ACC_K03454:0.0214343,D_CD_83_NDK_ACC_M27323:0.0101549)Node3:0.0121809,D_UG_94_94UG114_ACC_U88824:0.0659841)Node2:0.00275392,D_CD_84_84ZR085_ACC_U88822:0.0326156)Node1:0.0278006,B_US_83_RF_ACC_M17451:0.0301682,((B_FR_83_HXB2_ACC_K03455:0.0124699,B_US_86_JRFL_ACC_U63632:0.0189762)Node10:0.00189701,B_US_90_WEAU160_ACC_U21135:0.0225559)Node9:0.00470909);
Gamma+Invariant using model mixture
Log Likelihood = -3285.32535002373; Shared Parameters: IP=0.440838 alpha=0.799072 TV_TS=0.0980331 beta=alpha*(1-IP)=0.446811
Tree givenTree1=((((D_CD_83_ELI_ACC_K03454:0.0383084,D_CD_83_NDK_ACC_M27323:0.0181054)Node3:0.0218335,D_UG_94_94UG114_ACC_U88824:0.118069)Node2:0.00489725,D_CD_84_84ZR085_ACC_U88822:0.0583811)Node1:0.0497151,B_US_83_RF_ACC_M17451:0.0539374,((B_FR_83_HXB2_ACC_K03455:0.0222191,B_US_86_JRFL_ACC_U63632:0.0340032)Node10:0.00337515,B_US_90_WEAU160_ACC_U21135:0.0403464)Node9:0.0084376); Tree givenTree2=((((D_CD_83_ELI_ACC_K03454:0,D_CD_83_NDK_ACC_M27323:0)Node3:0,D_UG_94_94UG114_ACC_U88824:0)Node2:0,D_CD_84_84ZR085_ACC_U88822:0)Node1:0,B_US_83_RF_ACC_M17451:0,((B_FR_83_HXB2_ACC_K03455:0,B_US_86_JRFL_ACC_U63632:0)Node10:0,B_US_90_WEAU160_ACC_U21135:0)Node9:0); [/code]
Cheers, Sergei
|