attached. cheers ./w
Code:_Genetic_Code = {
{14,/*AAA*/ 13,/*AAC*/ 14,/*AAG*/ 13,/*AAT*/
7, /*ACA*/ 7, /*ACC*/ 7, /*ACG*/ 7, /*ACT*/
19, /*AGA*/ 5, /*AGC*/19, /*AGG*/ 5, /*AGT*/
2, /*ATA*/ 2, /*ATC*/ 3, /*ATG*/ 2, /*ATT*/
12,/*CAA*/ 11,/*CAC*/ 12,/*CAG*/ 11,/*CAT*/
6, /*CCA*/ 6, /*CCC*/ 6, /*CCG*/ 6, /*CCT*/
19,/*CGA*/ 19,/*CGC*/ 19,/*CGG*/ 19,/*CGT*/
1, /*CTA*/ 1, /*CTG*/ 1, /*CTC*/ 1, /*CTT*/
16,/*GAA*/ 15,/*GAC*/ 16,/*GAG*/ 15,/*GAT*/
8, /*GCA*/ 8, /*GCC*/ 8, /*GCG*/ 8, /*GCT*/
20,/*GGA*/ 20,/*GGC*/ 20,/*GGG*/ 20,/*GGT*/
4, /*GTA*/ 4, /*GTC*/ 4, /*GTG*/ 4, /*GTT*/
10,/*TAA*/ 9, /*TAC*/10,/*TAG*/ 9, /*TAT*/
5, /*TCA*/ 5, /*TCC*/ 5, /*TCG*/ 5, /*TCT*/
10,/*TGA*/ 17,/*TGC*/ 18,/*TGG*/ 17,/*TGT*/
1, /*TTA*/ 0, /*TTC*/ 1, /*TTG*/ 0 /*TTT*/ }
};
GeneticCodeExclusions = "TAA,TAG,TGA";
function CodonFrequencies ( nucFreqMatrix, mSize )
{
PIStop = 1.0;
result = { mSize, 1 };
hshift = 0;
for ( h = 0; h < 64; h = h + 1 ) {
first = h$16;
second = h%16$4;
third = h%4;
if ( _Genetic_Code[ h ] == 10 ) { /* stop codon */
hshift = hshift + 1;
PIStop = PIStop - nucFreqMatrix[first][0]*nucFreqMatrix[second][1]*nucFreqMatrix[third][2];
}
else {
result[ h-hshift ][0] = nucFreqMatrix[first][0]*nucFreqMatrix[second][1]*nucFreqMatrix[third][2];
}
}
return result*( 1.0/PIStop );
}
function PopulateModelMatrix (ModelMatrixName& )
{
ModelMatrixName = {ModelMatrixDimension,ModelMatrixDimension};
hshift = 0;
for (h=0; h<64; h=h+1)
{
if (_Genetic_Code[h]==10)
{
hshift = hshift+1;
continue;
}
vshift = hshift;
for (v = h+1; v<64; v=v+1)
{
diff = v-h;
if (_Genetic_Code[v]==10)
{
vshift = vshift+1;
continue;
}
if ((h$4==v$4)||((diff%4==0)&&(h$16==v$16))||(diff%16==0))
{
if (_Genetic_Code[0][h]==_Genetic_Code[0][v])
{
ModelMatrixName[h-hshift][v-vshift] := t;
ModelMatrixName[v-vshift][h-hshift] := t;
}
else
{
ModelMatrixName[h-hshift][v-vshift] := c*t;
ModelMatrixName[v-vshift][h-hshift] := c*t;
}
}
}
}
return 0;
}
function SetWDistribution (resp)
{
global P1 = 1/3;
global P2 = 1/2;
P1:<1;
P2:<1;
global W_1 = 0.1;
W_1:<1;
global W_2 = 5;
W_2:>1;
categFreqMatrix = {{P1,(1-P1)*P2, (1-P1)*(1-P2)}};
categRateMatrix = {{W_1,1,W_2}};
category c = (3, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25);
return 0;
}
function SetWDistributionSims ( resp )
{
global P1 := 1/3;
global P2 := 1/2;
global W_1 := 0.1;
global W_2 := 5;
categFreqMatrix = {{P1,(1-P1)*P2, (1-P1)*(1-P2)}};
categRateMatrix = {{W_1,1,W_2}};
category c = (3, categFreqMatrix , MEAN, ,categRateMatrix, 0, 1e25);
return 0;
}
/*======================================================================================================================*/
LIKELIHOOD_FUNCTION_OUTPUT = 3;
ModelMatrixDimension = 64;
for ( h = 0; h < 64; h = h + 1 ) {
if ( _Genetic_Code[ h ] == 10 ) {
ModelMatrixDimension = ModelMatrixDimension - 1;
}
}
DataSet ds = ReadDataFile ( "30taxa.nex" );
fscanf ( "30taxa.tre", "String", TREE );
DataSetFilter FullData = CreateFilter ( ds, 3, "", "", GeneticCodeExclusions );
HarvestFrequencies ( nucleotidesByPos, FullData, 3, 1, 1 );
vectorOfFrequencies = CodonFrequencies ( nucleotidesByPos, ModelMatrixDimension );
/* Infer parameters from data */
SetWDistribution ( 1 );
modelMatrix = 0;
PopulateModelMatrix ( "modelMatrix" );
Model model = ( modelMatrix, vectorOfFrequencies, 1 );
Tree tree = TREE;
LikelihoodFunction lf = ( FullData, tree );
Optimize ( res, lf );
fprintf ( stdout, "\n*** Optimized parameter values: base data ***\n" );
fprintf ( stdout, "W1 < 1:\t", Format (W_1,0,3), "\tP:\t", Format (P1,0,3), "\n" );
fprintf ( stdout, "W = 1:\t1\tP:\t", Format( (1-P1)*P2, 0, 3), "\n" );
fprintf ( stdout, "W2 > 1:\t", Format (W_2,0,3), "\tP:\t", Format((1-P1)*(1-P2),0,3), "\n" );
fprintf ( stdout, "\n" );
/* Simulate data with category variable constraints */
SetWDistributionSims ( 1 );
modelMatrix = 0;
PopulateModelMatrix ( "modelMatrix" );
Model model = ( modelMatrix, vectorOfFrequencies, 1 );
Tree tree = TREE;
LikelihoodFunction lf = ( FullData, tree );
DataSet simData = SimulateDataSet ( lf, GeneticCodeExclusions, simRates, catVarNames );
fprintf ( stdout, "\n*** Simulated parameter values ***\n" );
fprintf ( stdout, "W1 < 1:\t", Format (W_1,0,3), "\tP:\t", Format (P1,0,3), "\n" );
fprintf ( stdout, "W = 1:\t1\tP:\t", Format( (1-P1)*P2, 0, 3), "\n" );
fprintf ( stdout, "W2 > 1:\t", Format (W_2,0,3), "\tP:\t", Format((1-P1)*(1-P2),0,3), "\n" );
fprintf ( stdout, "\n" );
/* Infer parameters from the simulated data */
DataSetFilter simDataFilter = CreateFilter ( simData, 3, "", "", GeneticCodeExclusions );
HarvestFrequencies ( nucleotidesByPos, simDataFilter, 3, 1, 1 );
vectorOfFrequencies = CodonFrequencies ( nucleotidesByPos, ModelMatrixDimension );
SetWDistribution ( 1 );
modelMatrix = 0;
PopulateModelMatrix ( "modelMatrix" );
Model model = ( modelMatrix, vectorOfFrequencies, 1 );
Tree tree = TREE;
LikelihoodFunction lf = ( FullData, tree );
Optimize ( res, lf );
fprintf ( stdout, "\n*** Optimized parameter values: simulated data ***\n" );
fprintf ( stdout, "W1 < 1:\t", Format (W_1,0,3), "\tP:\t", Format (P1,0,3), "\n" );
fprintf ( stdout, "W = 1:\t1\tP:\t", Format( (1-P1)*P2, 0, 3), "\n" );
fprintf ( stdout, "W2 > 1:\t", Format (W_2,0,3), "\tP:\t", Format((1-P1)*(1-P2),0,3), "\n" );
fprintf ( stdout, "\n" );