Art Poon
|
Hi Francesc, Try this out: Code:/*------------------------------------------------------------------------*/
function ReportSite (siteI, siteM)
{
fullSites[siteI][0] = doneSites[siteM][0]*rateX;
fullSites[siteI][1] = doneSites[siteM][1];
fprintf (stdout, "Site ", Format(siteI+1,4,0),
" Rate = ", Format(fullSites[siteI][0],7,4),
" E[Subs/site] = ", Format(fullSites[siteI][0]*referenceLength,7,4),
" Log(L) ", Format(fullSites[siteI][1],7,4),"\n");
return 0;
}
/*------------------------------------------------------------------------*/
function ReceiveJobs (sendOrNot)
{
MPIReceive (-1, fromNode, result_String);
siteIndex = MPINodeState[fromNode-1][1];
if (sendOrNot)
{
MPISend (fromNode,siteLikelihood);
MPINodeState[fromNode-1][1] = siteCount;
}
else
{
MPINodeState[fromNode-1][0] = 0;
MPINodeState[fromNode-1][1] = -1;
}
siteMap = dupInfo[siteIndex];
ExecuteCommands (result_String);
doneSites[siteMap][0] = siteRate;
doneSites[siteMap][1] = siteLikelihood_MLES[1][0];
ReportSite (siteIndex, siteMap);
return fromNode-1;
}
/*------------------------------------------------------------------------*/
DataSet ds = ReadDataFile (HYPHY_BASE_DIRECTORY + "data" + DIRECTORY_SEPARATOR + "p51.aa");
DataSetFilter filteredData = CreateFilter (ds, 1);
/* load a template model from file without prompting user */
ExecuteAFile (HYPHY_BASE_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "TemplateModels" + DIRECTORY_SEPARATOR + "Dayhoff.mdl");
/* either use tree included with data file or estimate one using NJ, ML, etc. */
ExecuteAFile (HYPHY_BASE_DIRECTORY + "TemplateBatchFiles" + DIRECTORY_SEPARATOR + "queryTree.bf");
global rateX = 1;
ReplicateConstraint ("this1.?.?:=rateX*this2.?.?__",givenTree,givenTree);
/* by default, tree object returned from queryTree.bf is named "givenTree" */
LikelihoodFunction lf = (filteredData, givenTree);
Optimize (resNull, lf);
fprintf (stdout, "\n\nGlobal fit\n",lf);
fprintf (stdout, "\n\nSite by site fits:\n\n");
global siteRate = 1;
/*siteRate:<100;*/ /*It is a arbitrary value?*/
doneSites = {filteredData.unique_sites,2}; /* Getting the different site patterns. Whats is the parameter 2?*/
fullSites = {filteredData.sites,2};
Tree siteTree = treeString;
GetDataInfo (dupInfo, filteredData);
alreadyDone = {};
ReplicateConstraint ("this1.?.?:=siteRate*this2.?.?__",siteTree,givenTree);
UseModel(USE_NO_MODEL);
Tree referenceTree = treeString;
refL = BranchLength(givenTree,-1);
referenceLength = 0;
for (i=0; i< Columns(refL); i=i+1)
{
referenceLength = referenceLength + refL[i];
}
fprintf (stdout, "\nReference tree length (units time): ", referenceLength,
"\nMean rate: ", rateX, " substitutions/site/unit time\n",
"Mean substitutions/site: ", rateX*referenceLength, "\n");
for (siteCount = 0; siteCount < resNull[1][2]; siteCount = siteCount+1)
/*the third is the number of global(shared) parameters that were optimized([1][2])*/
{
GetString (globalVarName,lf,siteCount);
ExecuteCommands (globalVarName+":="+globalVarName+"__;");
}
labels = {{"Rate","Log[L]"}};
if (MPI_NODE_COUNT<=1) /*MPI_NODE_COUNT numberof nodes in MPI which are available*/
{
VERBOSITY_LEVEL = -1;
for (siteCount = 0; siteCount < filteredData.sites; siteCount = siteCount+1)
{
siteMap = dupInfo[siteCount];
if (alreadyDone[siteMap] == 0)
{
filterString = "" + siteCount;
DataSetFilter siteFilter = CreateFilter (ds,1,filterString);
LikelihoodFunction siteLikelihood = (siteFilter, siteTree);
siteRate = 1;
Optimize (site_res, siteLikelihood);
alreadyDone[siteMap] = 1;
siteLengths = BranchLength (siteTree,-1);
doneSites[siteMap][0] = siteRate;
doneSites[siteMap][1] = site_res[1][0];/*Loglikelihood*/
}
ReportSite (siteCount, siteMap);
}
VERBOSITY_LEVEL = 0;
}
else
{
MPINodeState = {MPI_NODE_COUNT-1,2};
for (siteCount = 0; siteCount < filteredData.sites; siteCount = siteCount+1)
{
siteMap = dupInfo[siteCount];
if (alreadyDone[siteMap] == 0)
{
filterString = "" + siteCount;
DataSetFilter siteFilter = CreateFilter (ds,1,filterString);
LikelihoodFunction siteLikelihood = (siteFilter, siteTree);
alreadyDone[siteMap] = 1;
siteRate = 1;
for (mpiNode = 0; mpiNode < MPI_NODE_COUNT-1; mpiNode = mpiNode+1)
{
if (MPINodeState[mpiNode][0]==0)
{
break;
}
}
if (mpiNode==MPI_NODE_COUNT-1)
/* all nodes busy */
{
mpiNode = ReceiveJobs (1);
}
else
{
MPISend (mpiNode+1,siteLikelihood);
MPINodeState[mpiNode][0] = 1;
MPINodeState[mpiNode][1] = siteCount;
}
}
}
while (1)
{
for (nodeCounter = 0; nodeCounter < MPI_NODE_COUNT-1; nodeCounter = nodeCounter+1)
{
if (MPINodeState[nodeCounter][0]==1)
{
fromNode = ReceiveJobs (0);
break;
}
}
if (nodeCounter == MPI_NODE_COUNT-1)
{
break;
}
}
fprintf (stdout, "\n\n");
for (siteCount = 0; siteCount < filteredData.sites; siteCount = siteCount+1)
{
siteMap = dupInfo[siteCount];
ReportSite (siteCount, siteMap);
}
}
nodeCounter = 0;
likelihoodBound = 0;
for (siteCount = 0; siteCount < filteredData.sites; siteCount = siteCount+1)
{
nodeCounter = nodeCounter + fullSites[siteCount][0];
likelihoodBound = likelihoodBound + fullSites[siteCount][1];
}
nodeCounter = nodeCounter/filteredData.sites;
for (siteCount = 0; siteCount < filteredData.sites; siteCount = siteCount+1)
{
fullSites[siteCount][0] = fullSites[siteCount][0]/nodeCounter;
}
/*Creates a windows with the output*/
OpenWindow (CHARTWINDOW,{{"Data Rates"}
{"labels"},
{"fullSites"},
{"Bar Chart"},
{"Index"},
{labels[0]},
{"Site Index"},
{""},
{labels[0]},
{"0"}},
"SCREEN_WIDTH-60;SCREEN_HEIGHT-50;30;50");
fprintf (stdout, "\n\nLikelihood lower bound = ", resNull[1][0]," AIC=", 2*(resNull[1][1]-resNull[1][0]), "\n");
fprintf (stdout, "\n\nApproximate likelihood upper bound = ", likelihoodBound," AIC=", 2*(resNull[1][1]+filteredData.unique_sites-likelihoodBound), "\n");
/*Printing the results*/
SetDialogPrompt ("Save rate results to:");
siteCount = Columns (fullSites);
fprintf (PROMPT_FOR_FILE,CLEAR_FILE,labels[0]);
for (nodeCounter=1; nodeCounter<siteCount; nodeCounter=nodeCounter+1)
{
fprintf (LAST_FILE_PATH,",",labels[nodeCounter]);
}
for (nodeCounter=0; nodeCounter < Rows (fullSites); nodeCounter = nodeCounter+1)
{
fprintf (LAST_FILE_PATH,"\n",fullSites[nodeCounter][0]);
for (mpiNode=1; mpiNode<siteCount; mpiNode=mpiNode+1)
{
fprintf (LAST_FILE_PATH,",",fullSites[nodeCounter][mpiNode]);
}
}
for (siteCount = 0; siteCount < resNull[1][2]; siteCount = siteCount+1)
{
GetString (globalVarName,lf,siteCount);
ExecuteCommands (globalVarName+"="+globalVarName+";");
}
Cheers, - Art.
|