bruno
YaBB Newbies
Offline
I love YaBB 1G - SP1!
Posts: 13
|
VERBOSITY_LEVEL = -1;
/* include the topologySearch.bf module ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"TopologySearch2. bf.txt");
include a standard NJ module ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility" +DIRECTORY_SEPARATOR+"NJ.bf");
*/
/* first, read the data file and get a list of taxon names */ SetDialogPrompt ("A nucleotide alignment file:"); DataSet allData = ReadDataFile (PROMPT_FOR_FILE); availableTaxonNames = {}; /* an associative array of the form array[taxon] = taxon # for all taxa which are in the data file */ for (i = 0; i<allData.species; i=i+1) { GetString (ithTaxon,allData,i); availableTaxonNames[ithTaxon&&1] = i+1; /* &&1 capitalizes the string; this is used to make comparisons case insensitive */ } fprintf (stdout, "[PHASE 0] Read a data file with ", allData.species, " taxa and ", allData.sites, " nucleotides.\n"); /* now, read and process the list of clades */ SetDialogPrompt ("A list of taxa in every clade:"); currentClade = 1; taxaByClade = {}; /* assoc. array of the form array[clade#] = a list of taxon names */ fscanf (PROMPT_FOR_FILE, "Lines", taxonList); /* read the file into an array of lines*/ isLastLineEmpty = 0; for (i = 0; i<Columns (taxonList); i=i+1) { currentLine = taxonList[i]&&1; if (Abs(currentLine) == 0 ) /* empty line */ { if (isLastLineEmpty == 0) { currentClade = currentClade + 1; isLastLineEmpty = 1; } } else { isLastLineEmpty = 0; if (availableTaxonNames[currentLine] > 0) /* valid taxon name */ { if (Abs(taxaByClade[currentClade])==0) { (taxaByClade)[currentClade] = {}; } ((taxaByClade)[currentClade])[Abs((taxaByClade)[currentClade])] = currentLine; } } } cladesDefined = Abs(taxaByClade); fprintf (stdout, "[PHASE 1] Defined ",cladesDefined , " clades"); taxaInClade = {cladesDefined,1}; /* how many taxa in each clade */ for (i=1; i<=cladesDefined; i=i+1) { fprintf (stdout, "\n\tClade ", i); taxaInClade[i-1] = Abs(taxaByClade[i]); for (j=0; j<Abs(taxaByClade[i]); j=j+1) { fprintf (stdout, "\n\t\t", (taxaByClade[i])[j]); } } fprintf (stdout, "\n"); /* select a model to use */ DataSetFilter filteredData = CreateFilter (allData,1); SelectTemplateModel (filteredData); /* invoke a user-driven selection of substitution model */ fprintf (stdout, "[PHASE 2] Fitting global model parameters using a global tree.\n"); /* see if there is a tree for the entire alignment to infer model parameters from; if not use neighbor joining to get one */ if (Abs(DATAFILE_TREE)) /* have tree */ { Tree globalTree = DATAFILE_TREE; fprintf (stdout, "\tInference from a user (in-file) tree\n"); } else { njTreeString = InferTreeTopology(0); Tree globalTree = njTreeString; fprintf (stdout, "\tInference from a NJ tree\n"); } LikelihoodFunction global_lf = (filteredData,globalTree); Optimize (global_res, global_lf); fprintf (stdout, "\tLogL = ", global_res[1][0],"\n"); /* load more utility functions */ ExecuteAFile (HYPHY_BASE_DIRECTORY+"TemplateBatchFiles"+DIRECTORY_SEPARATOR+"Utility" +DIRECTORY_SEPARATOR+"GrabBag.bf"); /* constrain all global model parameters (e.g. trans/transv ratio, gamma shape parameter etc) based on the global alignment */ fixGlobalParameters ("global_lf"); /* prompt for the number of replicates and where to write the results*/ fprintf (stdout, "How many samples should be drawn:?"); fscanf (stdin,"Number",sampleCount); SetDialogPrompt ("Write the results to:"); fprintf (PROMPT_FOR_FILE, CLEAR_FILE, KEEP_OPEN,"Iterate\tTaxa\tStarLog\tNJ Log\tDeltaDegFreedom\tp-value\tNJ Tree"); baseOutPath = LAST_FILE_PATH; /* loop over the replicates */ for (iterate = 0; iterate < sampleCount; iterate = iterate + 1) { selectedTaxa = {}; /* which taxa fall in this iterate; an assoc. array of the form array[taxon INDEX] = 1 for chosen taxa*/ selectedByClade = {}; for (clade = 0; clade < cladesDefined; clade = clade + 1) { taxonName = (taxaByClade[clade+1])[Random (0,taxaInClade[clade])$1]; selectedTaxa [availableTaxonNames[taxonName]-1] = 1; selectedByClade[clade] = taxonName; } DataSetFilter filteredData = CreateFilter (allData,1,"",selectedTaxa[speciesIndex]>0); fprintf (baseOutPath, "\n", iterate+1,"\t"); starTree = "("; for (i=0; i<cladesDefined; i=i+1) { if (i>0) { fprintf (baseOutPath, ","); starTree = starTree + "," + selectedByClade[i]; } else { starTree = starTree + selectedByClade[i]; } fprintf (baseOutPath, selectedByClade[i]); } starTree = starTree + ")"; /* make a star tree */ Tree star = starTree; LikelihoodFunction smallLF = (filteredData, star); Optimize (resStar, smallLF); /* ################################# BEGGINING OF TOPOLOGYSEARCH.bf ##################### */ /* ____________________________________________*/
function TreeMatrix2TreeString (levelIndex) { treeString = ""; p = 0; k = 0; m = treeNodes[0][levelIndex+1]; n = treeNodes[0][levelIndex];
while (m) { if (m>p) { if (p) { treeString = treeString+","; } for (j=p;j<m;j=j+1) { treeString = treeString+"("; } } else { if (m<p) { for (j=m;j<p;j=j+1) { treeString = treeString+")"; } } else { treeString = treeString+","; } } if (n<cladesDefined) { /* Next line was also changed */ GetString (nodeName, filteredData, n); treeString = treeString+nodeName; } k=k+1; p=m; n=treeNodes[k][levelIndex]; m=treeNodes[k][levelIndex+1]; }
for (j=m;j<p;j=j+1) { treeString = treeString+")"; } return treeString; }
/* ____________________________________________*/
function _PrepareForTreeSearch (treesToBeSearched) { bestTreesStash = {10,2}; globalTreeCounter = 0; treeStatistics = {treesToBeSearched, 1}; for (ii=0; ii<10; ii=ii+1) { bestTreesStash [ii][1] = -1e100; bestTreesStash [ii][0] = ""; } return 1; }
/* ____________________________________________*/
function _AddTreeToResults (currentTreeString, currentLFValue) { treeStatistics [globalTreeCounter][0] = currentLFValue; globalTreeCounter = globalTreeCounter+1; for (ii = 0; ii<10; ii=ii+1) { if (currentLFValue>bestTreesStash[ii][1]) { break; } } if (ii<10) { for (ii2 = 8; ii2>=ii; ii2=ii2-1) { bestTreesStash [ii2+1][1] = bestTreesStash[ii2][1]; bestTreesStash [ii2+1][0] = bestTreesStash[ii2][0]; } bestTreesStash [ii][0] = currentTreeString; bestTreesStash [ii][1] = currentLFValue; } return 1; }
/*---------------------------------------------------------------*/
function ReceiveJobs (sendOrNot) { if (MPI_NODE_COUNT>1) { MPIReceive (-1, fromNode, result_String); saveTree = MPINodeTree[fromNode-1]; saveIdx = MPINodeInfo[fromNode-1][1]; if (sendOrNot) { MPISend (fromNode,current_MPIJOB); MPINodeTree[fromNode-1] = thisTree; MPINodeInfo[fromNode-1][1] = treeCounter; } else { MPINodeInfo[fromNode-1][0] = 0; MPINodeInfo[fromNode-1][1] = -1; MPINodeTree[fromNode-1] = ""; } thisTree = saveTree; ExecuteCommands ("currentLL = " + result_String + ";"); saveCounter = treeCounter; treeCounter = saveIdx; } else { currentLL = res[1][0]; } dummy = _AddTreeToResults (thisTree, currentLL); if (currentLL>bestValue) { bestValue = currentLL; bestTree = thisTree; } fprintf (stdout, " ==> logLhd = ", currentLL); if (MPI_NODE_COUNT>1) { treeCounter = saveCounter; } return fromNode-1; }
/* ____________________________________________*/
function _ReportTreeStatistics (currentLFValue) { ii = 0; fprintf (stdout, "\n\n**************************\n", "* TREE REPORT *\n", "**************************\n\n"); fprintf (stdout, "\n#### BEST TREES #####\n\n"); for (ii=0; ii<10; ii = ii+1) { if (bestTreesStash[ii][1]==(-1e100)) { break; } fprintf (stdout, ii+1, ")."); if (ii>0) { fprintf (stdout, " Worse by: ", bestTreesStash[ii][1]-currentLFValue); } fprintf (stdout,"\n", bestTreesStash[ii][0], "\nLog-likelihood = ", bestTreesStash[ii][1], "\n\n"); } fprintf (stdout, "\n#### STATISTICS #####\n\n"); bestTreesStash [0][0] = 0.1; bestTreesStash [1][0] = 0.5; bestTreesStash [2][0] = 1; bestTreesStash [3][0] = 5; bestTreesStash [4][0] = 10; bestTreesStash [5][0] = 50; bestTreesStash [6][0] = 100; bestTreesStash [7][0] = 1000; bestTreesStash [8][0] = 10000; bestTreesStash [9][0] = 1e100;
bestTreesStash [0][1] = 0; bestTreesStash [1][1] = 0; bestTreesStash [2][1] = 0; bestTreesStash [3][1] = 0; bestTreesStash [4][1] = 0; bestTreesStash [5][1] = 0; bestTreesStash [6][1] = 0; bestTreesStash [7][1] = 0; bestTreesStash [8][1] = 0; bestTreesStash [9][1] = 0; probabilityOfTheData = 0; for (i=0; i<globalTreeCounter; i=i+1) { diff = currentLFValue-treeStatistics[i]; j = 0; while (diff>bestTreesStash[j][0]) { j=j+1; } bestTreesStash [j][1] = bestTreesStash [j][1] + 1; probabilityOfTheData = probabilityOfTheData+Exp(-diff); } bestTreesStash [0][1] = bestTreesStash [0][1]-1; ii = "+---------------+---------------+---------------+---------------+\n"; fprintf (stdout, "\n\n", ii, "| From Best + | To Best + | Tree Count | % of total |\n", ii); for (i=0; i<10; i=i+1) { if (i) { fprintf (stdout, "| " , Format (bestTreesStash [i-1][0],13,1)); } else { fprintf (stdout, "| 0"); } if (i<9) { fprintf (stdout, " | " , Format (bestTreesStash [i][0],13,1)); } else { fprintf (stdout, " | Infinity"); } fprintf (stdout, " | ", Format (bestTreesStash [i][1],13,0), " | ", Format (100*bestTreesStash [i][1]/globalTreeCounter,13,8), " |\n",ii); } fprintf (stdout, "\n\nPosterior probability of the best tree (with uninformative prior) = ",1./probabilityOfTheData,"\n\n"); fprintf (stdout, "\n\n***********Save full tree statistics to a file (y/n)?");
fscanf (stdin, "String", resp);
if ((resp!="n")&&(resp!="N")) { SetDialogPrompt ("Write tree stats string to:"); fprintf (PROMPT_FOR_FILE,CLEAR_FILE,treeStatistics); } treeStatistics = 0;
return 1; }
/* ##################################################################### */
MESSAGE_LOGGING = 0; VERBOSITY_LEVEL = -1; /* SetDialogPrompt ("Please choose a nucleotide or amino-acid data file:");
DataSet ds = ReadDataFile (PROMPT_FOR_FILE); DataSetFilter filteredData = CreateFilter (ds,1,"","");
SelectTemplateModel(filteredData); */
if (MPI_NODE_COUNT > 1) { mpiJobPrefix = ""; mpiJobPrefix * 128; mpiJobPrefix * ("DataSet ds = ReadDataFile (\""+LAST_FILE_PATH+"\");DataSetFilter filteredData = CreateFilter (ds,1);"); Export (modelExp, USE_LAST_MODEL); mpiJobPrefix * modelExp; mpiJobPrefix * 0; mpiJobSuffix = "LikelihoodFunction lf = (filteredData,givenTree);Optimize(res,lf);return res[1][0];"; }
treeNodes = {2*(cladesDefined+1),2*(cladesDefined-2)}; cladesInfo = {cladesDefined,2*(cladesDefined-2)};
branchIndex = {cladesDefined-3,1}; currentLevel = 0;
done = false;
i = 2*cladesDefined-5; j = 1; while (i>1) { j = j*i; i = i-2; }
dummy = _PrepareForTreeSearch (j);
treeNodes[0][0]=0; treeNodes[0][1]=1; treeNodes[1][0]=1; treeNodes[1][1]=1; treeNodes[2][0]=2; treeNodes[2][1]=1; treeNodes[3][0]=cladesDefined; treeNodes[3][1]=0; cladesInfo[0][0]=0; cladesInfo[0][1]=4;
bestTree =""; bestValue=-1e20;
done = 0; treeCounter = 0;
if (MPI_NODE_COUNT>1) { MPINodeInfo = {MPI_NODE_COUNT-1,2}; MPINodeTree = {MPI_NODE_COUNT-1,1}; MPINodeTree[0] = ""; }
while (!done) { if (branchIndex[currentLevel]<2*currentLevel+3) { i = 0; shift = 0; j = 2*currentLevel; k = j+2; m = j+1; while (treeNodes[i][m]) { /*copy tree from prev level to this level */ if (i==branchIndex[currentLevel]) /*insert new branch*/ { shift = 2; if (treeNodes[i][j]<cladesDefined) /* simple branch */ { treeNodes[i][k]=treeNodes[i][j]; treeNodes[i][k+1]=treeNodes[i][m]+1; treeNodes[i+1][k]=currentLevel+3; treeNodes[i+1][k+1]=treeNodes[i][m]+1; treeNodes[i+2][k]=currentLevel+cladesDefined+1; treeNodes[i+2][k+1]=treeNodes[i][m]; cladesInfo[currentLevel+1][k] = i; cladesInfo[currentLevel+1][k+1] = 3; } else { /* update node depths for the entire clade now*/ l = treeNodes[i][j]-cladesDefined; s = cladesInfo[l][j]; for (p=s+cladesInfo[l][m]-1; p>=s; p=p-1) { treeNodes[i][k]=treeNodes[i][j]; treeNodes[i][k+1]=treeNodes[i][m]+1; i=i-1; } i=i+cladesInfo[l][m]; /* new clade record */ cladesInfo[currentLevel+1][k] = cladesInfo[l][j]; cladesInfo[currentLevel+1][k+1] = cladesInfo[l][m]+2; /* now we need to insert two more nodes */ treeNodes[i+1][k]=currentLevel+3; treeNodes[i+1][k+1]=treeNodes[i][m]+1; treeNodes[i+2][k]=currentLevel+cladesDefined+1; treeNodes[i+2][k+1]=treeNodes[i][m]; } for (p=0; p<=currentLevel; p=p+1) { if (cladesInfo[p][j]>i) { cladesInfo[p][k] = cladesInfo[p][j]+2; } else { cladesInfo[p][k] = cladesInfo[p][j]; } if ((cladesInfo[p][j]<=i)&&((cladesInfo[p][j]+cladesInfo[p][m])>i+1)) { cladesInfo[p][k+1] = cladesInfo[p][m]+2; } else { cladesInfo[p][k+1] = cladesInfo[p][m]; } } } else { treeNodes[i+shift][k]=treeNodes[i][j]; treeNodes[i+shift][k+1]=treeNodes[i][m]; } i = i+1; } treeNodes[i+2][k]=treeNodes[i][j]; treeNodes[i+2][k+1]=treeNodes[i][j+1]; if (currentLevel<cladesDefined-4) { currentLevel = currentLevel+1; } else { thisTree = TreeMatrix2TreeString (2*(currentLevel+1)); branchIndex[currentLevel]=branchIndex[currentLevel]+1; fprintf (stdout, "\nTree#",Format(treeCounter,0,0)," ", thisTree); current_MPIJOB = mpiJobPrefix + "Tree givenTree = " + thisTree + ";" + mpiJobSuffix; if (MPI_NODE_COUNT>1) { for (mpiNode = 0; mpiNode < MPI_NODE_COUNT-1; mpiNode = mpiNode+1) { if (MPINodeInfo[mpiNode][0]==0) { break; } } if (mpiNode==MPI_NODE_COUNT-1) /* all nodes busy */ { mpiNode = ReceiveJobs (1); } else { MPINodeInfo[mpiNode][0] = 1; MPINodeInfo[mpiNode][1] = treeCounter; MPINodeTree[mpiNode] = thisTree; MPISend (mpiNode+1,current_MPIJOB); } } else { Tree treeVar = thisTree; LikelihoodFunction lf = (filteredData, treeVar); Optimize (res, lf); ReceiveJobs (0); } treeCounter = treeCounter+1; } } else { branchIndex[currentLevel]=0; if (currentLevel==0) { done = 1; } else { currentLevel = currentLevel-1; branchIndex[currentLevel]=branchIndex[currentLevel]+1; } } }
if (MPI_NODE_COUNT>1) { while (1) { for (nodeCounter = 0; nodeCounter < MPI_NODE_COUNT-1; nodeCounter = nodeCounter+1) { if (MPINodeInfo[nodeCounter][0]==1) { fromNode = ReceiveJobs (0); break; } } if (nodeCounter == MPI_NODE_COUNT-1) { break; } } }
VERBOSITY_LEVEL = 0;
Tree es_tree = bestTree; /* fprintf (stdout,"\n\n --------------------- RESULTS --------------------- \n\n"); fprintf (stdout,"\n\n BestTree =", bestTree);
dummy = _ReportTreeStatistics (bestValue);
LikelihoodFunction lf = (filteredData, tr); Optimize (res,lf); fprintf (stdout, "\n",lf, "\n\n***********Save this tree to a file (y/n)?");
fscanf (stdin, "String", resp);
if ((resp!="n")&&(resp!="N")) { SetDialogPrompt ("Write tree string to:"); fprintf (PROMPT_FOR_FILE,CLEAR_FILE,bestTree,";"); }
*/
/* ####################################### END OF TOPOLOGYSEARCH.bf ###################### */
/* Optimize the likfunc with the best tree found through exaustive search */ LikelihoodFunction small_ES = (filteredData, es_tree); Optimize (res_ES, small_ES); /* compute LRT p-value and write the results */ DF = res_ES[1][1]-resStar[1][1]; pv = 1-CChi2(2(res_ES[1][0]-resStar[1][0]),DF); fprintf (baseOutPath,"\t",resStar[1][0],"\t",res_ES[1][0],"\t",DF,"\t",pv,"\t",Format(es_tree,0,1)); fprintf (stdout, "[ITERATE ", iterate + 1, "/",sampleCount," p = ", pv, "]\n");
if (pv<0.05){ counter_pv = counter_pv+1; }
/* compute AIC and write results */
AICstar = 2*resStar[1][1] - 2*resStar[1][0]; AICes = 2*res_ES[1][1] - 2*res_ES[1][0]; if (AICes<AICstar){ counter_aic = counter_aic+1; } fprintf (stdout, "AIC ", " : ", "Star Tree", " = ", AICstar, "\t", "ES Tree", " = ", AICes, "\n"); fprintf (baseOutPath,"\tAIC results(df):\tStarTree(", resStar[1][1], "):\t", AICstar, "\tExaustiveTree(", res_ES[1][1], "):\t", AICes);
} fprintf (stdout, "Proportion of significant p-values: ", counter_pv, "/", sampleCount, "\n"); fprintf (stdout, "Proportion of AIC_ES < AIC_Star: ", counter_aic, "/", sampleCount, "\n"); fprintf (baseOutPath, "Proportion of significant p-values: ", counter_pv, "/", sampleCount, "\n"); fprintf (baseOutPath, "Proportion of AIC_ES < AIC_Star: ", counter_aic, "/", sampleCount, "\n");
/* close file */ fprintf (baseOutPath, CLOSE_FILE);
|