/// <summary>
/// Merges the binned-count files of one or more samples into <paramref name="mergedBinnedPath"/>.
/// A single sample is copied as-is. Several samples are combined line by line into a weighted
/// average, where each sample's weight is the reciprocal of its on-target median bin count and
/// the weights are normalised to sum to 1.
/// </summary>
/// <param name="binnedPaths">
/// Paths of the per-sample binned files. The first file supplies the lines that are written out;
/// only the fourth tab-separated column of each line is replaced by the weighted average.
/// Every sample is expected to provide a count for each line of the first file.
/// </param>
/// <param name="mergedBinnedPath">Path of the merged output file.</param>
/// <param name="manifest">Optional manifest, passed through to <c>BinCounts</c>.</param>
/// <exception cref="ArgumentException"><paramref name="binnedPaths"/> contains no paths.</exception>
private static void GetWeightedAverageBinCount(IEnumerable<string> binnedPaths, string mergedBinnedPath,
    NexteraManifest manifest = null)
{
    // Materialise once: the sequence is needed for its size, its first element and indexed access,
    // and re-enumerating an arbitrary IEnumerable for each of those is wasteful (and quadratic
    // when done per sample).
    List<string> paths = binnedPaths.ToList();
    int sampleCount = paths.Count;
    if (sampleCount == 0)
    {
        throw new ArgumentException("At least one binned file is required.", "binnedPaths");
    }

    if (sampleCount == 1) // copy file
    {
        // A missing source file is skipped silently, exactly as before.
        if (File.Exists(paths[0]))
        {
            // Overwrite in a single call instead of delete-then-copy, so the destination is never
            // removed before the copy has a chance to succeed.
            File.Copy(paths[0], mergedBinnedPath, overwrite: true);
        }
        return;
    }

    // merge normal samples
    double[] weights = new double[sampleCount];
    List<double>[] binCountsBySample = new List<double>[sampleCount];
    for (int sampleIndex = 0; sampleIndex < sampleCount; sampleIndex++)
    {
        var binCounts = new BinCounts(paths[sampleIndex], manifest: manifest);
        List<double> counts = binCounts.AllCounts;
        // If a manifest is available, get the median of bins overlapping the targeted regions only.
        // For small panels, there could be a lot of bins with zero count and the median would be 0
        // if taken over all the bins, resulting in division by zero.
        double median = binCounts.OnTargetMedianBinCount;
        weights[sampleIndex] = median > 0 ? 1.0 / median : 0;
        binCountsBySample[sampleIndex] = counts;
    }

    double weightSum = weights.Sum();
    if (weightSum > 0)
    {
        for (int i = 0; i < sampleCount; i++) { weights[i] /= weightSum; } // so weights sum to 1
    }
    else
    {
        // Every sample had a non-positive median, so all weights are zero and normalising would
        // compute 0/0 = NaN for every bin. Fall back to a plain (equally weighted) average.
        for (int i = 0; i < sampleCount; i++) { weights[i] = 1.0 / sampleCount; }
    }

    // Compute the weighted average of bin counts across samples, using the first file as the
    // template for every output line.
    using (GzipReader reader = new GzipReader(paths[0]))
    using (GzipWriter writer = new GzipWriter(mergedBinnedPath))
    {
        string line;
        int lineIdx = 0;
        while ((line = reader.ReadLine()) != null)
        {
            string[] toks = line.Split('\t');
            double weightedBinCount = 0;
            for (int i = 0; i < sampleCount; i++)
            {
                weightedBinCount += weights[i] * binCountsBySample[i][lineIdx];
            }
            // The count lives in the fourth column. Format with the invariant culture so the
            // decimal separator in this data file does not depend on the machine's locale.
            toks[3] = weightedBinCount.ToString(System.Globalization.CultureInfo.InvariantCulture);
            writer.WriteLine(String.Join("\t", toks));
            lineIdx++;
        }
    }
}