/// <summary>
/// Parse the outputs of CanvasSNV, and note these variant frequencies in the appropriate segment.
/// </summary>
/// <remarks>
/// Each non-header line is tab-delimited and must have at least six fields: field 0 is the chromosome,
/// field 1 the 1-based position, field 4 the reference count and field 5 the alternate count.
/// Lines with too few fields or with non-integer position/count fields are reported on standard error
/// and skipped. Records whose chromosome cannot be matched to a segment chromosome (directly or via its
/// alternative name), or whose total coverage is below 10, are skipped silently.
/// </remarks>
/// <param name="variantFrequencyFile">Path of the variant frequency file, opened with GzipReader.</param>
/// <param name="segments">
/// Segments to annotate. A matching segment receives the variant frequency in VariantFrequencies and the
/// total (ref + alt) coverage in VariantTotalCoverage.
/// </param>
/// <returns>
/// Mean total coverage over the variants that were assigned to a segment, or 0 if none were assigned.
/// </returns>
public static float LoadVariantFrequencies(string variantFrequencyFile, List<CanvasSegment> segments)
{
    const int requiredFieldCount = 6;
    const int minimumCoverage = 10;
    // The input is machine-generated, so parse numbers independently of the current culture.
    System.Globalization.NumberStyles integerStyle = System.Globalization.NumberStyles.Integer;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    Console.WriteLine("{0} Load variant frequencies from {1}", DateTime.Now, variantFrequencyFile);
    Dictionary<string, List<CanvasSegment>> segmentsByChromosome = CanvasSegment.GetSegmentsByChromosome(segments);
    Dictionary<string, string> alternativeNames = GetChromosomeAlternativeNames(segmentsByChromosome.Keys);
    int count = 0; // Number of variants assigned to a segment
    long totalCoverage = 0; // Summed coverage of those variants only
    using (GzipReader reader = new GzipReader(variantFrequencyFile))
    {
        string fileLine;
        while ((fileLine = reader.ReadLine()) != null)
        {
            if (fileLine.Length == 0 || fileLine[0] == '#')
            {
                continue; // Skip headers
            }
            string[] bits = fileLine.Split('\t');
            if (bits.Length < requiredFieldCount)
            {
                Console.Error.WriteLine("* Bad line in {0}: '{1}'", variantFrequencyFile, fileLine);
                continue;
            }

            // Resolve the chromosome, falling back to its alternative name.
            List<CanvasSegment> chrSegments;
            if (!segmentsByChromosome.TryGetValue(bits[0], out chrSegments))
            {
                string alternativeName;
                if (!alternativeNames.TryGetValue(bits[0], out alternativeName))
                {
                    continue;
                }
                // The alternative-name map was built from segmentsByChromosome.Keys; presumably its
                // values are keys of segmentsByChromosome (the indexer throws otherwise, as before).
                chrSegments = segmentsByChromosome[alternativeName];
            }

            int position; // 1-based (from the input VCF to Canvas SNV)
            int countRef;
            int countAlt;
            if (!int.TryParse(bits[1], integerStyle, invariant, out position) ||
                !int.TryParse(bits[4], integerStyle, invariant, out countRef) ||
                !int.TryParse(bits[5], integerStyle, invariant, out countAlt))
            {
                // Treat unparsable numeric fields like any other malformed line instead of aborting the load.
                Console.Error.WriteLine("* Bad line in {0}: '{1}'", variantFrequencyFile, fileLine);
                continue;
            }
            int coverage = countRef + countAlt;
            if (coverage < minimumCoverage)
            {
                continue;
            }
            float VF = countAlt / (float)coverage;

            // Binary search for the segment this variant hits
            // (assumes the per-chromosome segment list is ordered by position).
            int start = 0;
            int end = chrSegments.Count - 1;
            while (start <= end)
            {
                int mid = start + (end - start) / 2;
                CanvasSegment candidate = chrSegments[mid];
                if (candidate.End < position) // CanvasSegment.End is already 1-based
                {
                    start = mid + 1;
                }
                else if (candidate.Begin + 1 > position) // Convert CanvasSegment.Begin to 1-based by adding 1
                {
                    end = mid - 1;
                }
                else
                {
                    candidate.VariantFrequencies.Add(VF);
                    candidate.VariantTotalCoverage.Add(coverage);
                    totalCoverage += coverage; // use only coverage information in segments
                    count++;
                    break;
                }
            }
        }
    }

    float meanCoverage = 0;
    if (count > 0)
    {
        meanCoverage = totalCoverage / (float)count;
    }
    Console.WriteLine("{0} Loaded a total of {1} usable variant frequencies", DateTime.Now, count);
    return meanCoverage;
}