/// <summary>
/// Loads the "oracle" of known copy numbers from a VCF file into <c>KnownCN</c>,
/// grouping the parsed intervals by the chromosome name found in the first column.
/// </summary>
/// <remarks>
/// For each non-header record the start position is read from column 2 and the
/// <c>END=</c> / <c>CN=</c> keys are read from the INFO column (column 8). When both a
/// FORMAT column and a sample column are present, a <c>CN</c> entry in the sample column
/// overrides the INFO value. Records that are too short, or that end up with
/// <c>End == 0</c> or a negative CN, are echoed to the console and skipped.
/// All numbers are parsed with the invariant culture so the result does not depend on
/// the machine's regional settings.
/// </remarks>
/// <param name="oracleVCFPath">Path of the VCF file; it is opened through <c>GzipReader</c>.</param>
protected void LoadKnownCNVCF(string oracleVCFPath)
{
    // Debug toggle: when set, "chr" is removed from chromosome names before they are used as keys.
    bool stripChr = false;
    System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Load our "oracle" of known copy numbers:
    this.KnownCN = new Dictionary<string, List<CNInterval>>();
    int count = 0;
    using (GzipReader reader = new GzipReader(oracleVCFPath))
    {
        while (true)
        {
            string fileLine = reader.ReadLine();
            if (fileLine == null) break;
            if (fileLine.Length == 0 || fileLine[0] == '#') continue;
            string[] bits = fileLine.Split('\t');
            if (bits.Length == 1 && bits[0].Trim().Length == 0) continue; // skip empty lines!

            // We need at least CHROM..INFO (8 columns); report truncated lines instead of
            // crashing with an IndexOutOfRangeException.
            if (bits.Length < 8)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
                continue;
            }

            string chromosome = bits[0];
            if (stripChr) chromosome = chromosome.Replace("chr", "");

            // Single lookup; the list is created up front, as before, even if the record
            // later turns out to be bogus.
            List<CNInterval> chromosomeIntervals;
            if (!KnownCN.TryGetValue(chromosome, out chromosomeIntervals))
            {
                chromosomeIntervals = new List<CNInterval>();
                KnownCN[chromosome] = chromosomeIntervals;
            }

            CNInterval interval = new CNInterval();
            interval.Start = int.Parse(bits[1], invariant);
            interval.CN = -1; // sentinel: no copy number found yet

            string[] infoBits = bits[7].Split(';');
            foreach (string subBit in infoBits)
            {
                if (subBit.StartsWith("CN=", StringComparison.Ordinal))
                {
                    float tempCN = float.Parse(subBit.Substring(3), invariant);
                    if (subBit.EndsWith(".5", StringComparison.Ordinal))
                    {
                        // Math.Round uses banker's rounding, so nudge X.5 to make it round up to X+1.
                        interval.CN = (int)Math.Round(tempCN + 0.1);
                    }
                    else
                    {
                        interval.CN = (int)Math.Round(tempCN); // Round off
                    }
                }
                if (subBit.StartsWith("END=", StringComparison.Ordinal))
                {
                    interval.End = int.Parse(subBit.Substring(4), invariant);
                }
            }

            // Parse CN from Canvas output. Both the FORMAT column (index 8) and a sample
            // column (index 9) must exist, hence the "> 9" check.
            if (bits.Length > 9)
            {
                string[] formatKeys = bits[8].Split(':');
                string[] sampleValues = bits[9].Split(':');
                // The sample column may carry fewer fields than FORMAT declares.
                int fieldCount = Math.Min(formatKeys.Length, sampleValues.Length);
                for (int fieldIndex = 0; fieldIndex < fieldCount; fieldIndex++)
                {
                    if (formatKeys[fieldIndex] == "CN")
                    {
                        interval.CN = int.Parse(sampleValues[fieldIndex], invariant);
                    }
                }
            }

            if (interval.End == 0 || interval.CN < 0)
            {
                Console.WriteLine("Error - bogus record!");
                Console.WriteLine(fileLine);
            }
            else
            {
                chromosomeIntervals.Add(interval);
                count++;
            }
        }
    }
    Console.WriteLine(">>>Loaded {0} known-CN intervals", count);
}