/// <summary>
/// Reads <paramref name="vcfPath"/> and stores in <c>this.Variants</c> the records on
/// <c>this.Chromosome</c> that look like usable heterozygous SNVs.
/// </summary>
/// <remarks>
/// A record is kept when all of the following hold:
/// <list type="bullet">
/// <item>its reference name equals <c>this.Chromosome</c>;</item>
/// <item>it has exactly one alternate allele, and both the reference and alternate alleles are one base long;</item>
/// <item>if genotype columns are present: FILTER is "PASS", the first sample has a GT of "0/1" or "1/0",
/// and the first sample's GQX (when present and numeric) is at least 30.</item>
/// </list>
/// Records without genotype columns (e.g. a dbSNP VCF) skip the FILTER/GT/GQX checks entirely.
/// Reading stops at the first record for a different chromosome once at least one record for the
/// requested chromosome has been seen, so the input is assumed to be grouped by chromosome.
/// <c>this.Variants</c> is replaced, not appended to.
/// </remarks>
/// <param name="vcfPath">Path of the VCF file to load.</param>
protected void LoadVariants(string vcfPath)
{
    Console.WriteLine("{0} Loading variants of interest from {1}", DateTime.Now, vcfPath);
    this.Variants = new List<VcfVariant>();
    int countThisChromosome = 0;
    using (VcfReader reader = new VcfReader(vcfPath, requireGenotypes: false))
    {
        VcfVariant variant = new VcfVariant();
        while (reader.GetNextVariant(out variant))
        {
            if (variant.ReferenceName != this.Chromosome)
            {
                // Shortcut: once we have seen records for the desired chromosome, the first record
                // for any other chromosome means we are past the region of interest.
                if (countThisChromosome > 0) break;
                continue;
            }
            countThisChromosome++;

            // Single-allele SNVs only:
            if (variant.VariantAlleles.Length != 1 ||
                variant.VariantAlleles[0].Length != 1 ||
                variant.ReferenceAllele.Length != 1)
            {
                continue;
            }

            // Genotype columns are not available if we use a dbSNP VCF file; in that case FILTER
            // may not say PASS either, so all sample-level checks are skipped.
            bool hasGenotypes = variant.GenotypeColumns != null && variant.GenotypeColumns.Any();
            if (hasGenotypes)
            {
                // PF variants only:
                if (variant.Filters != "PASS") continue;

                var sample = variant.GenotypeColumns[0];

                // No genotype - we don't know if it's a het SNV.
                string genotype;
                if (!sample.TryGetValue("GT", out genotype)) continue;
                // NOTE(review): only unphased het calls are accepted; phased "0|1" / "1|0" are
                // rejected here, same as before. Confirm whether that is intended.
                if (genotype != "0/1" && genotype != "1/0") continue;

                // Also require a high enough quality score. A missing GQX field is allowed so that
                // output from other callers (e.g. Pisces) does not crash us. A GQX that is present
                // but not numeric (such as the VCF missing-value marker ".") is treated the same
                // way instead of throwing. Parsing uses the invariant culture because VCF numbers
                // always use '.' as the decimal separator, whatever the machine's locale is.
                string gqxText;
                float gqx;
                if (sample.TryGetValue("GQX", out gqxText) &&
                    float.TryParse(
                        gqxText,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out gqx) &&
                    gqx < 30)
                {
                    continue;
                }
            }

            // Note: Let's NOT require the variant be in dbSNP. Maybe we didn't do annotation, either
            // because we chose not to or because we're on a reference without annotation available.

            // Remember all the variants that pass all our tests:
            this.Variants.Add(variant);
            variant = new VcfVariant();
        }
    }
    Console.WriteLine("Retained {0} variants, out of {1} records for {2}", this.Variants.Count, countThisChromosome, this.Chromosome);
}