/// <summary>
/// Segments every chromosome in <c>ScoreByChr</c> with <c>WaveletSegmentation.HaarWavelets</c> and
/// stores the outcome in <c>SegmentationResults</c>.
/// </summary>
/// <remarks>
/// Work is done in two parallel passes (one task per chromosome, run through
/// <c>Isas.Shared.Utilities.DoWorkParallelThreads</c>):
/// first the finite scores are collected so that an input without any finite score can be answered
/// with <c>GetDummySegmentationResults()</c>; then each chromosome is segmented and its breakpoints
/// are converted into <c>Segment</c> records using <c>StartByChr</c> / <c>EndByChr</c>.
/// Note that the wavelet pass runs on the raw scores of the chromosome, not on the finite subset.
/// </remarks>
/// <param name="isGermline">Forwarded unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="thresholdLower">Forwarded unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="thresholdUpper">Forwarded unchanged to <c>WaveletSegmentation.HaarWavelets</c>.</param>
/// <param name="minSize">
/// A chromosome is only passed to the wavelet algorithm when it has more than this many bins;
/// otherwise it is reported as a single segment.
/// </param>
/// <param name="verbose">Not used by this method; kept so existing callers keep compiling.</param>
private void Wavelets(bool isGermline, double thresholdLower = 5, double thresholdUpper = 80, int minSize = 10, int verbose = 1)
{
    // Pass 1: collect the finite scores of every chromosome.
    Dictionary<string, double[]> finiteScoresByChr = new Dictionary<string, double[]>();
    List<ThreadStart> tasks = new List<ThreadStart>();
    foreach (KeyValuePair<string, double[]> scoreByChrKVP in ScoreByChr)
    {
        tasks.Add(new ThreadStart(() =>
        {
            int[] finiteIndices;
            Helper.GetFiniteIndices(scoreByChrKVP.Value, out finiteIndices); // not NaN, -Inf, Inf
            double[] scores;
            if (finiteIndices.Length == scoreByChrKVP.Value.Length)
            {
                // Everything is finite: reuse the array instead of copying it.
                scores = scoreByChrKVP.Value;
            }
            else
            {
                Helper.ExtractValues<double>(scoreByChrKVP.Value, finiteIndices, out scores);
            }
            // Dictionary<,> is not safe for concurrent writes.
            lock (finiteScoresByChr)
            {
                finiteScoresByChr[scoreByChrKVP.Key] = scores;
            }
        }));
    }
    Isas.Shared.Utilities.DoWorkParallelThreads(tasks);

    // Quick sanity-check: If we don't have any segments, then return a dummy result.
    int n = 0;
    foreach (double[] finiteScores in finiteScoresByChr.Values)
    {
        n += finiteScores.Length;
    }
    if (n == 0)
    {
        this.SegmentationResults = this.GetDummySegmentationResults();
        return;
    }

    // Pass 2: segment each chromosome independently.
    Dictionary<string, Segment[]> segmentByChr = new Dictionary<string, Segment[]>();
    tasks = new List<ThreadStart>();
    foreach (string chr in ScoreByChr.Keys)
    {
        tasks.Add(new ThreadStart(() =>
        {
            double[] scores = this.ScoreByChr[chr];
            int sizeScoreByChr = scores.Length;

            List<int> breakpoints = new List<int>();
            if (sizeScoreByChr > minSize)
            {
                // A copy is handed over, so the stored scores stay untouched whatever HaarWavelets does with its input.
                WaveletSegmentation.HaarWavelets(scores.ToArray(), thresholdLower, thresholdUpper, breakpoints, isGermline);
            }

            // Bin indices (into the per-chromosome arrays) of each segment's first and last bin.
            List<int> startBreakpointsPos = new List<int>();
            List<int> endBreakpointPos = new List<int>();
            List<int> lengthSeg = new List<int>();

            // breakpoints is only populated when sizeScoreByChr > minSize, so no further size test is needed.
            if (breakpoints.Count >= 2)
            {
                startBreakpointsPos.Add(breakpoints[0]);
                endBreakpointPos.Add(breakpoints[1] - 1);
                lengthSeg.Add(breakpoints[1] - 1);
                for (int i = 1; i < breakpoints.Count - 1; i++)
                {
                    startBreakpointsPos.Add(breakpoints[i]);
                    endBreakpointPos.Add(breakpoints[i + 1] - 1);
                    lengthSeg.Add(breakpoints[i + 1] - 1 - breakpoints[i]);
                }
                // The last segment runs from the final breakpoint to the end of the chromosome.
                startBreakpointsPos.Add(breakpoints[breakpoints.Count - 1]);
                endBreakpointPos.Add(sizeScoreByChr - 1);
                lengthSeg.Add(sizeScoreByChr - breakpoints[breakpoints.Count - 1] - 1);
            }
            else
            {
                // No usable breakpoints: the whole chromosome is one segment.
                startBreakpointsPos.Add(0);
                endBreakpointPos.Add(sizeScoreByChr - 1);
                lengthSeg.Add(sizeScoreByChr - 1);
            }

            Segment[] segments = new Segment[startBreakpointsPos.Count];
            for (int i = 0; i < segments.Length; i++)
            {
                int start = startBreakpointsPos[i];
                int end = endBreakpointPos[i];
                segments[i] = new Segment();
                segments[i].start = this.StartByChr[chr][start]; // Genomic start
                segments[i].end = this.EndByChr[chr][end]; // Genomic end
                // NOTE(review): lengthSeg holds (end - start), which is one less than the number of
                // bins in [start, end] - confirm whether nMarkers is meant to be end - start + 1.
                segments[i].nMarkers = lengthSeg[i];
                // The averaging window is anchored on this segment's own bin indices. Accumulating
                // lengthSeg instead would shift the window one bin further back for every segment.
                // Works even if weights == null.
                // NOTE(review): iEnd is the index of the segment's last bin. Whether
                // Helper.WeightedAverage treats iEnd as inclusive is not visible here - if it is
                // exclusive, pass end + 1 so the last bin contributes to the mean.
                segments[i].mean = Helper.WeightedAverage(scores, null, iStart: start, iEnd: end);
            }

            lock (segmentByChr)
            {
                segmentByChr[chr] = segments;
            }
        }));
    }
    Console.WriteLine("{0} Launching wavelet tasks", DateTime.Now);
    Isas.Shared.Utilities.DoWorkParallelThreads(tasks);
    Console.WriteLine("{0} Completed wavelet tasks", DateTime.Now);
    this.SegmentationResults = new GenomeSegmentationResults(segmentByChr);
    Console.WriteLine("{0} Segmentation results complete", DateTime.Now);
}