/*
* Load the label data (one sample per line) and return it in a sparse matrix.
*
* The file is first read completely into memory; every line is then parsed in
* parallel and handed to SparseMatrix.FillColumn for the column whose index
* equals the (zero-based) line index.
*
* Parameters:
*   LabelDataFile - path of the text file to read; each line becomes one column.
*   nOutput       - first argument of the SparseMatrix constructor
*                   (presumably the number of rows / output units - confirm against SparseMatrix).
*   OutputType    - selects the line format:
*                   "linearCE" / "softmaxCE": one number per line, used as the single key
*                       with value 1.0f (text that is not an integer, e.g. "3.0", is parsed
*                       as float and truncated to int);
*                   "linearQuad": tab-separated floats, stored with keys 0..k-1.
*
* Returns: the SparseMatrix built as SparseMatrix(nOutput, number of lines read).
*
* Throws:  ArgumentException when OutputType is none of the values above (raised after
*          the file has been scanned). Parse errors raised inside the parallel loop are
*          surfaced by Parallel.For wrapped in an AggregateException.
*
* Numbers are parsed with the invariant culture so that the result does not depend on
* the locale of the machine running the loader.
*/
public static SparseMatrix LabelDataLoader(string LabelDataFile, int nOutput, string OutputType)
{
    Console.WriteLine("==================================================");
    Console.WriteLine("Scanning the file: {0}", LabelDataFile);

    // Load all the data. The using block releases the file handle even if reading throws.
    List<string> AllRawLabel = new List<string>();
    int nLine = 0;
    using (StreamReader LabelDataStream = new StreamReader(LabelDataFile))
    {
        string StrLine;
        while ((StrLine = LabelDataStream.ReadLine()) != null)
        {
            AllRawLabel.Add(StrLine);
            nLine++;
            if (nLine % 10000 == 0)
            {
                Console.Write("Number of lines (samples): {0}\r", nLine);
            }
        }
    }
    Console.WriteLine("Number of lines (samples): {0}", nLine);
    Console.WriteLine("Finished scanning the input data file");

    // Parse the raw text into actual labels to be used in the learning algorithm
    Console.WriteLine("Loading input data...");
    int nSamples = nLine;
    SparseMatrix LabelData = new SparseMatrix(nOutput, nSamples);

    // Label files are machine-readable data, so parse them independently of the current locale.
    System.Globalization.CultureInfo Invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Pick the per-line parser once; the parallel loop and progress reporting are shared below.
    Action<int> FillOneColumn;
    switch (OutputType)
    {
        case "linearCE":
        case "softmaxCE":
            // Both types use the same format: a single class label per line.
            FillOneColumn = IdxCol =>
            {
                int[] Key = new int[1];
                float[] Val = new float[1];
                if (!int.TryParse(AllRawLabel[IdxCol], System.Globalization.NumberStyles.Integer, Invariant, out Key[0]))
                {
                    // Fallback for labels written as floating point text; the fraction is truncated.
                    Key[0] = (int)float.Parse(AllRawLabel[IdxCol], Invariant);
                }
                Val[0] = 1.0f;
                LabelData.FillColumn(Key, Val, IdxCol);
            };
            break;
        case "linearQuad":
            // One tab-separated float per output; keys are simply the positions 0..k-1.
            FillOneColumn = IdxCol =>
            {
                string[] StrLineSplit = AllRawLabel[IdxCol].Split('\t');
                int[] Key = new int[StrLineSplit.Length];
                float[] Val = new float[StrLineSplit.Length];
                for (int IdxOutput = 0; IdxOutput < StrLineSplit.Length; IdxOutput++)
                {
                    Key[IdxOutput] = IdxOutput;
                    Val[IdxOutput] = float.Parse(StrLineSplit[IdxOutput], Invariant);
                }
                LabelData.FillColumn(Key, Val, IdxCol);
            };
            break;
        default:
            throw new ArgumentException(
                string.Format("Unknown OutputType \"{0}\". Supported values are \"linearCE\", \"softmaxCE\" and \"linearQuad\".", OutputType),
                "OutputType");
    }

    // NOTE(review): relies on FillColumn being safe to call concurrently for distinct columns,
    // exactly as the previous implementation did - confirm against SparseMatrix.
    nLine = 0;
    Parallel.For(0, LabelData.nCols, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxCol =>
    {
        FillOneColumn(IdxCol);
        // Use the value returned by Increment: re-reading the shared counter could observe
        // another thread's update and skip or repeat a progress milestone.
        int nDone = Interlocked.Increment(ref nLine);
        if (nDone % 10000 == 0)
        {
            Console.Write("Number of lines (samples): {0}\r", nDone);
        }
    });

    Console.Write("Number of lines (samples): {0}\n", nLine);
    Console.WriteLine("==================================================");
    return LabelData;
}