/// <summary>
/// Loads a sparse text data file into a <c>SparseMatrix</c>, one matrix column per line of the file.
/// </summary>
/// <remarks>
/// Each line is split on tab characters into <c>key:value</c> pairs; the key is parsed as an
/// <c>int</c> and the value as a <c>float</c>, both using the invariant culture so that the
/// result does not depend on the machine's regional settings. A completely empty line is stored
/// as a single entry with key 0 and value 0 and is counted as an empty line. Progress and summary
/// statistics are written to the console. Parsing runs inside <c>Parallel.For</c>, so a failure
/// while parsing a line surfaces from that call.
/// </remarks>
/// <param name="InputDataFile">Path of the text file to read.</param>
/// <param name="nInput">
/// Number of inputs; passed as the first argument of the <c>SparseMatrix</c> constructor
/// (presumably the row count - it is reported back through <c>nRows</c> as "# Inputs").
/// </param>
/// <returns>The matrix filled with one column per line of the file.</returns>
public static SparseMatrix InputDataLoader(string InputDataFile, int nInput)
{
    Console.WriteLine("==================================================");

    // Pass 1: read the whole file into memory so the number of lines (samples) is known
    // before the matrix is allocated.
    Console.WriteLine("Scanning the file: {0}", InputDataFile);
    List<string> AllRawInput = new List<string>();
    int nLine = 0;
    // 'using' guarantees the file handle is released even if reading throws.
    using (StreamReader InputDataStream = new StreamReader(InputDataFile))
    {
        string StrLine;
        while ((StrLine = InputDataStream.ReadLine()) != null)
        {
            AllRawInput.Add(StrLine);
            nLine++;
            if (nLine % 10000 == 0)
            {
                Console.Write("Number of lines (samples): {0}\r", nLine);
            }
        }
    }
    Console.Write("Number of lines (samples): {0}\n", nLine);
    Console.WriteLine("Finished scanning the input data file");

    // Pass 2: parse each line and store it into the matching column of the sparse matrix.
    Console.WriteLine("Loading input data...");
    SparseMatrix InputDataMatrix = new SparseMatrix(nInput, nLine);

    // The data file is machine-readable, so parse numbers independently of the current culture
    // (otherwise "0.5" is misread or rejected on locales that use ',' as the decimal separator).
    System.Globalization.CultureInfo ParseCulture = System.Globalization.CultureInfo.InvariantCulture;

    int nLoaded = 0;
    long nTotNonzero = 0; // long: the total over all lines can exceed int.MaxValue on large data sets
    int nEmptyLine = 0;
    Parallel.For(0, InputDataMatrix.nCols, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxCol =>
    {
        string[] StrLineSplit = AllRawInput[IdxCol].Split('\t');
        int nNonzero = StrLineSplit.Length;
        // As before, the placeholder entry of an empty line is included in this count.
        Interlocked.Add(ref nTotNonzero, nNonzero);

        int[] Key = new int[nNonzero];
        float[] Val = new float[nNonzero];
        if (nNonzero == 1 && StrLineSplit[0] == "")
        {
            // Empty line: keep the column but fill it with a single zero entry.
            Key[0] = 0;
            Val[0] = 0.0f;
            Interlocked.Increment(ref nEmptyLine);
        }
        else
        {
            for (int Idx = 0; Idx < nNonzero; Idx++)
            {
                string[] StrKeyValPair = StrLineSplit[Idx].Split(':');
                Key[Idx] = int.Parse(StrKeyValPair[0], ParseCulture);
                Val[Idx] = float.Parse(StrKeyValPair[1], ParseCulture);
            }
        }
        InputDataMatrix.FillColumn(Key, Val, IdxCol);

        // Use the value returned by the atomic increment: re-reading the shared counter here
        // would race with other threads and could skip or repeat a progress milestone.
        int nDone = Interlocked.Increment(ref nLoaded);
        if (nDone % 10000 == 0)
        {
            Console.Write("Number of lines (samples): {0}, with {1} empty lines.\r", nDone, nEmptyLine);
        }
    });

    Console.Write("Number of lines (samples): {0}, with {1} empty lines.\n", nLoaded, nEmptyLine);
    Console.WriteLine("Finished loading the input data file");
    Console.WriteLine("# Samples = {0}, # Inputs = {1}", InputDataMatrix.nCols, InputDataMatrix.nRows);
    Console.WriteLine("# Nonzeros = {0}/{1} ({2}%)", nTotNonzero, (long)InputDataMatrix.nRows * (long)InputDataMatrix.nCols, (((float)nTotNonzero) / ((float)InputDataMatrix.nRows * InputDataMatrix.nCols)) * 100);
    Console.WriteLine("==================================================");
    return InputDataMatrix;
}