BP_LDA.DataLoader.InputDataLoader C# (CSharp) Метод

InputDataLoader() публичный статический Метод

public static InputDataLoader ( string InputDataFile, int nInput ) : SparseMatrix
InputDataFile string
nInput int
Результат LinearAlgebra.SparseMatrix
		/// <summary>
		/// Loads a sparse data file into a <c>SparseMatrix</c> with <paramref name="nInput"/> rows
		/// and one column per line of the file.
		/// </summary>
		/// <remarks>
		/// Each line is a tab-separated list of <c>key:value</c> tokens; the keys and values of
		/// line i are passed to <c>FillColumn</c> for column i (keys are presumably row indices —
		/// confirm against <c>SparseMatrix.FillColumn</c>). An empty line is stored as a single
		/// placeholder entry (key 0, value 0) and counted as an empty line; as in the original
		/// implementation, that placeholder is also counted in the reported nonzero total.
		/// Numbers are parsed with the invariant culture so that the file format does not depend
		/// on the regional settings of the machine. Progress and summary statistics are written
		/// to the console.
		/// </remarks>
		/// <param name="InputDataFile">Path of the text file to load.</param>
		/// <param name="nInput">Number of rows of the resulting matrix.</param>
		/// <returns>The populated sparse matrix.</returns>
		/// <exception cref="AggregateException">
		/// Thrown by the parallel parsing loop when a line is malformed; the inner exception is a
		/// <see cref="FormatException"/> (or <see cref="OverflowException"/>) describing the token.
		/// </exception>
		public static SparseMatrix InputDataLoader(string InputDataFile, int nInput)
		{
			Console.WriteLine("==================================================");


			// Pass 1: read every line into memory; the line count is the number of samples.
			Console.WriteLine("Scanning the file: {0}", InputDataFile);
			List<string> AllRawInput = new List<string>();
			int nLine = 0;
			// The using-statement guarantees the file handle is released even if reading throws.
			using (StreamReader InputDataStream = new StreamReader(InputDataFile))
			{
				string StrLine;
				while ((StrLine = InputDataStream.ReadLine()) != null)
				{
					AllRawInput.Add(StrLine);
					nLine++;
					if (nLine % 10000 == 0)
					{
						Console.Write("Number of lines (samples): {0}\r", nLine);
					}
				}
			}
			Console.Write("Number of lines (samples): {0}\n", nLine);
			Console.WriteLine("Finished scanning the input data file");

			// Pass 2: parse each line and store it into the matching column of the sparse matrix.
			Console.WriteLine("Loading input data...");
			SparseMatrix InputDataMatrix = new SparseMatrix(nInput, nLine);
			nLine = 0;
			// long: the total is summed over all samples and can exceed int.MaxValue on large corpora.
			long nTotNonzero = 0;
			int nEmptyLine = 0;
			// Fully qualified so that no additional using-directive is required in this file.
			System.Globalization.CultureInfo Invariant = System.Globalization.CultureInfo.InvariantCulture;
			Parallel.For(0, InputDataMatrix.nCols, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxCol =>
				{
					string[] StrLineSplit = AllRawInput[IdxCol].Split('\t');
					int nNonzero = StrLineSplit.Length;
					Interlocked.Add(ref nTotNonzero, nNonzero);
					int[] Key = new int[nNonzero];
					float[] Val = new float[nNonzero];
					if (StrLineSplit.Length == 1 && StrLineSplit[0] == "")
					{
						// Empty line: keep a single explicit zero entry as a placeholder.
						Key[0] = 0;
						Val[0] = 0.0f;
						Interlocked.Increment(ref nEmptyLine);
					}
					else
					{
						for (int Idx = 0; Idx < nNonzero; Idx++)
						{
							string[] StrKeyValPair = StrLineSplit[Idx].Split(':');
							if (StrKeyValPair.Length < 2)
							{
								// Report the offending token instead of failing with an index error.
								throw new FormatException(string.Format(
									"Line {0} of '{1}': expected 'key:value' but found '{2}'.",
									IdxCol + 1, InputDataFile, StrLineSplit[Idx]));
							}
							Key[Idx] = int.Parse(StrKeyValPair[0], Invariant);
							Val[Idx] = float.Parse(StrKeyValPair[1], Invariant);
						}
					}
					InputDataMatrix.FillColumn(Key, Val, IdxCol);

					// Use the value returned by the atomic increment: re-reading the shared counter
					// could observe another thread's update and skip or repeat a progress line.
					int nDone = Interlocked.Increment(ref nLine);
					if (nDone % 10000 == 0)
					{
						Console.Write("Number of lines (samples): {0}, with {1} empty lines.\r", nDone, nEmptyLine);
					}
				});

			Console.Write("Number of lines (samples): {0}, with {1} empty lines.\n", nLine, nEmptyLine);
			Console.WriteLine("Finished loading the input data file");
			Console.WriteLine("# Samples = {0}, # Inputs = {1}", InputDataMatrix.nCols, InputDataMatrix.nRows);
			Console.WriteLine("# Nonzeros = {0}/{1} ({2}%)", nTotNonzero, (long)InputDataMatrix.nRows * (long)InputDataMatrix.nCols, (((float)nTotNonzero) / ((float)InputDataMatrix.nRows * InputDataMatrix.nCols)) * 100);
			Console.WriteLine("==================================================");



			return InputDataMatrix;
		}

Usage Example

Пример #1
0
        // Entry point: builds the default parameter sets, applies the command-line arguments,
        // loads the training and test matrices and hands everything to LDA_Learn.TrainingBP_LDA.
        static void Main(string[] args)
        {
            // Default model / training parameters.
            paramModel_t paramModel = new paramModel_t();
            paramTrain_t paramTrain = new paramTrain_t();
            SetupDefaultParams(paramModel, paramTrain);

            // File paths; ParseArgument receives them by reference.
            string trainPath  = "";
            string testPath   = "";
            string modelPath  = "";
            string resultPath = "";

            // Apply the command-line arguments; stop when ParseArgument reports failure.
            bool argsAccepted = ParseArgument(
                args,
                paramModel,
                paramTrain,
                ref trainPath,
                ref testPath,
                ref modelPath,
                ref resultPath);
            if (!argsAccepted)
            {
                return;
            }

            // Every hidden layer gets the same T value.
            paramModel.T = new float[paramModel.nHidLayer];
            for (int layer = 0; layer < paramModel.nHidLayer; ++layer)
            {
                paramModel.T[layer] = paramModel.T_value;
            }

            // Threading configuration used by the matrix operations.
            MatrixOperation.THREADNUM            = paramTrain.ThreadNum;
            MatrixOperation.MaxMultiThreadDegree = paramTrain.MaxMultiThreadDegree;

            // Load both data sets; the column count of each matrix is its sample count.
            SparseMatrix trainMatrix = DataLoader.InputDataLoader(trainPath, paramModel.nInput);
            SparseMatrix testMatrix  = DataLoader.InputDataLoader(testPath, paramModel.nInput);
            paramTrain.nTrain = trainMatrix.nCols;
            paramTrain.nTest  = testMatrix.nCols;

            // Unsupervised learning of the LDA model (unfolding and back-propagation):
            //   (i)  inference: feedforward network via MDA unfolding
            //   (ii) learning: projected (mini-batch) stochastic gradient descent (P-SGD)
            //        using back propagation
            LDA_Learn.TrainingBP_LDA(trainMatrix, testMatrix, paramModel, paramTrain, modelPath, resultPath);
        }