public static void TrainingBP_LDA(
SparseMatrix TrainData,
SparseMatrix TestData,
paramModel_t paramModel,
paramTrain_t paramTrain,
string ModelFile,
string ResultFile
)
{
// ---- Extract the parameters ----
// Model parameters
int nInput = paramModel.nInput;
int nHid = paramModel.nHid;
int nHidLayer = paramModel.nHidLayer;
int nOutput = paramModel.nOutput;
float eta = paramModel.eta;
float T_value = paramModel.T_value;
string OutputType = paramModel.OutputType;
float beta = paramModel.beta;
// Training parameters
int nEpoch = paramTrain.nEpoch;
float mu_Phi = paramTrain.mu_Phi;
float mu_U = paramTrain.mu_U;
int nTrain = paramTrain.nTrain;
float mu_Phi_ReduceFactor = paramTrain.mu_Phi_ReduceFactor;
string LearnRateSchedule = paramTrain.LearnRateSchedule;
int nSamplesPerDisplay = paramTrain.nSamplesPerDisplay;
int nEpochPerSave = paramTrain.nEpochPerSave;
int nEpochPerTest = paramTrain.nEpochPerTest;
int nEpochPerDump = paramTrain.nEpochPerDump;
// ---- Initialize the model ----
ModelInit_LDA_Feedforward(paramModel);
// ---- Initialize the training algorithm ----
Console.WriteLine("#################################################################");
Console.WriteLine("jvking version of BP-LDA: Mirror-Descent Back Propagation");
Console.WriteLine("#################################################################");
float TotLoss = 0.0f;
float TotCE = 0.0f;
double TotTime = 0.0;
double TotTimeThisEpoch = 0.0;
int TotSamples = 0;
int TotSamplesThisEpoch = 0;
double AvgnHidLayerEffective = 0.0;
int CntRunningAvg = 0;
int CntModelUpdate = 0;
DenseRowVector mu_phi_search = new DenseRowVector(nHid, mu_Phi);
DenseRowVector TestLoss_pool = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
DenseRowVector TestLoss_epoch = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
DenseRowVector TestLoss_time = new DenseRowVector(nEpoch / nEpochPerTest, 0.0f);
int CountTest = 0;
DenseRowVector G_Phi_pool = new DenseRowVector(paramModel.nHidLayer);
DenseRowVector G_Phi_trunc_pool = new DenseRowVector(paramModel.nHidLayer, 0.0f);
DenseRowVector AdaGradSum = new DenseRowVector(nHid, 0.0f);
DenseRowVector TmpDenseRowVec = new DenseRowVector(nHid, 0.0f);
int[] SparsePatternGradPhi = null;
float nLearnLineSearch = 0.0f;
int[] IdxPerm = null;
int BatchSize_NormalBatch = paramTrain.BatchSize;
int BatchSize_tmp = paramTrain.BatchSize;
int nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
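// Pre-allocate two reusable forward/backward workspaces: one sized for a full
// batch and one for the (possibly smaller) final batch of each epoch. DNNRun
// below is only a reference that is pointed at whichever workspace the current
// batch needs, so no per-batch allocation is required.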
DNNRun_t DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
DNNRun_t DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
DNNRun_t DNNRun = null;
Grad_t Grad = new Grad_t(nHid, nOutput, nInput, paramModel.nHidLayer, OutputType);
DenseMatrix TmpGradDense = new DenseMatrix(nInput, nHid);
DenseMatrix TmpMatDensePhi = new DenseMatrix(nInput, nHid);
paramModel_t paramModel_avg = new paramModel_t(paramModel);
Stopwatch stopWatch = new Stopwatch();
// ---- Compute the schedule of the learning rate ----
double[] stepsize_pool = null;
switch (LearnRateSchedule)
{
case "PreCompute":
stepsize_pool = PrecomputeLearningRateSchedule(nBatch, nEpoch, mu_Phi, mu_Phi / mu_Phi_ReduceFactor, 1e-8f);
break;
case "Constant":
stepsize_pool = new double[nEpoch];
for (int Idx = 0; Idx < nEpoch; Idx++)
{
stepsize_pool[Idx] = mu_Phi;
}
break;
default:
throw new Exception("Unknown type of LearnRateSchedule");
}
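// Note: with "PreCompute" the step size is (presumably) annealed from mu_Phi
// down to mu_Phi / mu_Phi_ReduceFactor across the nEpoch epochs (see
// PrecomputeLearningRateSchedule); with "Constant" every epoch uses mu_Phi.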
// ---- Start training ----
for (int epoch = 0; epoch < nEpoch; epoch++)
{
TotSamplesThisEpoch = 0;
TotTimeThisEpoch = 0.0;
AvgnHidLayerEffective = 0.0;
// -- Set the batch size if there is schedule --
if (paramTrain.flag_BachSizeSchedule)
{
if (paramTrain.BachSizeSchedule.TryGetValue(epoch + 1, out BatchSize_tmp))
{
BatchSize_NormalBatch = BatchSize_tmp;
nBatch = (int)Math.Ceiling(((float)nTrain) / ((float)BatchSize_NormalBatch));
DNNRun_NormalBatch = new DNNRun_t(nHid, BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
DNNRun_EndBatch = new DNNRun_t(nHid, nTrain - (nBatch - 1) * BatchSize_NormalBatch, paramModel.nHidLayer, nOutput);
}
}
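// Note: when the batch size changes, the two workspaces above are re-allocated
// so that their column dimensions match the new normal-batch and end-batch sizes.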
// -- Shuffle the data (generating shuffled index) --
IdxPerm = Statistics.RandPerm(nTrain);
// -- Reset the (MDA) inference step-sizes --
if (epoch > 0)
{
for (int Idx = 0; Idx < paramModel.nHidLayer; Idx++)
{
paramModel.T[Idx] = T_value;
}
}
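// Note: paramModel.T holds the per-layer mirror-descent (MDA) inference step
// sizes. They are restored to T_value at the start of each epoch, presumably
// because forward inference may adapt them during the previous epoch (an
// assumption inferred from the reset being needed only for epoch > 0).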
// -- Take the learning rate for the current epoch --
mu_Phi = (float)stepsize_pool[epoch];
// -- Start this epoch --
Console.WriteLine("############## Epoch #{0}. BatchSize: {1} Learning Rate: {2} ##################", epoch + 1, BatchSize_NormalBatch, mu_Phi);
for (int IdxBatch = 0; IdxBatch < nBatch; IdxBatch++)
{
stopWatch.Start();
// Extract the batch
int BatchSize = 0;
if (IdxBatch < nBatch - 1)
{
BatchSize = BatchSize_NormalBatch;
DNNRun = DNNRun_NormalBatch;
}
else
{
BatchSize = nTrain - IdxBatch * BatchSize_NormalBatch;
DNNRun = DNNRun_EndBatch;
}
SparseMatrix Xt = new SparseMatrix(nInput, BatchSize);
SparseMatrix Dt = null;
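// Dt would hold the supervised targets for this batch; it stays null because
// this routine trains the unsupervised BP-LDA model, so back propagation below
// runs without a supervised output term.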
int[] IdxSample = new int[BatchSize];
Array.Copy(IdxPerm, IdxBatch * BatchSize_NormalBatch, IdxSample, 0, BatchSize);
TrainData.GetColumns(Xt, IdxSample);
// Set the sparse pattern for the gradient
SparsePatternGradPhi = Xt.GetHorizontalUnionSparsePattern();
Grad.SetSparsePatternForAllGradPhi(SparsePatternGradPhi);
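// Since Xt is a sparse bag-of-words batch, the data gradient w.r.t. Phi is
// (effectively) nonzero only on the rows corresponding to vocabulary words
// that occur somewhere in this batch. Restricting the gradient to the
// horizontal-union sparse pattern avoids touching the remaining rows.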
// Forward activation
LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, true);
// Back propagation
LDA_Learn.BackPropagation_LDA(Xt, Dt, DNNRun, paramModel, Grad);
// Compute the gradient and update the model (All gradients of Phi are accumulated into Grad.grad_Q_Phi)
MatrixOperation.ScalarDivideMatrix(Grad.grad_Q_Phi, (-1.0f) * ((beta - 1) / ((float)nTrain)), paramModel.Phi, true);
MatrixOperation.MatrixAddMatrix(Grad.grad_Q_Phi, Grad.grad_Q_TopPhi);
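// Net effect of the two ops above (assuming ScalarDivideMatrix(out, c, M, ...)
// writes the elementwise quotient c ./ M into its first argument):
//   grad_Q_Phi = -((beta - 1) / nTrain) ./ Phi + grad_Q_TopPhi
// i.e. this batch's share of the Dirichlet(beta) log-prior gradient on the
// topic matrix plus the data term accumulated during back propagation.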
mu_phi_search.FillValue(mu_Phi);
// Different learning rate for different columns of Phi: Similar to AdaGrad but does not decay with time
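// Sketch of what the next few ops compute: AdaGradSum is maintained as a
// *running mean* (not AdaGrad's running sum) of the per-column mean squared
// gradient,
//   AdaGradSum += (colMean(grad.^2) - AdaGradSum) / CntModelUpdate
// and the per-column step size becomes
//   mu_phi_search[j] = mu_Phi / (mu_Phi + sqrt(AdaGradSum[j]))
// Using the mean instead of the cumulative sum keeps the step size from
// shrinking toward zero as training proceeds.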
++CntModelUpdate;
MatrixOperation.ElementwiseMatrixMultiplyMatrix(TmpMatDensePhi, Grad.grad_Q_Phi, Grad.grad_Q_Phi);
MatrixOperation.VerticalSumMatrix(TmpDenseRowVec, TmpMatDensePhi);
MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / ((float)nInput));
MatrixOperation.VectorSubtractVector(TmpDenseRowVec, AdaGradSum);
MatrixOperation.ScalarMultiplyVector(TmpDenseRowVec, 1.0f / CntModelUpdate);
MatrixOperation.VectorAddVector(AdaGradSum, TmpDenseRowVec);
MatrixOperation.ElementwiseSquareRoot(TmpDenseRowVec, AdaGradSum);
MatrixOperation.ScalarAddVector(TmpDenseRowVec, mu_Phi);
MatrixOperation.ElementwiseVectorDivideVector(mu_phi_search, mu_phi_search, TmpDenseRowVec);
nLearnLineSearch = SMD_Update(paramModel.Phi, Grad.grad_Q_Phi, mu_phi_search, eta);
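// SMD_Update presumably applies a stochastic mirror-descent step to Phi with
// the per-column step sizes computed above, keeping each column of Phi on the
// probability simplex (e.g. an exponentiated-gradient step followed by
// renormalization), with eta controlling its internal line search. This is an
// assumption about its contract, not a specification.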
// Running average of the model
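// (Polyak-style averaging over the second half of training: the three ops in
// the branch below implement the incremental mean
//   Phi_avg += (Phi - Phi_avg) / CntRunningAvg
// so paramModel_avg.Phi equals the mean of all Phi iterates seen since
// averaging started.)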
if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
{
++CntRunningAvg;
MatrixOperation.MatrixSubtractMatrix(TmpMatDensePhi, paramModel.Phi, paramModel_avg.Phi);
MatrixOperation.ScalarMultiplyMatrix(TmpMatDensePhi, 1.0f / CntRunningAvg);
MatrixOperation.MatrixAddMatrix(paramModel_avg.Phi, TmpMatDensePhi);
}
// Accumulate statistics and display the result
TotCE += ComputeCrossEntropy(Xt, paramModel.Phi, DNNRun.theta_pool, DNNRun.nHidLayerEffective);
TotLoss = TotCE;
TotSamples += BatchSize;
TotSamplesThisEpoch += BatchSize;
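// Streaming per-sample average of the number of effective hidden layers:
// the old average is weighted by the (TotSamplesThisEpoch - BatchSize) samples
// already seen, then this batch's layer counts are folded in.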
AvgnHidLayerEffective = (((float)(TotSamplesThisEpoch - BatchSize)) / ((float)TotSamplesThisEpoch)) * AvgnHidLayerEffective
    + (1.0 / ((float)TotSamplesThisEpoch)) * DNNRun.nHidLayerEffective.Sum();
stopWatch.Stop();
TimeSpan ts = stopWatch.Elapsed;
TotTime += ts.TotalSeconds;
TotTimeThisEpoch += ts.TotalSeconds;
stopWatch.Reset();
if (TotSamplesThisEpoch % nSamplesPerDisplay == 0)
{
// Display results
Console.WriteLine(
"* Ep#{0}/{1} Bat#{2}/{3}. Loss={4:F3}. CE={5:F3}. Speed={6} Samples/Sec.",
epoch + 1, nEpoch,
IdxBatch + 1, nBatch,
TotLoss / TotSamples, TotCE / TotSamples,
(int)((double)TotSamplesThisEpoch / TotTimeThisEpoch)
);
if (paramTrain.DebugLevel == DebugLevel_t.medium)
{
Console.WriteLine(
" muPhiMax={0} \n muPhiMin={1}",
mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
);
Console.WriteLine();
}
if (paramTrain.DebugLevel == DebugLevel_t.high)
{
Console.WriteLine(
" muPhiMax={0} \n muPhiMin={1}",
mu_phi_search.VectorValue.Max(), mu_phi_search.VectorValue.Min()
);
Console.WriteLine(
" AvgnHidLayerEff={0:F1}. G_Phi={1:F3}.",
AvgnHidLayerEffective,
Grad.grad_Q_Phi.MaxAbsValue()
);
Console.WriteLine();
}
}
}
// -- Test --
if ((epoch + 1) % nEpochPerTest == 0)
{
TestLoss_epoch.VectorValue[(epoch + 1) / nEpochPerTest - 1] = epoch + 1;
TestLoss_time.VectorValue[(epoch + 1) / nEpochPerTest - 1] = (float)TotTime;
if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
{
TestLoss_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1] = Testing_BP_LDA(TestData, paramModel_avg, paramTrain.BatchSize_Test);
}
else
{
TestLoss_pool.VectorValue[(epoch + 1) / nEpochPerTest - 1] = Testing_BP_LDA(TestData, paramModel, paramTrain.BatchSize_Test);
}
CountTest++;
}
// -- Save --
if ((epoch + 1) % nEpochPerSave == 0)
{
// Save model
if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
{
string PhiCol = null;
(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
StreamWriter FileSaveModel = new StreamWriter(ResultFile + ".model.Phi", false);
for (int IdxCol = 0; IdxCol < paramModel_avg.Phi.nCols; IdxCol++)
{
PhiCol = String.Join("\t", paramModel_avg.Phi.DenseMatrixValue[IdxCol].VectorValue);
FileSaveModel.WriteLine(PhiCol);
}
FileSaveModel.Close();
// Save the final learning curves
StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false);
FileSavePerf.WriteLine(String.Join("\t", TestLoss_epoch.VectorValue));
FileSavePerf.WriteLine(String.Join("\t", TestLoss_time.VectorValue));
FileSavePerf.WriteLine(String.Join("\t", TestLoss_pool.VectorValue));
FileSavePerf.Close();
}
else
{
string PhiCol = null;
(new FileInfo(ResultFile + ".model.Phi")).Directory.Create();
StreamWriter FileSaveModel = new StreamWriter(ResultFile + ".model.Phi", false);
for (int IdxCol = 0; IdxCol < paramModel.Phi.nCols; IdxCol++)
{
PhiCol = String.Join("\t", paramModel.Phi.DenseMatrixValue[IdxCol].VectorValue);
FileSaveModel.WriteLine(PhiCol);
}
FileSaveModel.Close();
// Save the final learning curves
StreamWriter FileSavePerf = new StreamWriter(ResultFile + ".perf", false);
FileSavePerf.WriteLine(String.Join("\t", TestLoss_epoch.VectorValue));
FileSavePerf.WriteLine(String.Join("\t", TestLoss_time.VectorValue));
FileSavePerf.WriteLine(String.Join("\t", TestLoss_pool.VectorValue));
FileSavePerf.Close();
}
}
// -- Dump feature --
if (paramTrain.flag_DumpFeature && (epoch + 1) % nEpochPerDump == 0)
{
if (paramTrain.flag_RunningAvg && epoch >= (int)Math.Ceiling(((float)nEpoch) / 2.0f))
{
DumpingFeature_BP_LDA(TrainData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
DumpingFeature_BP_LDA(TestData, paramModel_avg, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
}
else
{
DumpingFeature_BP_LDA(TrainData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".train.fea", "Train");
DumpingFeature_BP_LDA(TestData, paramModel, paramTrain.BatchSize_Test, ResultFile + ".test.fea", "Test");
}
}
}
}