BP_LDA.LDA_Learn.ForwardActivation_LDA C# (CSharp) Method

ForwardActivation_LDA() public static method

public static ForwardActivation_LDA ( SparseMatrix Xt, DNNRun_t DNNRun, paramModel_t paramModel, bool flag_IsTraining ) : void
Parameter        Type
Xt               LinearAlgebra.SparseMatrix
DNNRun           DNNRun_t
paramModel       paramModel_t
flag_IsTraining  bool
Return           void
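
ForwardActivation_LDA runs the unfolded mirror-descent (MDA) inference for one mini-batch: starting from DNNRun.theta0, it applies up to paramModel.nHidLayer multiplicative updates per sample, with a per-sample backtracking line search on the step size T, and then fills DNNRun.y according to paramModel.OutputType. The sketch below only shows how the call fits together; constructing Xt, DNNRun, and paramModel is handled elsewhere in BP_LDA, so they appear here as already-initialized arguments and the wrapper method is purely illustrative.

		// Minimal calling sketch (hypothetical wrapper; the BP_LDA objects are assumed
		// to be built by the surrounding training/testing code).
		static void RunForwardPass(SparseMatrix Xt, DNNRun_t DNNRun, paramModel_t paramModel, bool isTraining)
		{
			// One forward pass through the unfolded MDA layers.
			LDA_Learn.ForwardActivation_LDA(Xt, DNNRun, paramModel, isTraining);

			// For sample j, the inferred topic proportions sit in the top effective layer:
			// DNNRun.theta_pool[DNNRun.nHidLayerEffective[j] - 1].DenseMatrixValue[j].
			// For the "softmaxCE" and "linearQuad" output types, DNNRun.y holds the output.
		}

The full method body follows.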
		public static void ForwardActivation_LDA(SparseMatrix Xt, DNNRun_t DNNRun, paramModel_t paramModel, bool flag_IsTraining)
		{
			// -------- Extract parameters --------
			int nHid = paramModel.nHid;
			int nHidLayer = paramModel.nHidLayer;
			float eta = paramModel.eta;
			float T_value = paramModel.T_value;
			string OutputType = paramModel.OutputType;
			float To = paramModel.To;
			int BatchSize = Xt.nCols;

			// -------- Hidden activations --------
			// ---- Reset the effective number of hidden layers (mainly for alpha<1 case) ----
			Array.Clear(DNNRun.nHidLayerEffective, 0, DNNRun.nHidLayerEffective.Length);
			// ---- T is different over layers (adaptive step-size MDA) ----
			DenseRowVector T = new DenseRowVector(BatchSize, T_value);
			SparseMatrix Phitheta = new SparseMatrix(Xt);
			DenseRowVector loss_pre = new DenseRowVector(BatchSize);
			DenseRowVector loss_post = new DenseRowVector(BatchSize);
			DenseRowVector loss_gap = new DenseRowVector(BatchSize);
			DenseRowVector loss_gap_thresh = new DenseRowVector(BatchSize);
			DenseRowVector gradproj = new DenseRowVector(BatchSize);
			SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
			DenseMatrix TmpDenseMat = new DenseMatrix(nHid, BatchSize);
			DenseMatrix LogTheta = new DenseMatrix(nHid, BatchSize);
			DenseRowVector TmpDenseRowVec = new DenseRowVector(BatchSize);
			DenseMatrix NegGrad = new DenseMatrix(nHid, BatchSize);
			DenseMatrix LLR = new DenseMatrix(nHid, BatchSize);            
			//for (int IdxSample = 0; IdxSample < BatchSize; IdxSample++)
			Parallel.For(0, BatchSize, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxSample =>
				{
					float KLDivergence = 0.0f;
					// The forward activation for each data sample
					for (int IdxLayer = 0; IdxLayer < nHidLayer; IdxLayer++)
					{
						// Compute the loss before unfolding the current layer
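						// Per-sample loss: loss(theta) = -x' * log(Phi*theta + eps) - b' * log(theta + eps)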
						if (IdxLayer == 0)
						{
							MatrixOperation.MatrixMultiplyVector(
								Phitheta.SparseColumnVectors[IdxSample], 
								paramModel.Phi, 
								DNNRun.theta0.DenseMatrixValue[IdxSample]
							);
						}
						else
						{
							MatrixOperation.MatrixMultiplyVector(
								Phitheta.SparseColumnVectors[IdxSample], 
								paramModel.Phi, 
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
							);
						}
						if (IdxLayer > 1)
						{
							loss_pre.VectorValue[IdxSample] = loss_post.VectorValue[IdxSample];
						}
						else
						{
							MatrixOperation.ScalarAddVector(TmpSparseMat.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample], 1e-12f);
							MatrixOperation.Log(TmpSparseMat.SparseColumnVectors[IdxSample]);
							MatrixOperation.ElementwiseVectorMultiplyVector(TmpSparseMat.SparseColumnVectors[IdxSample], Xt.SparseColumnVectors[IdxSample]);
							loss_pre.VectorValue[IdxSample] = (-1.0f)*TmpSparseMat.SparseColumnVectors[IdxSample].Sum();
							if (IdxLayer == 0)
							{
								MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta0.DenseMatrixValue[IdxSample], 1e-12f);
							}
							else
							{
								MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample], 1e-12f);
							}
							MatrixOperation.Log(TmpDenseMat.DenseMatrixValue[IdxSample]);
							MatrixOperation.ElementwiseVectorMultiplyVector(TmpDenseMat.DenseMatrixValue[IdxSample], paramModel.b);
							TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
							loss_pre.VectorValue[IdxSample] -= TmpDenseRowVec.VectorValue[IdxSample];
						}
						// Compute the hidden activation of the current layer
						MatrixOperation.ScalarAddVector(TmpSparseMat.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample], 1e-12f);
						MatrixOperation.ElementwiseVectorDivideVector(
							TmpSparseMat.SparseColumnVectors[IdxSample], 
							Xt.SparseColumnVectors[IdxSample], 
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						MatrixOperation.MatrixTransposeMultiplyVector(
							TmpDenseMat.DenseMatrixValue[IdxSample], 
							paramModel.Phi, 
							TmpSparseMat.SparseColumnVectors[IdxSample]
						);
						if (IdxLayer == 0)
						{
							MatrixOperation.ScalarAddVector(
								NegGrad.DenseMatrixValue[IdxSample], 
								DNNRun.theta0.DenseMatrixValue[IdxSample], 
								1e-12f
							);
						}
						else
						{
							MatrixOperation.ScalarAddVector(
								NegGrad.DenseMatrixValue[IdxSample], 
								DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample], 
								1e-12f
							);
						}
						MatrixOperation.ElementwiseVectorDivideVector(NegGrad.DenseMatrixValue[IdxSample], paramModel.b, NegGrad.DenseMatrixValue[IdxSample]);
						MatrixOperation.VectorAddVector(NegGrad.DenseMatrixValue[IdxSample], TmpDenseMat.DenseMatrixValue[IdxSample]);
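						// NegGrad = Phi' * (x ./ (Phi*theta + eps)) + b ./ (theta + eps), the negative gradient of the per-sample loss w.r.t. theta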
						// Line search for the parameter T
						if (paramModel.alpha >= 1)
						{
							T.VectorValue[IdxSample] *= (1.0f / eta);
						} // only perform line search for alpha>=1 case (convex)
						loss_post.VectorValue[IdxSample] = loss_pre.VectorValue[IdxSample];
						if (IdxLayer == 0)
						{
							MatrixOperation.Log(LogTheta.DenseMatrixValue[IdxSample], DNNRun.theta0.DenseMatrixValue[IdxSample]);
						}
						else
						{
							MatrixOperation.Log(LogTheta.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]);
						}
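						// Backtracking line search: propose theta_new proportional to theta_prev .* exp(T * NegGrad)
						// (computed in the log domain with max-subtraction for stability); if the decrease in loss
						// is insufficient, rescale T by eta and try again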
						while (true)
						{
							MatrixOperation.ScalarMultiplyVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								NegGrad.DenseMatrixValue[IdxSample], T.VectorValue[IdxSample]);
							MatrixOperation.VectorAddVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								LogTheta.DenseMatrixValue[IdxSample]);
							MatrixOperation.ScalarAddVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								(-1.0f) * DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample].MaxValue());
							MatrixOperation.Exp(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]);
							MatrixOperation.ScalarMultiplyVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
								(1.0f / DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample].Sum()));
							// Compute the loss after unfolding the current layer
							MatrixOperation.MatrixMultiplyVector(Phitheta.SparseColumnVectors[IdxSample],
								paramModel.Phi, DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]);
							MatrixOperation.Log(Phitheta.SparseColumnVectors[IdxSample]);
							loss_post.VectorValue[IdxSample]
							= (-1.0f) * MatrixOperation.InnerProduct(Xt.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample]);
							MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample], 1e-12f);
							MatrixOperation.Log(TmpDenseMat.DenseMatrixValue[IdxSample]);
							loss_post.VectorValue[IdxSample] -= MatrixOperation.InnerProduct(TmpDenseMat.DenseMatrixValue[IdxSample], paramModel.b);
							if (IdxLayer == 0)
							{
								MatrixOperation.VectorSubtractVector(TmpDenseMat.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta0.DenseMatrixValue[IdxSample]);
							}
							else
							{
								MatrixOperation.VectorSubtractVector(TmpDenseMat.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]);
							}
							loss_gap.VectorValue[IdxSample] = loss_post.VectorValue[IdxSample] - loss_pre.VectorValue[IdxSample];
							gradproj.VectorValue[IdxSample]
							= (-1.0f) * MatrixOperation.InnerProduct(NegGrad.DenseMatrixValue[IdxSample],
								TmpDenseMat.DenseMatrixValue[IdxSample]);
							loss_gap_thresh.VectorValue[IdxSample] = gradproj.VectorValue[IdxSample]
								+ (0.5f / T.VectorValue[IdxSample]) * (float)Math.Pow((double)TmpDenseMat.DenseMatrixValue[IdxSample].L1Norm(), 2.0);
							if (loss_gap.VectorValue[IdxSample] > loss_gap_thresh.VectorValue[IdxSample] + 1e-12 && paramModel.alpha >= 1)
							{
								T.VectorValue[IdxSample] *= eta;
							} // Only perform line search for alpha>=1 case (convex)
							else
							{
								DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample] = T.VectorValue[IdxSample];
								break;
							}
						}
						// Count the effective number of hidden layers
						++DNNRun.nHidLayerEffective[IdxSample];
						// Stop unfolding (MDA) early if the termination condition holds: KL(theta_l || theta_{l-1}) is negligible
						if (paramModel.flag_AdaptivenHidLayer)
						{
							if (IdxLayer == 0)
							{
								MatrixOperation.ElementwiseVectorDivideVector(
									LLR.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta0.DenseMatrixValue[IdxSample]
								);
								MatrixOperation.Log(LLR.DenseMatrixValue[IdxSample]);
							}
							else
							{
								MatrixOperation.ElementwiseVectorDivideVector(
									LLR.DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
									DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
								);
								MatrixOperation.Log(LLR.DenseMatrixValue[IdxSample]);
								MatrixOperation.ResetVectorSparsePattern(
									LLR.DenseMatrixValue[IdxSample], 
									DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]
								);
							}
							KLDivergence = MatrixOperation.InnerProduct(
								LLR.DenseMatrixValue[IdxSample], 
								DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]
							);
							if (KLDivergence < 1e-12f)
							{
								break;
							}
						}
					}
					// ---- Generate output ----
					switch (OutputType)
					{
					case "softmaxCE":
						MatrixOperation.MatrixMultiplyVector(
							DNNRun.y.DenseMatrixValue[IdxSample],
							paramModel.U,
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						MatrixOperation.ScalarAddVector(DNNRun.y.DenseMatrixValue[IdxSample], To);
						TmpDenseRowVec.VectorValue[IdxSample] = DNNRun.y.DenseMatrixValue[IdxSample].MaxValue();
						MatrixOperation.ScalarAddVector(DNNRun.y.DenseMatrixValue[IdxSample], (-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]);
						MatrixOperation.Exp(DNNRun.y.DenseMatrixValue[IdxSample]);
						TmpDenseRowVec.VectorValue[IdxSample] = DNNRun.y.DenseMatrixValue[IdxSample].Sum();
						MatrixOperation.ScalarMultiplyVector(DNNRun.y.DenseMatrixValue[IdxSample], (1.0f) / TmpDenseRowVec.VectorValue[IdxSample]);
						break;
					case "unsupLDA":
						// The reconstructed input is not computed during forward activation, to save time during training.
						break;
					case "linearQuad":
						MatrixOperation.MatrixMultiplyVector(
							DNNRun.y.DenseMatrixValue[IdxSample],
							paramModel.U,
							DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
						);
						break;
					case "linearCE":
						throw new Exception("linearCE not implemented.");
					default:
						throw new Exception("Unknown OutputType.");
					}
				});            
		}