/// <summary>
/// Forward activation for the LDA model: unfolds up to nHidLayer mirror-descent
/// (MDA) inference steps for each sample, then applies the output layer.
/// Each column of Xt holds one input sample.
/// </summary>
public static void ForwardActivation_LDA(SparseMatrix Xt, DNNRun_t DNNRun, paramModel_t paramModel, bool flag_IsTraining)
{
    // -------- Extract parameters --------
    int nHid = paramModel.nHid;
    int nHidLayer = paramModel.nHidLayer;
    float eta = paramModel.eta;
    float T_value = paramModel.T_value;
    string OutputType = paramModel.OutputType;
    float To = paramModel.To;
    int BatchSize = Xt.nCols;
    // -------- Hidden activations --------
    // ---- Reset the effective number of hidden layers (mainly for the alpha < 1 case) ----
    Array.Clear(DNNRun.nHidLayerEffective, 0, DNNRun.nHidLayerEffective.Length);
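    // nHidLayerEffective[i] records how many MDA layers sample i actually
    // unfolds before early stopping; it is incremented once per accepted
    // layer inside the parallel loop below.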
    // ---- T differs across layers (adaptive step-size MDA) ----
    DenseRowVector T = new DenseRowVector(BatchSize, T_value);
    SparseMatrix Phitheta = new SparseMatrix(Xt);
    DenseRowVector loss_pre = new DenseRowVector(BatchSize);
    DenseRowVector loss_post = new DenseRowVector(BatchSize);
    DenseRowVector loss_gap = new DenseRowVector(BatchSize);
    DenseRowVector loss_gap_thresh = new DenseRowVector(BatchSize);
    DenseRowVector gradproj = new DenseRowVector(BatchSize);
    SparseMatrix TmpSparseMat = new SparseMatrix(Xt);
    DenseMatrix TmpDenseMat = new DenseMatrix(nHid, BatchSize);
    DenseMatrix LogTheta = new DenseMatrix(nHid, BatchSize);
    DenseRowVector TmpDenseRowVec = new DenseRowVector(BatchSize);
    DenseMatrix NegGrad = new DenseMatrix(nHid, BatchSize);
    DenseMatrix LLR = new DenseMatrix(nHid, BatchSize);
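    // Each column of Xt is one sample and is processed independently. One
    // hidden "layer" is one mirror-descent step on the per-sample LDA loss
    //     L(theta) = -x' * log(Phi * theta) - b' * log(theta),
    // i.e., theta_l = normalize(theta_{l-1} .* exp(T_l * NegGrad)), where b
    // plays the role of (alpha - 1) in the Dirichlet prior (an assumption
    // based on how b enters the loss computed below).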
    Parallel.For(0, BatchSize, new ParallelOptions { MaxDegreeOfParallelism = MatrixOperation.MaxMultiThreadDegree }, IdxSample =>
    {
        float KLDivergence = 0.0f;
        // The forward activation for each data sample
        for (int IdxLayer = 0; IdxLayer < nHidLayer; IdxLayer++)
        {
            // Compute the loss before unfolding the current layer
            if (IdxLayer == 0)
            {
                MatrixOperation.MatrixMultiplyVector(
                    Phitheta.SparseColumnVectors[IdxSample],
                    paramModel.Phi,
                    DNNRun.theta0.DenseMatrixValue[IdxSample]
                );
            }
            else
            {
                MatrixOperation.MatrixMultiplyVector(
                    Phitheta.SparseColumnVectors[IdxSample],
                    paramModel.Phi,
                    DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
                );
            }
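            // For IdxLayer >= 2, the pre-update loss is exactly the post-update
            // loss computed during the previous layer's line search, so it is
            // reused; layers 0 and 1 compute it from scratch.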
            if (IdxLayer > 1)
            {
                loss_pre.VectorValue[IdxSample] = loss_post.VectorValue[IdxSample];
            }
            else
            {
                MatrixOperation.ScalarAddVector(TmpSparseMat.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample], 1e-12f);
                MatrixOperation.Log(TmpSparseMat.SparseColumnVectors[IdxSample]);
                MatrixOperation.ElementwiseVectorMultiplyVector(TmpSparseMat.SparseColumnVectors[IdxSample], Xt.SparseColumnVectors[IdxSample]);
                loss_pre.VectorValue[IdxSample] = (-1.0f) * TmpSparseMat.SparseColumnVectors[IdxSample].Sum();
                if (IdxLayer == 0)
                {
                    MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta0.DenseMatrixValue[IdxSample], 1e-12f);
                }
                else
                {
                    MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample], 1e-12f);
                }
                MatrixOperation.Log(TmpDenseMat.DenseMatrixValue[IdxSample]);
                MatrixOperation.ElementwiseVectorMultiplyVector(TmpDenseMat.DenseMatrixValue[IdxSample], paramModel.b);
                TmpDenseRowVec.VectorValue[IdxSample] = TmpDenseMat.DenseMatrixValue[IdxSample].Sum();
                loss_pre.VectorValue[IdxSample] -= TmpDenseRowVec.VectorValue[IdxSample];
            }
            // Compute the hidden activation of the current layer
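            // Negative gradient of the loss at the current theta:
            //     NegGrad = Phi' * (x ./ (Phi*theta + eps)) + b ./ (theta + eps)
            // The small eps (1e-12) guards against division by zero and log(0).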
            MatrixOperation.ScalarAddVector(TmpSparseMat.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample], 1e-12f);
            MatrixOperation.ElementwiseVectorDivideVector(
                TmpSparseMat.SparseColumnVectors[IdxSample],
                Xt.SparseColumnVectors[IdxSample],
                TmpSparseMat.SparseColumnVectors[IdxSample]
            );
            MatrixOperation.MatrixTransposeMultiplyVector(
                TmpDenseMat.DenseMatrixValue[IdxSample],
                paramModel.Phi,
                TmpSparseMat.SparseColumnVectors[IdxSample]
            );
            if (IdxLayer == 0)
            {
                MatrixOperation.ScalarAddVector(
                    NegGrad.DenseMatrixValue[IdxSample],
                    DNNRun.theta0.DenseMatrixValue[IdxSample],
                    1e-12f
                );
            }
            else
            {
                MatrixOperation.ScalarAddVector(
                    NegGrad.DenseMatrixValue[IdxSample],
                    DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample],
                    1e-12f
                );
            }
            MatrixOperation.ElementwiseVectorDivideVector(NegGrad.DenseMatrixValue[IdxSample], paramModel.b, NegGrad.DenseMatrixValue[IdxSample]);
            MatrixOperation.VectorAddVector(NegGrad.DenseMatrixValue[IdxSample], TmpDenseMat.DenseMatrixValue[IdxSample]);
            // Line search for the step size T (performed only in the convex
            // alpha >= 1 case): first scale T up by 1/eta (for the typical
            // 0 < eta < 1 this enlarges it), then shrink it inside the loop
            // below until the acceptance condition holds.
            if (paramModel.alpha >= 1)
            {
                T.VectorValue[IdxSample] *= (1.0f / eta);
            }
            loss_post.VectorValue[IdxSample] = loss_pre.VectorValue[IdxSample];
            if (IdxLayer == 0)
            {
                MatrixOperation.Log(LogTheta.DenseMatrixValue[IdxSample], DNNRun.theta0.DenseMatrixValue[IdxSample]);
            }
            else
            {
                MatrixOperation.Log(LogTheta.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]);
            }
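            // Exponentiated-gradient (mirror-descent) update, computed in the
            // log domain for numerical stability:
            //     theta_l = softmax(log(theta_{l-1}) + T * NegGrad)
            // where subtracting the max before Exp prevents overflow.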
            while (true)
            {
                MatrixOperation.ScalarMultiplyVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                    NegGrad.DenseMatrixValue[IdxSample], T.VectorValue[IdxSample]);
                MatrixOperation.VectorAddVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                    LogTheta.DenseMatrixValue[IdxSample]);
                MatrixOperation.ScalarAddVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                    (-1.0f) * DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample].MaxValue());
                MatrixOperation.Exp(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]);
                MatrixOperation.ScalarMultiplyVector(DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                    (1.0f / DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample].Sum()));
                // Compute the loss after unfolding the current layer
                MatrixOperation.MatrixMultiplyVector(Phitheta.SparseColumnVectors[IdxSample],
                    paramModel.Phi, DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]);
                MatrixOperation.Log(Phitheta.SparseColumnVectors[IdxSample]);
                loss_post.VectorValue[IdxSample]
                    = (-1.0f) * MatrixOperation.InnerProduct(Xt.SparseColumnVectors[IdxSample], Phitheta.SparseColumnVectors[IdxSample]);
                MatrixOperation.ScalarAddVector(TmpDenseMat.DenseMatrixValue[IdxSample], DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample], 1e-12f);
                MatrixOperation.Log(TmpDenseMat.DenseMatrixValue[IdxSample]);
                loss_post.VectorValue[IdxSample] -= MatrixOperation.InnerProduct(TmpDenseMat.DenseMatrixValue[IdxSample], paramModel.b);
                if (IdxLayer == 0)
                {
                    MatrixOperation.VectorSubtractVector(TmpDenseMat.DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                        DNNRun.theta0.DenseMatrixValue[IdxSample]);
                }
                else
                {
                    MatrixOperation.VectorSubtractVector(TmpDenseMat.DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]);
                }
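                // Acceptance test for the backtracking line search: keep T if
                //     loss_post - loss_pre <= <grad, dtheta> + ||dtheta||_1^2 / (2T),
                // a sufficient-decrease condition in which the squared L1 norm
                // lower-bounds the KL prox term of mirror descent (via
                // Pinsker's inequality).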
                loss_gap.VectorValue[IdxSample] = loss_post.VectorValue[IdxSample] - loss_pre.VectorValue[IdxSample];
                gradproj.VectorValue[IdxSample]
                    = (-1.0f) * MatrixOperation.InnerProduct(NegGrad.DenseMatrixValue[IdxSample],
                        TmpDenseMat.DenseMatrixValue[IdxSample]);
                loss_gap_thresh.VectorValue[IdxSample] = gradproj.VectorValue[IdxSample]
                    + (0.5f / T.VectorValue[IdxSample]) * (float)Math.Pow((double)TmpDenseMat.DenseMatrixValue[IdxSample].L1Norm(), 2.0);
                if (loss_gap.VectorValue[IdxSample] > loss_gap_thresh.VectorValue[IdxSample] + 1e-12 && paramModel.alpha >= 1)
                {
                    // Condition violated: shrink the step size and retry
                    // (only in the alpha >= 1, convex case).
                    T.VectorValue[IdxSample] *= eta;
                }
                else
                {
                    DNNRun.T_pool.DenseMatrixValuePerRow[IdxLayer].VectorValue[IdxSample] = T.VectorValue[IdxSample];
                    break;
                }
            }
            // Count the effective number of hidden layers
            ++DNNRun.nHidLayerEffective[IdxSample];
            // Stop the MDA unfolding early if the termination condition holds
            if (paramModel.flag_AdaptivenHidLayer)
            {
                if (IdxLayer == 0)
                {
                    MatrixOperation.ElementwiseVectorDivideVector(
                        LLR.DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                        DNNRun.theta0.DenseMatrixValue[IdxSample]
                    );
                    MatrixOperation.Log(LLR.DenseMatrixValue[IdxSample]);
                }
                else
                {
                    MatrixOperation.ElementwiseVectorDivideVector(
                        LLR.DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer - 1].DenseMatrixValue[IdxSample]
                    );
                    MatrixOperation.Log(LLR.DenseMatrixValue[IdxSample]);
                    // ResetVectorSparsePattern presumably zeroes LLR wherever the
                    // new theta is zero, so those entries contribute nothing to
                    // the KL sum below (0 * log(0) -> 0).
                    MatrixOperation.ResetVectorSparsePattern(
                        LLR.DenseMatrixValue[IdxSample],
                        DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]
                    );
                }
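                // KL(theta_l || theta_{l-1}) = sum_k theta_l[k] * log(theta_l[k] / theta_{l-1}[k]);
                // stop unfolding once consecutive layers are numerically identical.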
                KLDivergence = MatrixOperation.InnerProduct(
                    LLR.DenseMatrixValue[IdxSample],
                    DNNRun.theta_pool[IdxLayer].DenseMatrixValue[IdxSample]
                );
                if (KLDivergence < 1e-12f)
                {
                    break;
                }
            }
        }
        // ---- Generate output ----
        switch (OutputType)
        {
            case "softmaxCE":
                MatrixOperation.MatrixMultiplyVector(
                    DNNRun.y.DenseMatrixValue[IdxSample],
                    paramModel.U,
                    DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
                );
                MatrixOperation.ScalarAddVector(DNNRun.y.DenseMatrixValue[IdxSample], To);
                TmpDenseRowVec.VectorValue[IdxSample] = DNNRun.y.DenseMatrixValue[IdxSample].MaxValue();
                MatrixOperation.ScalarAddVector(DNNRun.y.DenseMatrixValue[IdxSample], (-1.0f) * TmpDenseRowVec.VectorValue[IdxSample]);
                MatrixOperation.Exp(DNNRun.y.DenseMatrixValue[IdxSample]);
                TmpDenseRowVec.VectorValue[IdxSample] = DNNRun.y.DenseMatrixValue[IdxSample].Sum();
                MatrixOperation.ScalarMultiplyVector(DNNRun.y.DenseMatrixValue[IdxSample], (1.0f) / TmpDenseRowVec.VectorValue[IdxSample]);
                break;
case "unsupLDA":
// Will not compute the reconstructed input at forward activation to save time during training.
break;
case "linearQuad":
                MatrixOperation.MatrixMultiplyVector(
                    DNNRun.y.DenseMatrixValue[IdxSample],
                    paramModel.U,
                    DNNRun.theta_pool[DNNRun.nHidLayerEffective[IdxSample] - 1].DenseMatrixValue[IdxSample]
                );
                break;
case "linearCE":
throw new Exception("linearCE not implemented.");
default:
throw new Exception("Unknown OutputType.");
}
});
}