/// <summary>
/// Builds a per-row weight vector equal to the reciprocal of each row's
/// "document frequency", i.e. the number of stored (nonzero) entries that
/// reference that row across all sparse columns of <paramref name="InputData"/>.
/// </summary>
/// <param name="InputData">
/// Sparse matrix whose columns are scanned. For every column, the first
/// <c>nNonzero</c> entries of its <c>Key</c> array are used as row indices.
/// Presumably rows are vocabulary terms and columns are documents (the
/// progress messages suggest this) - confirm against the caller.
/// </param>
/// <returns>
/// A <c>DenseColumnVector</c> of length <c>InputData.nRows</c> where entry
/// <c>r</c> is <c>1 / DocFreq[r]</c> when row <c>r</c> occurs at least once,
/// and <c>1 / InputData.nCols</c> otherwise.
/// </returns>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="InputData"/> is <c>null</c>.
/// </exception>
/// <remarks>
/// The weight is the plain reciprocal of the count; no logarithm is applied.
/// Progress is reported on the console every 10000 columns/rows.
/// NOTE(review): when <c>InputData.nCols</c> is 0 the fallback evaluates to
/// <c>1.0f / 0.0f</c>, which is positive infinity - confirm whether callers
/// can ever pass a matrix without columns.
/// </remarks>
public static DenseColumnVector ComputeInverseDocumentFrequency(SparseMatrix InputData)
{
    // Fail fast with a descriptive exception instead of a NullReferenceException
    // raised half-way through the console output.
    if (InputData == null)
    {
        throw new ArgumentNullException("InputData");
    }

    // How many items are processed between two progress updates.
    const int ProgressInterval = 10000;

    Console.WriteLine("==================================================");

    DenseColumnVector IDF = new DenseColumnVector(InputData.nRows);
    int[] DocFreq = new int[InputData.nRows];

    // Pass 1: for every row index, count the stored entries that reference it.
    int Cnt = 0;
    for (int IdxCol = 0; IdxCol < InputData.nCols; ++IdxCol)
    {
        int nNonzero = InputData.SparseColumnVectors[IdxCol].nNonzero;
        int[] ColKey = InputData.SparseColumnVectors[IdxCol].Key;

        // Only the first nNonzero slots of Key are read; each one is a row index.
        for (int IdxEntry = 0; IdxEntry < nNonzero; ++IdxEntry)
        {
            ++DocFreq[ColKey[IdxEntry]];
        }

        ++Cnt;
        if (Cnt % ProgressInterval == 0)
        {
            // "\r" rewinds to the start of the line so the counter updates in place.
            Console.Write("Generating document frequency: {0}/{1}\r", Cnt, InputData.nCols);
        }
    }
    Console.WriteLine("Generating document frequency: {0}/{1}", Cnt, InputData.nCols);

    // Pass 2: turn the counts into reciprocal weights.
    Cnt = 0;
    for (int IdxRow = 0; IdxRow < InputData.nRows; ++IdxRow)
    {
        if (DocFreq[IdxRow] > 0)
        {
            IDF.VectorValue[IdxRow] = 1.0f / ((float)DocFreq[IdxRow]);
        }
        else
        {
            // Row never occurs: fall back to 1 / (number of columns) rather
            // than dividing by a zero count.
            IDF.VectorValue[IdxRow] = 1.0f / ((float)InputData.nCols);
        }

        ++Cnt;
        if (Cnt % ProgressInterval == 0)
        {
            Console.Write("Generating inverse document frequency: {0}/{1}\r", Cnt, InputData.nRows);
        }
    }
    Console.WriteLine("Generating inverse document frequency: {0}/{1}", Cnt, InputData.nRows);
    Console.WriteLine("==================================================");

    return IDF;
}