/// <summary>
/// Adds the feature keys describing a candidate attachment between positions
/// <paramref name="small"/> and <paramref name="large"/> to <paramref name="fv"/>.
/// </summary>
/// <remarks>
/// Every feature is a string key handed to <c>Add</c> with value 1.0. Most keys are
/// emitted in two flavours: bare, and suffixed with <c>&amp;RA</c>/<c>&amp;LA</c> plus a
/// bucketed distance (<c>0</c> for a span of at most 1, <c>1</c>..<c>4</c> for spans
/// 2..5, <c>5</c> for spans 6..10, <c>10</c> for anything longer).
/// Keys whose prefix starts with <c>X</c> are built from <paramref name="posA"/>;
/// keys whose prefix starts with <c>S</c> use tokens cut to their first five characters.
/// </remarks>
/// <param name="toks">Token strings, indexed by position.</param>
/// <param name="pos">Tag strings, indexed by position (presumably fine-grained POS tags; confirm against caller).</param>
/// <param name="posA">Second tag array, indexed like <paramref name="pos"/> (presumably coarse POS tags; confirm against caller).</param>
/// <param name="small">Index of one end of the arc; the code treats it as the left end (assumes small &lt; large; TODO confirm).</param>
/// <param name="large">Index of the other end of the arc; treated as the right end.</param>
/// <param name="attR">When true the head is the token at <paramref name="small"/> and the label is "RA"; otherwise the head is at <paramref name="large"/> and the label is "LA".</param>
/// <param name="fv">Feature vector that receives the features.</param>
/// <returns>The same <paramref name="fv"/> instance that was passed in.</returns>
public virtual FeatureVector CreateFeatureVector(string[] toks,
    string[] pos,
    string[] posA,
    int small,
    int large,
    bool attR,
    FeatureVector fv)
{
    // Direction label and bucketed distance shared by all "directed" keys.
    string direction = attR ? "RA" : "LA";
    int span = Math.Abs(large - small);
    string bucket;
    if (span <= 1)
    {
        bucket = "0";
    }
    else if (span <= 2)
    {
        bucket = "1";
    }
    else if (span <= 3)
    {
        bucket = "2";
    }
    else if (span <= 4)
    {
        bucket = "3";
    }
    else if (span <= 5)
    {
        bucket = "4";
    }
    else if (span <= 10)
    {
        bucket = "5";
    }
    else
    {
        bucket = "10";
    }
    string dirDist = "&" + direction + "&" + bucket;

    // Tags surrounding the two ends; placeholders mark the sequence boundaries
    // ("STR"/"END") and the absence of anything strictly between the ends ("MID").
    string before = small <= 0 ? "STR" : pos[small - 1];
    string after = large >= pos.Length - 1 ? "END" : pos[large + 1];
    string innerLeft = small >= large - 1 ? "MID" : pos[small + 1];
    string innerRight = large <= small + 1 ? "MID" : pos[large - 1];

    string beforeA = small <= 0 ? "STR" : posA[small - 1];
    // NOTE(review): this bound is taken from pos.Length, not posA.Length; the two
    // arrays are presumably the same length -- confirm against caller.
    string afterA = large >= pos.Length - 1 ? "END" : posA[large + 1];
    string innerLeftA = small >= large - 1 ? "MID" : posA[small + 1];
    string innerRightA = large <= small + 1 ? "MID" : posA[large - 1];

    string leftTag = pos[small];
    string rightTag = pos[large];

    // feature posL posMid posR, one set per position strictly between the ends
    for (int mid = small + 1; mid < large; mid++)
    {
        string triple = leftTag + " " + pos[mid] + " " + rightTag;
        string tripleA = posA[small] + " " + posA[mid] + " " + posA[large];
        Add("PC=" + triple + dirDist, 1.0, fv);
        Add("1PC=" + triple, 1.0, fv);
        Add("XPC=" + tripleA + dirDist, 1.0, fv);
        Add("X1PC=" + tripleA, 1.0, fv);
    }

    // feature posL-1 posL posR posR+1
    Add("PT=" + before + " " + leftTag + " " + rightTag + " " + after + dirDist, 1.0, fv);
    Add("PT1=" + leftTag + " " + rightTag + " " + after + dirDist, 1.0, fv);
    Add("PT2=" + before + " " + leftTag + " " + rightTag + dirDist, 1.0, fv);
    Add("PT3=" + before + " " + rightTag + " " + after + dirDist, 1.0, fv);
    Add("PT4=" + before + " " + leftTag + " " + after + dirDist, 1.0, fv);
    Add("1PT=" + before + " " + leftTag + " " + rightTag + " " + after, 1.0, fv);
    Add("1PT1=" + leftTag + " " + rightTag + " " + after, 1.0, fv);
    Add("1PT2=" + before + " " + leftTag + " " + rightTag, 1.0, fv);
    Add("1PT3=" + before + " " + rightTag + " " + after, 1.0, fv);
    Add("1PT4=" + before + " " + leftTag + " " + after, 1.0, fv);

    // Same templates over the second tag array. The end tags of posA are read only
    // here, after the pos-based keys above have been added.
    string leftTagA = posA[small];
    string rightTagA = posA[large];
    Add("XPT=" + beforeA + " " + leftTagA + " " + rightTagA + " " + afterA + dirDist, 1.0, fv);
    Add("XPT1=" + leftTagA + " " + rightTagA + " " + afterA + dirDist, 1.0, fv);
    Add("XPT2=" + beforeA + " " + leftTagA + " " + rightTagA + dirDist, 1.0, fv);
    Add("XPT3=" + beforeA + " " + rightTagA + " " + afterA + dirDist, 1.0, fv);
    Add("XPT4=" + beforeA + " " + leftTagA + " " + afterA + dirDist, 1.0, fv);
    Add("X1PT=" + beforeA + " " + leftTagA + " " + rightTagA + " " + afterA, 1.0, fv);
    Add("X1PT1=" + leftTagA + " " + rightTagA + " " + afterA, 1.0, fv);
    Add("X1PT2=" + beforeA + " " + leftTagA + " " + rightTagA, 1.0, fv);
    Add("X1PT3=" + beforeA + " " + rightTagA + " " + afterA, 1.0, fv);
    Add("X1PT4=" + beforeA + " " + leftTagA + " " + afterA, 1.0, fv);

    // feature posL posL+1 posR-1 posR
    Add("APT=" + leftTag + " " + innerLeft + " " + innerRight + " " + rightTag + dirDist, 1.0, fv);
    Add("APT1=" + leftTag + " " + innerRight + " " + rightTag + dirDist, 1.0, fv);
    Add("APT2=" + leftTag + " " + innerLeft + " " + rightTag + dirDist, 1.0, fv);
    Add("APT3=" + innerLeft + " " + innerRight + " " + rightTag + dirDist, 1.0, fv);
    Add("APT4=" + leftTag + " " + innerLeft + " " + innerRight + dirDist, 1.0, fv);
    Add("1APT=" + leftTag + " " + innerLeft + " " + innerRight + " " + rightTag, 1.0, fv);
    Add("1APT1=" + leftTag + " " + innerRight + " " + rightTag, 1.0, fv);
    Add("1APT2=" + leftTag + " " + innerLeft + " " + rightTag, 1.0, fv);
    Add("1APT3=" + innerLeft + " " + innerRight + " " + rightTag, 1.0, fv);
    Add("1APT4=" + leftTag + " " + innerLeft + " " + innerRight, 1.0, fv);
    Add("XAPT=" + leftTagA + " " + innerLeftA + " " + innerRightA + " " + rightTagA + dirDist, 1.0, fv);
    Add("XAPT1=" + leftTagA + " " + innerRightA + " " + rightTagA + dirDist, 1.0, fv);
    Add("XAPT2=" + leftTagA + " " + innerLeftA + " " + rightTagA + dirDist, 1.0, fv);
    Add("XAPT3=" + innerLeftA + " " + innerRightA + " " + rightTagA + dirDist, 1.0, fv);
    Add("XAPT4=" + leftTagA + " " + innerLeftA + " " + innerRightA + dirDist, 1.0, fv);
    Add("X1APT=" + leftTagA + " " + innerLeftA + " " + innerRightA + " " + rightTagA, 1.0, fv);
    Add("X1APT1=" + leftTagA + " " + innerRightA + " " + rightTagA, 1.0, fv);
    Add("X1APT2=" + leftTagA + " " + innerLeftA + " " + rightTagA, 1.0, fv);
    Add("X1APT3=" + innerLeftA + " " + innerRightA + " " + rightTagA, 1.0, fv);
    Add("X1APT4=" + leftTagA + " " + innerLeftA + " " + innerRightA, 1.0, fv);

    // feature posL-1 posL posR-1 posR
    // feature posL posL+1 posR posR+1
    Add("BPT=" + before + " " + leftTag + " " + innerRight + " " + rightTag + dirDist, 1.0, fv);
    Add("1BPT=" + before + " " + leftTag + " " + innerRight + " " + rightTag, 1.0, fv);
    Add("CPT=" + leftTag + " " + innerLeft + " " + rightTag + " " + after + dirDist, 1.0, fv);
    Add("1CPT=" + leftTag + " " + innerLeft + " " + rightTag + " " + after, 1.0, fv);
    Add("XBPT=" + beforeA + " " + leftTagA + " " + innerRightA + " " + rightTagA + dirDist, 1.0, fv);
    Add("X1BPT=" + beforeA + " " + leftTagA + " " + innerRightA + " " + rightTagA, 1.0, fv);
    Add("XCPT=" + leftTagA + " " + innerLeftA + " " + rightTagA + " " + afterA + dirDist, 1.0, fv);
    Add("X1CPT=" + leftTagA + " " + innerLeftA + " " + rightTagA + " " + afterA, 1.0, fv);

    // Head/child assignment depends on the attachment direction.
    string headTok;
    string headTag;
    string childTok;
    string childTag;
    if (attR)
    {
        headTok = toks[small];
        headTag = pos[small];
        childTok = toks[large];
        childTag = pos[large];
    }
    else
    {
        headTok = toks[large];
        headTag = pos[large];
        childTok = toks[small];
        childTag = pos[small];
    }

    // Token/tag combinations of head and child.
    string headPair = headTok + " " + headTag;
    string childPair = childTok + " " + childTag;
    string quad = headPair + " " + childPair;
    string headTagChildPair = headTag + " " + childPair;
    string headPairChildTag = headPair + " " + childTag;
    string headTagChildTok = headTag + " " + childTok;
    string headTokChildTag = headTok + " " + childTag;
    string tagPair = headTag + " " + childTag;
    string tokPair = headTok + " " + childTok;

    Add("A=" + quad + dirDist, 1.0, fv);
    Add("B=" + headTagChildPair + dirDist, 1.0, fv);
    Add("C=" + headPairChildTag + dirDist, 1.0, fv);
    Add("D=" + headTagChildTok + dirDist, 1.0, fv);
    Add("E=" + headTokChildTag + dirDist, 1.0, fv);
    Add("F=" + tokPair + dirDist, 1.0, fv);
    Add("G=" + tagPair + dirDist, 1.0, fv);
    Add("H=" + headPair + dirDist, 1.0, fv);
    Add("I=" + headTag + dirDist, 1.0, fv);
    Add("J=" + headTok + dirDist, 1.0, fv);
    Add("K=" + childPair + dirDist, 1.0, fv);
    Add("L=" + childTag + dirDist, 1.0, fv);
    Add("M=" + childTok + dirDist, 1.0, fv);
    Add("AA=" + quad, 1.0, fv);
    Add("BB=" + headTagChildPair, 1.0, fv);
    Add("CC=" + headPairChildTag, 1.0, fv);
    Add("DD=" + headTagChildTok, 1.0, fv);
    Add("EE=" + headTokChildTag, 1.0, fv);
    Add("FF=" + tokPair, 1.0, fv);
    Add("GG=" + tagPair, 1.0, fv);
    Add("HH=" + headPair, 1.0, fv);
    Add("II=" + headTag, 1.0, fv);
    Add("JJ=" + headTok, 1.0, fv);
    Add("KK=" + childPair, 1.0, fv);
    Add("LL=" + childTag, 1.0, fv);
    Add("MM=" + childTok, 1.0, fv);

    // The remaining keys exist only when at least one token is longer than five characters.
    int headLen = headTok.Length;
    int childLen = childTok.Length;
    if (headLen <= 5 && childLen <= 5)
    {
        return fv;
    }

    bool headIsLong = headLen > 5;
    bool childIsLong = childLen > 5;

    // Cut the long token(s) down to their first five characters.
    string headStem = headTok;
    if (headIsLong)
    {
        headStem = headTok.SubstringWithIndex(0, 5);
    }
    string childStem = childTok;
    if (childIsLong)
    {
        childStem = childTok.SubstringWithIndex(0, 5);
    }

    string stemHeadPair = headStem + " " + headTag;
    string stemChildPair = childStem + " " + childTag;
    string stemQuad = stemHeadPair + " " + stemChildPair;
    string stemTokPair = headStem + " " + childStem;

    Add("SA=" + stemQuad + dirDist, 1.0, fv);
    Add("SF=" + stemTokPair + dirDist, 1.0, fv);
    Add("SAA=" + stemQuad, 1.0, fv);
    Add("SFF=" + stemTokPair, 1.0, fv);

    if (childIsLong)
    {
        string headTagStemChildPair = headTag + " " + stemChildPair;
        string headTagStemChild = headTag + " " + childStem;
        Add("SB=" + headTagStemChildPair + dirDist, 1.0, fv);
        Add("SD=" + headTagStemChild + dirDist, 1.0, fv);
        Add("SK=" + stemChildPair + dirDist, 1.0, fv);
        Add("SM=" + childStem + dirDist, 1.0, fv);
        Add("SBB=" + headTagStemChildPair, 1.0, fv);
        Add("SDD=" + headTagStemChild, 1.0, fv);
        Add("SKK=" + stemChildPair, 1.0, fv);
        Add("SMM=" + childStem, 1.0, fv);
    }

    if (headIsLong)
    {
        string stemHeadPairChildTag = stemHeadPair + " " + childTag;
        string stemHeadChildTag = headStem + " " + childTag;
        Add("SC=" + stemHeadPairChildTag + dirDist, 1.0, fv);
        Add("SE=" + stemHeadChildTag + dirDist, 1.0, fv);
        Add("SH=" + stemHeadPair + dirDist, 1.0, fv);
        Add("SJ=" + headStem + dirDist, 1.0, fv);
        Add("SCC=" + stemHeadPairChildTag, 1.0, fv);
        Add("SEE=" + stemHeadChildTag, 1.0, fv);
        Add("SHH=" + stemHeadPair, 1.0, fv);
        Add("SJJ=" + headStem, 1.0, fv);
    }

    return fv;
}