diff --git a/HTKLib/HAdapt.c b/HTKLib/HAdapt.c index 09b3b3a..3090542 100644 --- a/HTKLib/HAdapt.c +++ b/HTKLib/HAdapt.c @@ -45,6 +45,9 @@ /* 2008-2009 University of Edinburgh */ /* Centre for Speech Technology Research */ /* */ +/* 2011 Idiap Research Institute */ +/* http://www.idiap.ch */ +/* */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -110,6 +113,24 @@ char *hadapt_vc_id = "$Id: HAdapt.c,v 1.64 2011/06/16 04:15:50 uratec Exp $"; #define T_SWP 00200 /* Trace transform manipulation */ #define T_FRS 00400 /* Trace fisher ratio selection */ +/* Macros for brent optimization of VTLN. + Here GOLD is the default ratio by which successive intervals are magnified; + GLIMIT is the maximum magnification allowed for a parabolic-fit step. + SWAP swaps two variables.*/ +#define GOLD 1.618034 +#define GLIMIT 100.0 +#define TINY 1.0e-20 +#define SHFT(a,b,c,d) (a)=(b);(b)=(c);(c)=(d); /* swap function */ +#define SIGN(a,b) ((b) >= 0.0 ? fabs(a) : -fabs(a)) +#define FMAX(a,b) ((a) > (b) ? (a) : (b)) +/* Here ITMAX is the maximum allowed number of iterations; CGOLD is the golden ratio; ZEPS is + a small number that protects against trying to achieve fractional accuracy for a minimum that + happens to be exactly zero. */ +#define ITMAX 100 +#define CGOLD 0.3819660 +#define ZEPS 1.0e-10 +#define useDeterminant 1 + /* -------------- Structures to store adaptation info -------------------- */ typedef struct { @@ -177,6 +198,7 @@ static Boolean mllrDiagCov = FALSE; /* perform diagonal covariance adaptation static Boolean useSMAPcriterion = FALSE; /* perform SMAPLR and CSMAPLR adaptation */ static Boolean SaveAllSMAPXForms = FALSE; /* Save intermediate matrices estimated in SMAP */ static float sigmascale = 1.0; /* prior parameter for SMAP creterion*/ +static int UseDyn = 0; /* Generate the VTLN parameters separately for static and dynamic block */ static IntVec enableBlockAdapt = NULL; @@ -194,7 +216,7 @@ static Boolean staticSemiTied = FALSE; static Boolean initNuisanceFR = TRUE; static Boolean initNuisanceFRIdent = FALSE; static Boolean saveSemiTiedBinary = FALSE; - +static void Generate_Matrix(double alpha, DMatrix alphaMat, int featLen); /*------------------------------------------------------------------------*/ /* Support Routines for determining internal structures required */ /* Note: these only act on the transform NOT any parents. */ @@ -212,7 +234,7 @@ static Boolean AccAdaptMean(AdaptXForm *xform) static Boolean AccAdaptVar(AdaptXForm *xform) { XFormKind xkind = xform->xformSet->xkind; - if ( (xkind == CMLLR) || (xkind == MLLRCOV) || (mllrDiagCov)) + if ( (xkind == CMLLR) || (xkind == MLLRCOV) || (mllrDiagCov) || (xkind == VTLN)) return (TRUE); else return (FALSE); @@ -221,7 +243,7 @@ static Boolean AccAdaptVar(AdaptXForm *xform) static Boolean AccAdaptBaseTriMat(AdaptXForm *xform) { XFormKind xkind = xform->xformSet->xkind; - if ( (xkind == CMLLR) || (xkind == MLLRCOV)) + if ( (xkind == CMLLR) || (xkind == MLLRCOV) || (xkind == VTLN)) return (TRUE); else return (FALSE); @@ -239,7 +261,7 @@ Boolean HardAssign(AdaptXForm *xform) static Boolean StoreObsCache(AdaptXForm *xform) { XFormKind xkind = xform->xformSet->xkind; - if ((xkind == CMLLR) || (xkind == MLLRCOV) || (xkind == SEMIT)) + if ((xkind == CMLLR) || (xkind == MLLRCOV) || (xkind == SEMIT) || (xkind == VTLN)) return TRUE; else return FALSE; @@ -248,7 +270,7 @@ static Boolean StoreObsCache(AdaptXForm *xform) static Boolean StoreAdaptMean(AdaptXForm *xform) { XFormKind xkind = xform->xformSet->xkind; - if ((xkind == MLLRMEAN) || (xkind == MLLRCOV) || (xkind == CMLLR) || (xkind == SEMIT)) + if ((xkind == MLLRMEAN) || (xkind == MLLRCOV) || (xkind == CMLLR) || (xkind == SEMIT) || (xkind == VTLN)) return TRUE; else return FALSE; @@ -257,7 +279,7 @@ static Boolean StoreAdaptMean(AdaptXForm *xform) static Boolean StoreAdaptCov(AdaptXForm *xform) { XFormKind xkind = xform->xformSet->xkind; - if ((xkind == MLLRVAR) || (xkind == MLLRCOV) || (xkind == CMLLR) || (xkind == SEMIT)) + if ((xkind == MLLRVAR) || (xkind == MLLRCOV) || (xkind == CMLLR) || (xkind == SEMIT) || (xkind == VTLN)) return TRUE; else return FALSE; @@ -344,6 +366,15 @@ void CheckAdaptSetUp (HMMSet *hset, XFInfo *xfinfo) HError(9999,"CheckAdaptSetup: %d-th stream width (%d) and transform size (%d) is inconsistent",s,hset->swidth[s],vsize); } break; + case VTLN: + if (cmllrBlockSize[s]!=NULL) { + vsize = 0; + for (i=1; i<=IntVecSize(cmllrBlockSize[s]); i++) + vsize += cmllrBlockSize[s][i]; + if (vsize!=hset->swidth[s]) + HError(9999,"CheckAdaptSetup: %d-th stream width (%d) and transform size (%d) is inconsistent",s,hset->swidth[s],vsize); + } + break; case CMLLR: if (cmllrBlockSize[s]!=NULL) { vsize = 0; @@ -386,6 +417,14 @@ void CheckAdaptSetUp (HMMSet *hset, XFInfo *xfinfo) mllrCovBandWidth[s][i],i,s,mllrCovBlockSize[s][i]); } break; + case VTLN: + if (cmllrBlockSize[s]!=NULL) { + for (i=1; i<=IntVecSize(cmllrBandWidth[s]); i++) + if (cmllrBandWidth[s][i]>cmllrBlockSize[s][i]) + HError(9999,"CheckAdaptSetup: band width (%d) at %d-th block in %d-th stream of VTLN transform is larger than block size (%d)", + cmllrBandWidth[s][i],i,s,cmllrBlockSize[s][i]); + } + break; case CMLLR: if (cmllrBlockSize[s]!=NULL) { for (i=1; i<=IntVecSize(cmllrBandWidth[s]); i++) @@ -422,6 +461,13 @@ void CheckAdaptSetUp (HMMSet *hset, XFInfo *xfinfo) mllrCovSplitThresh[s] = 1000.0; } break; + case VTLN: + if (cmllrSplitThresh==NULL) { + xfinfo->cmllrSplitThresh = cmllrSplitThresh = CreateVector(&infoStack, hset->swidth[0]); + for (s=1; s<=hset->swidth[0]; s++) + cmllrSplitThresh[s] = 1000.0; + } + break; case CMLLR: if (cmllrSplitThresh==NULL) { xfinfo->cmllrSplitThresh = cmllrSplitThresh = CreateVector(&infoStack, hset->swidth[0]); @@ -850,6 +896,9 @@ static float GetSplitThresh(XFInfo *xfinfo, float *thresh) case MLLRCOV: thresh[s] = mllrCovSplitThresh[s]; break; + case VTLN: + thresh[s] = cmllrSplitThresh[s]; + break; case CMLLR: thresh[s] = cmllrSplitThresh[s]; break; @@ -885,6 +934,9 @@ static AdaptKind GetAdaptKind(XFInfo *xfinfo) case CMLLR: akind = cmllrAdaptKind; break; + case VTLN: + akind = cmllrAdaptKind; + break; } } return akind; @@ -912,6 +964,9 @@ static RegTree* GetRegTree(HMMSet *hset, XFInfo *xfinfo) case MLLRCOV: basename = mllrCovRegTree; break; + case VTLN: + basename = cmllrRegTree; + break; case CMLLR: basename = cmllrRegTree; break; @@ -949,6 +1004,9 @@ static BaseClass* GetBaseClass(HMMSet *hset, XFInfo *xfinfo) case CMLLR: basename = cmllrBaseClass; break; + case VTLN: + basename = cmllrBaseClass; + break; } } if (basename == NULL) { @@ -996,6 +1054,9 @@ static IntVec GetBlockSize(XFInfo *xfinfo, const int b) case MLLRCOV: blockSize = mllrCovBlockSize[s]; break; + case VTLN: + blockSize = cmllrBlockSize[s]; + break; case CMLLR: blockSize = cmllrBlockSize[s]; break; @@ -1042,6 +1103,9 @@ static IntVec GetBandWidth(XFInfo *xfinfo, const int b, IntVec blockSize) case MLLRCOV: bandWidth = mllrCovBandWidth[s]; break; + case VTLN: + bandWidth = cmllrBandWidth[s]; + break; case CMLLR: bandWidth = cmllrBandWidth[s]; break; @@ -1885,6 +1949,21 @@ static AccStruct *CreateAccStruct(MemHeap *x, XFInfo *xfinfo, int vsize, } } break; + case VTLN: + accs->K = (DVector *)New(x,(vsize+1)*sizeof(DVector)); + accs->G = (DMatrix *)New(x,(vsize+1)*sizeof(DMatrix)); + for (b=1,cnti=1;b<=IntVecSize(blockSize);b++) { + bsize = blockSize[b]; + if (useBias) dim = bsize+1; + else dim = bsize; + for (i=1;i<=bsize;i++,cnti++) { + accs->K[cnti] = CreateDVector(x,dim); + ZeroDVector(accs->K[cnti]); + accs->G[cnti] = CreateDMatrix(x,dim,dim); + ZeroDMatrix(accs->G[cnti]); + } + } + break; default : HError(999,"Transform kind not currently supported"); break; @@ -2147,8 +2226,20 @@ static Boolean ParseNode (XFInfo *xfinfo, RegNode *node, lclasses = CreateIntVec(&gstack,IntVecSize(classes)); ZeroIntVec(lclasses); if (node->numChild>0) { /* Not a terminal node */ - for (c=1;c<=node->numChild;c++) - if (ParseNode(xfinfo, node->child[c], rtree, lclasses, xfindex)) genXForm = TRUE; + printf("Type of adaptation: %d \n",xfinfo->outXKind); + if(xfinfo->outXKind == VTLN) /* For VTLN generate only for first stream */ + { + for (c=1;c<=node->numChild;c++) + { + if(node->stream ==1) + if (ParseNode(xfinfo, node->child[c], rtree, lclasses, xfindex)) genXForm = TRUE; + } + } + else + { + for (c=1;c<=node->numChild;c++) + if (ParseNode(xfinfo, node->child[c], rtree, lclasses, xfindex)) genXForm = TRUE; + } /* any of the children need a xform generate it */ if (genXForm) GenXForm(node,xfinfo,lclasses,xfindex); } @@ -2161,14 +2252,26 @@ static Boolean ParseNode (XFInfo *xfinfo, RegNode *node, } else { if (node->numChild>0) { /* Not a terminal node */ - for (c=1;c<=node->numChild;c++) - ParseNode(xfinfo, node->child[c], rtree, classes, xfindex); + printf("Type of adaptation: %d \n",xfinfo->outXKind); + if(xfinfo->outXKind == VTLN) /* For VTLN generate only for first stream */ + { + for (c=1;c<=node->numChild;c++) + { + if(node->stream ==1) + ParseNode(xfinfo, node->child[c], rtree, classes, xfindex); + } + } + else + { + for (c=1;c<=node->numChild;c++) + ParseNode(xfinfo, node->child[c], rtree, classes, xfindex); + } } else { /* Mark baseclasses for adaptation */ for (b=1;b<=IntVecSize(node->baseClasses);b++) classes[node->baseClasses[b]] = 1; } genXForm = TRUE; - } + } } return genXForm; @@ -2465,6 +2568,9 @@ static void AccMixPDFStats(HMMSet *hset, MixPDF *mp, AccStruct *accs) case SEMIT: /* The accstructure is not used for semi-tied estimation */ break; + case VTLN: + AccCMLLRPDFStats(mp,accs); + break; default : HError(999,"Transform kind not currently supported"); break; @@ -2484,6 +2590,7 @@ static void AccBaseClassStats(MixPDF *mp, AccStruct *accs) switch (accs->xkind) { case MLLRCOV: case CMLLR: + case VTLN: AccCMLLRBaseStats(mp,accs); break; default : @@ -2565,6 +2672,7 @@ static void AddPrior(AccStruct *accs, AdaptXForm *xform, int xfindex) switch (accs->xkind) { case MLLRMEAN: case CMLLR: + case VTLN: AddXFPrior(accs,xform,xfindex); break; default : @@ -2914,6 +3022,7 @@ static void ApplyCMLLRXForm2Vector(LinXForm *linXForm, Vector mean) double tmp; int i,j; int cnt,cnti,cntj; + DMatrix newA; /* Check dimensions */ const int size = linXForm->vecSize; @@ -2935,15 +3044,32 @@ static void ApplyCMLLRXForm2Vector(LinXForm *linXForm, Vector mean) bsize = linXForm->blockSize[b]; A = CreateMatrix(&gstack, bsize, bsize); MatInvert(linXForm->xform[b], A); - for (i=1;i<=bsize;i++,cnti++) { - tmp = 0.0; - for (j=1,cntj=cnt;j<=bsize;j++,cntj++) - tmp += A[i][j] * vec[cntj]; - mean[cnti] = tmp; - } - cnt += bsize; - FreeMatrix(&gstack, A); - } + + if((linXForm->xform[b][1][1] == 1) && (linXForm->xform[b][bsize][1] == 0))/* Crack to check if it is a VTLN matrix */ + { + newA = CreateDMatrix(&gstack, bsize, bsize); + Generate_Matrix(A[1][2], newA, bsize); + for (i=1;i<=bsize;i++,cnti++) { + tmp = 0.0; + for (j=1,cntj=cnt;j<=bsize;j++,cntj++) + tmp += newA[i][j] * vec[cntj]; + mean[cnti] = tmp; + } + cnt += bsize; + FreeDMatrix(&gstack, newA); + } + else + { + for (i=1;i<=bsize;i++,cnti++) { + tmp = 0.0; + for (j=1,cntj=cnt;j<=bsize;j++,cntj++) + tmp += A[i][j] * vec[cntj]; + mean[cnti] = tmp; + } + cnt += bsize; + } + FreeMatrix(&gstack, A); + } FreeVector(&gstack,vec); } @@ -3132,6 +3258,7 @@ static Vector CompFXForm(MixPDF *mp, Vector svec, AdaptXForm *xform, AInfo *ai, xformSet = xform->xformSet; switch (xformSet->xkind) { case CMLLR: + case VTLN: case MLLRCOV: case SEMIT: ApplyXForm2Vector(xformSet->xforms[numXf],svec); @@ -3212,6 +3339,7 @@ static void CompXForm(MixPDF *mp, AdaptXForm *xform, AInfo *ai, Boolean full) break; case SEMIT: case CMLLR: + case VTLN: if (full) { ApplyXForm2Cov(xformSet->xforms[numXf],&mp->cov,mp->ckind); /* FixFullGConst(mp,-CovDet(mp->cov.inv)); */ @@ -3484,13 +3612,13 @@ static double GetRowLike(DMatrix gmat,DVector kmat, DVector cofact, double occ, for (i=1;i<=size;i++) for (j=1;j<=size;j++) tvec[i] += w[j]*tmat[i][j]; - rowLike = 0; + rowLike = 0.0; for (i=1;i<=size;i++) - rowLike += (tvec[i] - 2*kmat[i])*w[i]; - det=0; + rowLike += (tvec[i] - 2.0*kmat[i])*w[i]; + det=0.0; for (i=1;i<=size2;i++) det += cofact[i]*w[i]; - rowLike = log(fabs(det))*occ - rowLike/2; + rowLike = log(fabs(det))*occ - rowLike/2.0; FreeDVector(&gstack,tvec); return rowLike; } @@ -3565,7 +3693,7 @@ static void InitCMLLRXForm(AccStruct *accs, DVector W, DVector bias) lK = CreateDVector(&gstack, ldim); iW = CreateDVector(&gstack, ldim); /* identity xform for log-likelihood check */ - iW[1]=1; iW[2]=0; + iW[1]=1.0; iW[2]=0.0; for (b=1,cnt=1;b<=IntVecSize(accs->blockSize);b++) { bsize = accs->blockSize[b]; if (uBias) dim = bsize+1; @@ -3583,7 +3711,7 @@ static void InitCMLLRXForm(AccStruct *accs, DVector W, DVector bias) lK[1] = accs->K[cnti][i]; } /* For diag case the cofactors are independent */ - cofact[1]=1; + cofact[1]=1.0; InvSVD(lG, u, w, v, invG); alpha = GetAlpha(invG,lK,accs->occ,cofact); tvec[1] = alpha * cofact[1] + lK[1]; @@ -3592,7 +3720,7 @@ static void InitCMLLRXForm(AccStruct *accs, DVector W, DVector bias) for (k=1;k<=ldim;k++) tW[1] += invG[1][k] * tvec[k]; if (uBias) { - tW[ldim]=0; + tW[ldim]=0.0; for (k=1;k<=ldim;k++) tW[ldim] += invG[ldim][k] * tvec[k]; } @@ -3642,7 +3770,7 @@ static void EstCMLLRXForm(AccStruct *accs, LinXForm *xf) InitCMLLRXForm(accs, iniA , bias); InvG = (DMatrix *)New(&gstack,sizeof(DMatrix)*(accs->dim+1)); - tdet = 0; + tdet = 0.0; for (b=1,cnt=1;b<=IntVecSize(accs->blockSize);b++) { bsize = accs->blockSize[b]; @@ -3686,13 +3814,13 @@ static void EstCMLLRXForm(AccStruct *accs, LinXForm *xf) if (trace&T_XFM) printf("Iteration %d (row %d): Old=%e, New=%e (diff=%e)\n",iter,cnti,likeOld,likeNew,likeNew-likeOld); if (likeNew>likeOld) { - det = 0; + det = 0.0; for (j=1;j<=bsize;j++) { A[i][j] = W[j]; det += cofact[j]*W[j]; } if (uBias) { - bias[cnti] = 0; + bias[cnti] = 0.0; bias[cnti] += W[dim]; } } @@ -3715,10 +3843,331 @@ static void EstCMLLRXForm(AccStruct *accs, LinXForm *xf) if (uBias) { for (i=1;i<=xf->vecSize;i++) xf->bias[i] = bias[i]; } - xf->det = tdet*2; + xf->det = tdet*2.0; FreeDVector(&gstack, iniA); } +/* Generate the VTLN alpha matrix for a particular value of alpha + Matrix is generated using a recursive formula S(k-1,l-1) + \alpha{[S(k,l-1) - S(k-1,l)]}*/ +static void Generate_Matrix(double alpha, DMatrix alphaMat, int featLen) +{ + printf("\nInside Generate_Matrix:%e\n",alpha); + if(alphaMat == NULL) + { + printf("Error in intializing the matrix\n"); + return; + } + int loop, loop1; + + alphaMat[1][1] = 1; + for (loop=2;loop<=featLen;loop++) + { + alphaMat[1][loop] = pow(alpha,(loop-1)); + alphaMat[loop][1] = 0; + } + for (loop1=2;loop1<=featLen;loop1++) + for (loop=2;loop<=featLen;loop++) + { + alphaMat[loop1][loop] = alphaMat[loop1-1][loop-1] +\ + alpha*(alphaMat[loop1][loop-1]- alphaMat[loop1-1][loop]); + } +} + + + + /* Loglikelihood calculation for a particular value of alpha*/ +static double LogLihood(double alpha, AccStruct* acc, int featLen, int blk_no) +{ + printf("Inside Likelihood calcul:%e\n",alpha); + DMatrix alphaMat; + DMatrix alphaMat1; + double likeLihood = 0.0; + double detVal; + int i, j, k, bsize, BLOCK_SIZE, act_size,act_start, bsize1; + DVector tvec; + DMatrix tmat; + DVector kmat; + /*Generate the alpha matrix*/ + alphaMat = CreateDMatrix(&gstack,featLen,featLen); + Generate_Matrix(alpha, alphaMat,featLen); + + /* Work around for problems with higher order features. Estimate warping factors using lower order features of dimensionality 13. Change this to featLen to suppress this functionality */ + int vtln_len = 13; /* featLen; */ + alphaMat1 = CreateDMatrix(&gstack,vtln_len,vtln_len); + Generate_Matrix(alpha, alphaMat1,vtln_len); + + int rsize,csize,bn; + bsize = featLen; /* 13; */ + BLOCK_SIZE = IntVecSize(acc->blockSize); /* 3; */ + act_start = blk_no*bsize+1; + act_size = act_start+bsize-1; + printf ("Feature length inside LogLihood: %d\n",featLen); + printf("UseDynamic features: %d, Block #:%d, Block Start:%d, Block_End:%d Total # of blocks: %d\n",UseDyn, blk_no,act_start, act_size,BLOCK_SIZE); + +/* This iterates each block and then each row // By default assuming 3 blocks... */ +if(UseDyn){ + for (bn =0;bn< BLOCK_SIZE;bn++) { + act_start = bn*bsize+1; + act_size = act_start+bsize-1; + for (k=act_start;k<=act_size;k++) { + printf("Inside loop:%d\n",k); + kmat = acc->K[k]; + tvec = CreateDVector(&gstack,bsize); + tmat = CreateDMatrix(&gstack,bsize,bsize); + rsize = NumDRows(acc->G[k]); + csize = NumDCols(acc->G[k]); + printf("Rows: %d, Cols:%d\n",rsize,csize); + Tri2DMat(acc->G[k],tmat); + ZeroDVector(tvec); + for (i=1;i<=bsize;i++) + for (j=1;j<=bsize;j++) + tvec[i] += alphaMat[k-act_start+1][j]*tmat[i][j]; + for (i=1;i<=bsize;i++) + likeLihood += (tvec[i] - 2*kmat[i])*alphaMat[k-act_start+1][i]; + FreeDMatrix(&gstack,tmat); /* Added might give error */ + FreeDVector(&gstack,tvec); + } + } +}else { + printf("Original block size: %d\n",bsize); + bsize1 = bsize; + bsize = vtln_len; + printf("New block size: %d\n",bsize); + for (k=1;k<=bsize;k++) { + printf("Inside loop:%d\n",k); + kmat = acc->K[k]; + tvec = CreateDVector(&gstack,bsize); + tmat = CreateDMatrix(&gstack,bsize1,bsize1); + rsize = bsize; /* NumDRows(acc->G[k]); */ + csize = bsize; /* NumDCols(acc->G[k]); */ + printf("Rows: %d, Cols:%d\n",rsize,csize); + Tri2DMat(acc->G[k],tmat); + ZeroDVector(tvec); + for (i=1;i<=bsize;i++) + for (j=1;j<=bsize;j++) + tvec[i] += alphaMat[k][j]*tmat[i][j]; + for (i=1;i<=bsize;i++) + likeLihood += (tvec[i] - 2*kmat[i])*alphaMat[k][i]; + FreeDMatrix(&gstack,tmat); /* Added might give error */ + FreeDVector(&gstack,tvec); + } +} + + likeLihood /= 2; + + if (useDeterminant) + { + /*Using Jacobian find the Determinant of the matrix */ + detVal = log(fabs(DMatDet(alphaMat1))); + printf("Determinant calculated: %f on number of frames:%f and Likelihood:%f\n", detVal,acc->occ,likeLihood); + likeLihood -= acc->occ * detVal; + } + + FreeDMatrix(&gstack,alphaMat); + printf("Loglihood End: %f\n",likeLihood); + return (likeLihood); +} + + /* Given a function func, and given distinct initial points ax and bx, this routine searches in + the downhill direction (defined by the function as evaluated at the initial points) and returns + new points ax, bx, cx that bracket a minimum of the function. */ + +static void BracketLL(double ax, double bx, double* mid, AccStruct* acc, int dim, int blk_no) +{ + printf("\nInside BracketLL\n"); + double ulim,u,r,q,uxLL,dum; + double cx, axLL, bxLL, cxLL; + + axLL = LogLihood(ax, acc, dim, blk_no); + bxLL = LogLihood(bx, acc, dim, blk_no); + + /* Switch roles of a and b so that we can go downhill in the direction from a to b. */ + if (bxLL > axLL) { + SHFT(dum,ax,bx,dum) + SHFT(dum,bxLL,axLL,dum) + } + + /* First guess for cx. */ + cx=bx+GOLD*(bx-ax); + cxLL=LogLihood(cx,acc, dim, blk_no); + + /* Keep returning here until we bracket. */ + while (bxLL > cxLL) { + /* Compute u by parabolic extrapolation from a, b, c. TINY is used to prevent any possible division by zero. */ + r=(bx-ax)*(bxLL-cxLL); + q=(bx-cx)*(bxLL-axLL); + u=bx-((bx-cx)*q-(bx-ax)*r)/(2.0*SIGN(FMAX(fabs(q-r),TINY),q-r)); + ulim=bx+GLIMIT*(cx-bx); + + /* We won<80><99>t go farther than this. Test various possibilities: + Parabolic u is between b and c: try it. */ + if ((bx-u)*(u-cx) > 0.0) { + uxLL=LogLihood(u, acc, dim, blk_no); + /* Got a minimum between b and c.*/ + if (uxLL < cxLL) { + ax=bx; + bx=u; + axLL=bxLL; + bxLL=uxLL; + *mid = cx; + return; + /* Got a minimum between between a and u.*/ + } else if (uxLL > bxLL) { + cx=u; + cxLL=uxLL; + *mid = cx; + return; + } + + /* Parabolic fit was no use. Use default magnification.*/ + u=(cx)+GOLD*(cx-bx); + uxLL=LogLihood(u, acc,dim, blk_no); + /* Parabolic fit is between c and its allowed limit.*/ + + } else if ((u-ulim)*(ulim-cx) >= 0.0) { + u=ulim; + uxLL=LogLihood(u, acc,dim, blk_no); + /* Reject parabolic u, use default magnification.*/ + } else { + u=cx+GOLD*(cx-bx); + uxLL=LogLihood(u, acc, dim, blk_no); + } + /* Eliminate oldest point and continue. */ + SHFT(ax,bx,cx,u) + SHFT(axLL,bxLL,cxLL,uxLL) + } +} + +/* Given a function f, and given a bracketing triplet of abscissas ax, bx, cx (such that bx is + between ax and cx, and f(bx) is less than both f(ax) and f(cx)), this routine isolates + the minimum to a fractional precision of about tol using Brent’s method. The abscissa of + the minimum is returned as xmin, and the minimum function value is returned as brent, the + returned function value. */ + +static float BrentsOpt(double ax, double bx, double cx, double tol, double* xmin, AccStruct* acc, int dim, int blk_no) +{ + + printf("\nInside BrentsOpt\n"); + int iter; + double a,b,d=0.0,etemp,uxLL,vxLL,wxLL,xxLL,p,q,r,tol1,tol2,u,v,w,x,xm; + + /* This will be the distance moved on */ + double e=0.0; /* the step before last.*/ + + /* a and b must be in ascending order, */ + a=(ax < cx ? ax : cx); + /* but input abscissas need not be. */ + b=(ax > cx ? ax : cx); + /* Initializations... */ + x=w=v=bx; + wxLL=vxLL=xxLL=LogLihood(x, acc, dim, blk_no); + + /* Main program loop. */ + for (iter=1;iter<=ITMAX;iter++) { + xm=0.5*(a+b); + tol2=2.0*(tol1=tol*fabs(x)+ZEPS); + /* Test for done here. */ + if (fabs(x-xm) <= (tol2-0.5*(b-a))) { + *xmin = x; + return xxLL; + } + + /* Construct a trial parabolic fit.*/ + if (fabs(e) > tol1) { + r=(x-w)*(xxLL-vxLL); + q=(x-v)*(xxLL-wxLL); + p=(x-v)*q-(x-w)*r; + q=2.0*(q-r); + if (q > 0.0) p = -p; + q=fabs(q); + etemp=e; + e=d; + if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x)) + d=CGOLD*(e=(x >= xm ? a-x : b-x)); + + /* The above conditions determine the acceptability of the parabolic fit. Here we + take the golden section step into the larger of the two segments. */ + else { + /* Take the parabolic step. */ + d=p/q; + u=x+d; + if (u-a < tol2 || b-u < tol2) + d=SIGN(tol1,xm-x); + } + } else { + d=CGOLD*(e=(x >= xm ? a-x : b-x)); + } + u=(fabs(d) >= tol1 ? x+d : x+SIGN(tol1,d)); + uxLL=LogLihood(u, acc, dim, blk_no); + /* This is the one function evaluation per iteration. + Now decide what to do with our function evaluation. */ + if (uxLL <= xxLL) { + if (u >= x) a=x; else b=x; + /* Housekeeping follows: */ + SHFT(v,w,x,u) + SHFT(vxLL,wxLL,xxLL,uxLL) + } else { + if (u < x) a=u; else b=u; + if (uxLL <= wxLL || w == x) { + v=w; + w=u; + vxLL=wxLL; + wxLL=uxLL; + } else if (uxLL <= vxLL || v == x || v == w) { + v=u; + vxLL=uxLL; + } + /* Done with housekeeping. Back for another iteration.*/ + } + } + printf("Too many iterations in brent"); + return 10e10; +} + +static void EstVTLNXForm(AccStruct *accs, LinXForm *xf) +{ + int i,j; + + printf("\nInside EstVTLNXForm\n"); + int cnt, b, bsize, blk_no; + + + double alpha=0.0, bestLL; + + DMatrix A; + double tol = 1.0e-07; + double llim = -0.1, ulim = 0.1, mlim = 0; + + + /*Might have to run this code for each block/stream in the input. */ + for (b=1,cnt=1;b<=IntVecSize(accs->blockSize);b++) { + bsize = accs->blockSize[b]; + A = CreateDMatrix(&gstack, bsize, bsize); + blk_no = b-1; + llim = -0.1; + mlim = 0; + ulim = 0.1; + /* Find the bracket/limits of alpha for the Likelihood function */ +/* BracketLL(llim, ulim, &mlim, accs, bsize, blk_no);*/ + if (trace&T_XFM) + printf("Brackets for the Auxillary func: %f, %f and %f\n",llim, mlim, ulim); + bestLL = BrentsOpt(llim, mlim, ulim, tol, &alpha, accs, bsize, blk_no); + if (trace&T_XFM) + printf("Best value for alpha: %f\n",alpha); + + /*Assign the transform components */ + xf->bias = NULL; + + Generate_Matrix(alpha, A, bsize); + xf->det = DMatDet(A); + for (i=1;i<=bsize;i++) + for (j=1;j<=bsize;j++) + xf->xform[b][i][j] = A[i][j]; + ZeroDMatrix(A); + FreeDMatrix(&gstack,A); + } +} + static void AccMixPDFSemiTiedStats(HMMSet *hset,MixPDF *mp, AccStruct *accs) { VaAcc *va; @@ -4620,6 +5069,10 @@ static void EstXForm(AccStruct *accs, XFInfo *xfinfo, IntVec classes) case MLLRCOV: EstMLLRCovXForm(accs, xf); break; + case VTLN: + printf("\nVTLN estimation\n"); + EstVTLNXForm(accs, xf); + break; case CMLLR: EstCMLLRXForm(accs, xf); break; @@ -4737,16 +5190,21 @@ static Boolean GenClassXForm(XFInfo *xfinfo) printf(")\n"); if ((accs->dim>0) && ((xform->xformSet->xkind==SEMIT) || (accs->occ > thresh[s]))) { - EstXForm(accs,xfinfo,classes); - xform->xformWgts.assign[b] = xform->xformSet->numXForms; - if (mllrDiagCov) - diagCovXForm->xformWgts.assign[b] = diagCovXForm->xformSet->numXForms; + printf("To check XForm kind: %d, Stream: %d\n",xfinfo->outXKind,s); + if( ((xfinfo->outXKind==5)&&(s==1)) || (xfinfo->outXKind!=5)) + { + EstXForm(accs,xfinfo,classes); + xform->xformWgts.assign[b] = xform->xformSet->numXForms; + } + if (mllrDiagCov) + diagCovXForm->xformWgts.assign[b] = diagCovXForm->xformSet->numXForms; } else { - xform->xformWgts.assign[b] = 0; - } - Dispose(&gstack,accs); - } + xform->xformWgts.assign[b] = 0; + } + Dispose(&gstack,accs); + + } } return TRUE; } @@ -5136,7 +5594,8 @@ void ApplyCompXForm(MixPDF *mp, AdaptXForm *xform, Boolean full) for (ax=xform,full=FALSE; (ax!=NULL) && (full==FALSE); ax=ax->parentXForm) { full |= ((ax->xformSet->xkind==MLLRCOV) || (ax->xformSet->xkind==CMLLR) - || (ax->xformSet->xkind==SEMIT)); + || (ax->xformSet->xkind==SEMIT) + || (ax->xformSet->xkind==VTLN)); } } @@ -5166,7 +5625,7 @@ Vector ApplyCompFXForm(MixPDF *mp, Vector svec, AdaptXForm *xform, LogFloat *det vSize=VectorSize(mp->mean); if (xform==NULL || SpaceOrder(svec)==0 || SpaceOrder(svec)!=vSize) { - *det = 0; + *det = 0.0; return svec; } else { @@ -5217,7 +5676,8 @@ void ApplyHMMSetXForm(HMMSet *hset, AdaptXForm *xform, Boolean full) for (ax=xform,full=FALSE; (ax!=NULL) && (full==FALSE); ax=ax->parentXForm) { full |= ((ax->xformSet->xkind==MLLRCOV) || (ax->xformSet->xkind==CMLLR) - || (ax->xformSet->xkind==SEMIT)); + || (ax->xformSet->xkind==SEMIT) + || (ax->xformSet->xkind==VTLN)); } } diff --git a/HTKLib/HModel.c b/HTKLib/HModel.c index ee3049b..6095d8d 100644 --- a/HTKLib/HModel.c +++ b/HTKLib/HModel.c @@ -46,6 +46,9 @@ /* Interdisciplinary Graduate School of */ /* Science and Engineering */ /* */ +/* 2011 Idiap Research Institute */ +/* http://www.idiap.ch */ +/* */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -883,6 +886,7 @@ XFormKind Str2XFormKind(char *str) else if (!(strcmp(str,"MLLRVAR"))) xkind = MLLRVAR; else if (!(strcmp(str,"CMLLR"))) xkind = CMLLR; else if (!(strcmp(str,"SEMIT"))) xkind = SEMIT; + else if (!(strcmp(str,"VTLN"))) xkind = VTLN; else HError(999,"Unknown XForm Class kind"); return xkind; } @@ -6131,7 +6135,7 @@ char *CovKind2Str(CovKind ckind, char *buf) /* EXPORT-> XFormKind2Str: Return string representation of enum XFormKind */ char *XFormKind2Str(XFormKind xkind, char *buf) { - static char *xformmap[] = {"MLLRMEAN","MLLRCOV", "MLLRVAR", "CMLLR", "SEMIT"}; + static char *xformmap[] = {"MLLRMEAN","MLLRCOV", "MLLRVAR", "CMLLR", "SEMIT", "VTLN"}; return strcpy(buf,xformmap[xkind]); } diff --git a/HTKLib/HModel.h b/HTKLib/HModel.h index 6229083..edc4bf3 100644 --- a/HTKLib/HModel.h +++ b/HTKLib/HModel.h @@ -46,6 +46,9 @@ /* Interdisciplinary Graduate School of */ /* Science and Engineering */ /* */ +/* 2011 Idiap Research Institute */ +/* http://www.idiap.ch */ +/* */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -209,7 +212,7 @@ typedef HMMDef * HLink; /* --------------------- Enumerated Types -------------------- */ -enum _XFormKind {MLLRMEAN, MLLRCOV, MLLRVAR, CMLLR, SEMIT}; +enum _XFormKind {MLLRMEAN, MLLRCOV, MLLRVAR, CMLLR, SEMIT, VTLN}; typedef enum _XFormKind XFormKind; enum _AdaptKind {TREE, BASE}; diff --git a/HTKTools/HERest.c b/HTKTools/HERest.c index 3f8d8c8..106d075 100644 --- a/HTKTools/HERest.c +++ b/HTKTools/HERest.c @@ -46,6 +46,9 @@ /* Interdisciplinary Graduate School of */ /* Science and Engineering */ /* */ +/* 2011 Idiap Research Institute */ +/* http://www.idiap.ch */ +/* */ /* All rights reserved. */ /* */ /* Redistribution and use in source and binary forms, with or */ @@ -406,8 +409,8 @@ void CheckUpdateSetUp() xf = xfInfo_hmm.paXForm; if ((xfInfo_hmm.paXForm != NULL) && !(uFlags_hmm&UPXFORM)) { while (xf != NULL) { - if ((xf->xformSet->xkind != CMLLR) && (xf->xformSet->xkind != SEMIT)) - HError(999,"SAT only supported with SEMIT/CMLLR transforms"); + if ((xf->xformSet->xkind != CMLLR) && (xf->xformSet->xkind != SEMIT) && (xf->xformSet->xkind != VTLN)) + HError(999,"SAT only supported with SEMIT/CMLLR/VTLN transforms"); xf = xf->parentXForm; } } @@ -415,8 +418,8 @@ void CheckUpdateSetUp() xf = xfInfo_dur.paXForm; if ((xfInfo_dur.paXForm != NULL) && !(uFlags_dur&UPXFORM)) { while (xf != NULL) { - if ((xf->xformSet->xkind != CMLLR) && (xf->xformSet->xkind != SEMIT)) - HError(999,"SAT only supported with SEMIT/CMLLR transforms"); + if ((xf->xformSet->xkind != CMLLR) && (xf->xformSet->xkind != SEMIT) && (xf->xformSet->xkind != VTLN)) + HError(999,"SAT only supported with SEMIT/CMLLR/VTLN transforms"); xf = xf->parentXForm; } } @@ -750,7 +753,7 @@ int main(int argc, char *argv[]) if (UpdateSpkrStats(&hset,&xfInfo_hmm,datafn)) { spUtt=0; } - if (up_durLoaded) { + if ((up_durLoaded) && (xfInfo_dur.outXKind != VTLN)){ /* This should not be called for VTLN */ if (UpdateSpkrStats(&dset,&xfInfo_dur,datafn) && xfInfo_dur.useInXForm) ResetDMMPreComps(&dset); /* reset all pre-calculated duration probs */ }