在孤立词语音识别中,最为简单有效的方法是采用DTW(Dynamic Time Warping,动态时间归整)算法,该算法基于动态规划(DP)的思想,解决了发音长短不一的模板匹配问题,是语音识别中出现较早、较为经典的一种算法,用于孤立词识别。HMM算法在训练阶段需要提供大量的语音数据,通过反复计算才能得到模型参数,而DTW算法的训练中几乎不需要额外的计算。所以在孤立词语音识别中,DTW算法仍然得到广泛的应用。
原理描述请查看:https://www.cnblogs.com/luxiaoxun/archive/2013/05/09/3069036.html
其中:newQd与newCd为两个一维等长序列,matlab实现如下:
function [newQd,newCd] = dtwDuijiao(Q,C,w)
% Q,C: two sequences
% w: window parameter
% if Q(i) is matched with C(j) then |i-j|<=w
% d: resulting distance
if nargin<3
w=Inf;
end
%取两匹配波最大值用于最后百分比统一
amplitudeOne = max(Q);
amplitudeTwo = max(C);
amplitude = max(amplitudeOne,amplitudeTwo);
ratioV = 100/amplitude;
%为了和c++统一,同比例缩放,去掉
%Q = Q * ratioV;
%C = C * ratioV;
%减去均值到转到同一线
minQ = min(Q);
for i =1:length(Q)
Q(i) = Q(i)-minQ;
end
minC = min(C);
for i =1:length(C)
C(i) = C(i)-minC;
end
%用余弦相似度做分母
cosSim = yuxian(Q,C);
%% initialize DTW
% DTW(i+1,j+1): The accumulated distance between
% Q(1) to Q(i) and C(1) to C(j)
% M: the length of sequence Q
% N: the length of sequence C
M = length(Q);
N = length(C);
w = max([w,abs(M-N)]);
count=1;
%距离
for i=1:1:M
for j=1:N
dd(i,j) = abs(Q(i)-C(j));
end
end
%% DTW algorithm
% d(i,j):The distance between Q(i) and C(j)
%[DTWv,distance] = dwtFunc(Q,C,w);
%DTW计算
%进行一些必要的初始化
for i = 1:M
for j =1:N
distance(i,j) = inf;
dtwpath(i,j)=0;
end
end
%动态规划求最小距离
DTW = zeros(M+1,N+1);
%DTW(2,2)=2*abs(Q(1)-C(1));
DTW(2,2)=abs(Q(1)-C(1));
for i = 3:1:M+1%r2
DTW(i,2)=DTW(i-1,2)+abs(Q(i-1)-C(1));
end
for j = 3:1:N+1%r2
DTW(2,j)=DTW(2,j-1)+abs(Q(1)-C(j-1));
end
for i = 3:M+1
for j = max([3,i-w+1]) : min([N+1,i+w+1])
%d = norm(Q(i-1)-C(j-1),2);%norm(B,2)=norm(B)
%dd(i-1,j-1) = norm(Q(i-1)-C(j-1));%只为测试使用
%dd(j,1)=d;
d = abs(Q(i-1)-C(j-1));%不用欧式,容易放大差距
dismin=[DTW(i-1,j-1)+d,DTW(i-1,j)+d,DTW(i,j-1)+d];
%dismin=[DTW(i-1,j-1)+2*d,DTW(i-1,j)+d,DTW(i,j-1)+d];
[DTWnowvalue,DTWnowIndex]=min(dismin); %褰撳墠鏈?煭璺緞鏂瑰悜
if(DTWnowIndex==1) %鎵惧嚭鏈?煭璺緞鐨勭储寮曚綅缃?
bestlocation{i,j}(1,1)=i-1;
bestlocation{i,j}(1,2)=j-1;
bestlocation{i,j}(1,3)=DTW(i-1,j-1);
else
if(DTWnowIndex==2)
bestlocation{i,j}(1,1)=i-1;
bestlocation{i,j}(1,2)=j;
bestlocation{i,j}(1,3)=DTW(i-1,j);
else
if(DTWnowIndex==3)
bestlocation{i,j}(1,1)=i;
bestlocation{i,j}(1,2)=j-1;
bestlocation{i,j}(1,3)=DTW(i,j-1);
end
end
end
count=count+1;
%DTW(i,j) = d+ min([DTW(i-1,j-1),DTW(i-1,j),DTW(i,j-1)]);55bf
%DTW(i,j) = min([DTW(i-1,j-1)+2*d,DTW(i-1,j)+d,DTW(i,j-1)+d]);
DTW(i,j) = min([DTW(i-1,j-1)+d,DTW(i-1,j)+d,DTW(i,j-1)+d]);
end
end
d = DTW(M+1,N+1);
%鎵撳嵃鏈?匠璺緞
mindistance=cell(M+1,N+1);
mindistance{M+1,N+1}=bestlocation{M+1,N+1};
Indx=mindistance{M+1,N+1}(1,1);
Indy=mindistance{M+1,N+1}(1,2);
tempIndx=Indx;
tempIndy=Indy;
tempflag(M+1,N+1)=1;
for xi=(M+1):(-1):1
for yi=(N+1):(-1):1
if(yi==tempIndy)&&(xi==tempIndx)
Indx=tempIndx;
Indy=tempIndy;
mindistance{xi,yi}=bestlocation{Indx,Indy};
tempflag(xi,yi)=1;
if(~isempty(mindistance{Indx,Indy}))
tempIndx=mindistance{Indx,Indy}(1,1);
tempIndy=mindistance{Indx,Indy}(1,2);
end
end
end
end
tempflag(:,[1])=[];
tempflag([1],:)=[];
[flagx,flagy]=find(tempflag==1);
if ( (flagx(1)-1)==0)
tempflag(1,1:flagy(1)-1)=1;
else
if ( (flagy(1)-1)==0)
tempflag(1:flagx(1)-1,1)=1;
end
end
06bf,通过压缩行列利用余弦相似度
%列压缩
% loc=1;
% ppy=-1;
% for xii=1:1:M %鎵緉ewC
% [px,py]=find(tempflag(xii,:)==1);
% numelV = numel(py)
%
% if( numelV>1)%同一行多个值
% if ppy==py(1)
% tempv = C(py(1)+1:(py(end)))%第一个与上一步同一列,剔除
% else
% tempv = C(py(1):py(end))%对角线进入这行
%
% end
% newC(loc)=sum(tempv)/length(tempv);%取均值
% loc=loc+1;
%
% ppy=py(end);
% continue;
%
% end
%
% if(ppy~=py)
% tempv = C(py)
% newC(loc)=sum(tempv)/length(tempv);
% loc=loc+1;
%
% end
%
%
% ppy=py(end);55bf
%
% end
%
%
% %行压缩
% hloc=1;
% phx=-1;
% for yii=1:1:N %鎵緉ewC
% [hx,hy]=find(tempflag(:,yii)==1);
% numelV = numel(hx)
%
% if( numelV>1)%同一行多个值
% if phx==hx(1)
% tempv = Q(hx(1)+1:(hx(end)))%第一个与上一步同一列,剔除
% else
% tempv = Q(hx(1):hx(end))%对角线进入这行
%
% end
% newQ(hloc)=sum(tempv)/length(tempv);%取均值
% hloc=hloc+1;
%
% phx=hx(end);
% continue;
%
% end
%
% if(phx~=hx)
% tempv = Q(hx)
% newQ(hloc)=sum(tempv)/length(tempv);
% hloc=hloc+1;
%
% end
%
%
% phx=hx(end);
%
% end
%考虑对角线路径为最相似路径
ii = 1;
steps = min(M,N);
oneNum = length(find(tempflag));%最短路径上1的个数
onepinghC = oneNum;
onepinghQ = oneNum;
sumZf = 0; %振幅相关性均值
sumZfList = [];%振幅和值序列
Qduijiao = [];
Cduijiao = [];
inde = 1;
inde2 = 1;
for ii = 1:1:steps
[px,py]=find(tempflag(ii,:)==1);
[hx,hy]=find(tempflag(:,py)==1);
if length(px)>1
onepinghQ = onepinghQ - length(px);
elseif length(hx)==1
if Q(ii)<0 & C(py)<0
%sumZfList(inde) = max(Q(ii),C(py))/min(Q(ii),C(py))
if abs(Q(ii)-C(py)) == 0
sumZfList(inde) = 1
else
sumZfList(inde) = 1/abs(Q(ii)-C(py))
end
elseif Q(ii) * C(py)<=0
if abs(Q(ii)-C(py)) == 0
sumZfList(inde) = 1
else
sumZfList(inde) = -1/abs(Q(ii) - C(py))
end
else % Q(px)>0 & C(py)>0
%sumZfList(inde) = min(Q(ii),C(py))/max(Q(ii),C(py))
if abs(Q(ii)-C(py)) == 0
sumZfList(inde) = 1
else
sumZfList(inde) = 1/abs(Q(ii) - C(py))
end
end
Qduijiao(inde2) = Q(ii);
Cduijiao(inde2) = C(py);
inde = inde + 1
inde2 = inde2 + 1
end
% if length(hx)>1
% onepinghC = onepinghC -length(hx);
% end
end
%波形最短路径可视化查看
Cr = reshape(C,1,length(C));
flagQC0 = [Cr;tempflag];
Q0 = [0;Q];
flagQC = [Q0 flagQC0];
%end波形最短路径可视化查看
sumZf = mean(sumZfList)
stepsMax = max(M,N)
for ii = 1:1:stepsMax
[hx,hy]=find(tempflag(:,ii)==1);
if length(hx)>1
onepinghC = onepinghC -length(hx);
end
end
%振幅比率
minQ = min(Q);
maxQ = max(Q);
zhenfuQ = maxQ - minQ;
minC = min(C);
maxC = max(C);
zhenfuC = maxC - minC;
ratio = min(abs(maxQ-minQ),abs(maxC-minC))/max(abs(maxQ-minQ),abs(maxC-minC));
sumZfAll = sumZf*(min(zhenfuC,zhenfuQ)/max(zhenfuC,zhenfuQ));
chazhiP = abs(maxQ-maxC);%正向波幅度差值
if chazhiP == 0
chazhiP = 1;
end
chazhiN = abs(minQ-minC);%负向波幅度差值
if chazhiN == 0
chazhiN = 1;
end
chazhiA= abs(zhenfuQ-zhenfuC);%整体幅度差值
if chazhiA == 0
chazhiA = 1;
end
numR = abs(M-N);%宽度差值
if numR == 0
numR = 1;
end
%newQ = onepinghQ/oneNum + 0.2*sumZf;%对角线元素比例和对应振幅差值相似性强度
%newQ = onepinghQ/oneNum * sumZfAll* (M/N);
%newQ = 10000*onepinghQ/oneNum/chazhiP/chazhiN/chazhiA/numR;
%newQd = onepinghQ/100;
%newQ = onepinghQ/oneNum;
%newQd = onepinghQ/oneNum;
%newQd = 1000000000/d*onepinghQ/oneNum;
newQd = d/(M+N);%DTW
newCd = onepinghC/oneNum;
%newQ = bestlocation{M+1,N+1}(1,3);
%newQ = Qduijiao;%对角线元素
%newC = Cduijiao;%对角线元素
%统一到C++的百分比输出
%newQd = 1 - newQd /100 * ratioV;
%newQd = 1 - newQd /(cosSim+1.01);
%newQd = 1 - newQd /power(20,cosSim*cosSim*cosSim*cosSim*cosSim*cosSim);
newQd = 1 - newQd /power(20,cosSim);
end
C++实现如下:
float simiMatchAllFunc::DTWDistanceFun(float *A, float *B, int r, int len)
{
//A 波形一
//B 波形二
//r 匹配限制范围
//len 匹配序列长度
int i, j;
float dist;
//int I = sizeof(A) / sizeof(float);
//int J = sizeof(A) / sizeof(float);
int I = len;
int J = len;
int istart, imax;
int r2 = r + ABS(I - J);/*匹配距离*/
double g1, g2, g3;
int pathsig = 1;/*路径的标志*/
float(*distance)[21] = new float[I][21];//DTW
memset(distance, 0.0, sizeof(float)*(21 * 21));
/*检查参数的有效性*/
if (I>DTWMAXNUM || J>DTWMAXNUM){
//printf("Too big number\n");
return -1.0;
}
/*进行一些必要的初始化*/
for (i = 0; i<I; i++){
for (j = 0; j<J; j++){
distance[i][j] = DTWVERYBIG;
}
}
/*动态规划求最小距离*/
distance[0][0] = (double)ABS(A[0] - B[0]);
for (i = 1; i < r2; i++){
distance[i][0] = distance[i - 1][0] + ABS(A[i] - B[0]);
}
for (j = 1; j < r2; j++){
distance[0][j] = distance[0][j - 1] + ABS(A[0] - B[j]);
}
for (j = 1; j<J; j++){
istart = j - r2;
if (j <= r2)
istart = 1;
imax = j + r2;
if (imax >= I)
imax = I - 1;
for (i = istart; i <= imax; i++){
g1 = distance[i - 1][j] + ABS(A[i] - B[j]);
g2 = distance[i - 1][j - 1] + ABS(A[i] - B[j]);
g3 = distance[i][j - 1] + ABS(A[i] - B[j]);
g2 = MIN(g1, g2);
g3 = MIN(g2, g3);
distance[i][j] = g3;
}
}
dist = distance[I - 1][J - 1] / ((double)(I + J));
delete[]distance;
return dist;
}
华青莲,方便自己,成长他人!
本站文章版权归原作者及原出处所有 。内容为作者个人观点, 并不代表本站赞同其观点和对其真实性负责。本站是一个个人学习交流的平台,并不用于任何商业目的,如果有任何问题,请及时联系我们,我们将根据著作权人的要求,立即更正或者删除有关内容。本站拥有对此声明的最终解释权。