Compare two voices - good tool?
Community Forums/General Help/Compare two voices - good tool?
| ||
Hi all, I have a little project on the side going on, and for that I would have to compare two voice audio sound files, to find out if they are really different voices, or if there is a match. I've been traversing the web for a while now, but I couldn't find anything useful (other than expensive forensic investigation tools that cost a fortune). Does anyone know of a free or low-cost tool to do this with? Any advise or pointers are appreciated. |
| ||
Sounds like you'll have to extract the phoneme samples, sample them in the frequency domain with dft, and do a comparison there. Try googling for 'Discreet Fourier transform' and 'dynamic time warp'. I don't know of a method of extracting and normalizing phoneme samples from spoken audio automatically, you'll probably have to do it by hand. |
| ||
I think you have your work cut out there. Using an average comparison on 2 ("3 dimensional") DFT patterns in a quiet environment should be easy enough, but any noise can interfere massively. (by 3d i mean several dft "snapshots" over a time period.) Here is a public DFT/FFT algorythm i used in a recent project for tone / note recognition. public class DFT { public static final int RECTANGULAR = 0; public static final int HANN = 1; public static final int HAMMING = 2; public static final int BLACKMANN = 3; public static final double[] forwardMagnitude(double[] input) { int N = input.length; double[] mag = new double[N]; double[] c = new double[N]; double[] s = new double[N]; double twoPi = 2*Math.PI; for(int i=0; i<N; i++) { for(int j=0; j<N; j++) { c[i] += input[j]*Math.cos(i*j*twoPi/N); s[i] -= input[j]*Math.sin(i*j*twoPi/N); } c[i]/=N; s[i]/=N; mag[i]=Math.sqrt(c[i]*c[i]+s[i]*s[i]); } return mag; } public static final double[] window(double[] input, int type) { int N = input.length; double[] windowed = new double[N]; switch(type) { case RECTANGULAR: return input; case HANN: for(int n=0; n<N; n++) { windowed[n] = 0.5*(1-Math.cos(2*Math.PI*n/(N-1))) * input[n]; } break; case HAMMING: for (int n = 0; n < input.length; n++) { windowed[n] = (0.53836-0.46164*Math.cos(Tools.TWO_PI*n/(N-1))) * input[n]; } case BLACKMANN: for(int n=0; n<N; n++) { windowed[n] = (0.42-0.5*Math.cos(2*Math.PI*n/(N-1))+0.08*Math.cos(4*Math.PI*n/(N-1)) ) * input[n]; } break; } return windowed; } } public class Complex { private double real; private double imag; public Complex(double real, double imag) { this.real = real; this.imag = imag; } public String toString() { if(imag<0) { return new String(real+" - i"+Math.abs(imag)); } else { return new String(real+" + i"+imag); } } public static final Complex multiply(Complex c1, Complex c2) { double re = c1.real*c2.real - c1.imag*c2.imag; double im = c1.real*c2.imag + c1.imag*c2.real; return new Complex(re, im); } public static Complex scale(Complex c, double x) { return new Complex(c.real*x, c.imag*x); } public static final Complex add(Complex c1, Complex c2) { double re = c1.real + c2.real; double im = c1.imag + c2.imag; return new Complex(re, im); } public static final Complex substract(Complex c1, Complex c2) { double re = c1.real - c2.real; double im = c1.imag - c2.imag; return new Complex(re, im); } public static Complex conjugate(Complex c) { return new Complex(c.real, -c.imag); } public static double abs(Complex c) { return Math.sqrt(c.real*c.real+c.imag*c.imag); } public static double[] abs(Complex[] c) { int N = c.length; double[] mag = new double[N]; for(int i=0; i<N; i++) { mag[i] = Math.sqrt(c[i].real*c[i].real+c[i].imag*c[i].imag); } return mag; } public static final Complex nthRootOfUnity(int n, int N) { double re = Math.cos(2*Math.PI*n/N); double im = Math.sin(2*Math.PI*n/N); return new Complex(re, im); } } /* * http://www.cs.princeton.edu/introcs/97data/FFT.java.html * Should be optimized. w_n may be looked up from a table etc. * * Java DSP book */ public class FFT { // private static int length; private static double[] r_data = null; private static double[] i_data = null; private static boolean forward = true; /* private void computeRootArray(int N) { Complex[] w = new Complex[N/2]; for(int i=0; i<N/2; i++) { w[i] = Complex.nthRootOfUnity(i, N); } } */ public static Complex[] forward(Complex[] x) { int N = x.length; if( N == 1 ) { return new Complex[] { x[0] }; } else { Complex[] even = new Complex[N/2]; Complex[] odd = new Complex[N/2]; for(int i=0; i<N/2; i++) { even[i]=x[2*i]; odd[i]=x[2*i+1]; } Complex[] left = forward(even); Complex[] right = forward(odd); Complex[] c = new Complex[N]; for(int n=0; n<N/2; n++) { double nth = -2*n*Math.PI/N; Complex wn = new Complex(Math.cos(nth), Math.sin(nth)); c[n] = Complex.add(left[n], Complex.multiply(wn, right[n])); c[n+N/2] = Complex.substract(left[n], Complex.multiply(wn, right[n])); } return c; } } public static int bitReverse(int index) { // if length = 8 index goes from 0 to 7 // to write 8 we need log2(8)+1=3+1=4 bits. // 8 = 1000, 7 = 111 // so to write 7 we need log2(8)=3 bits. int numBits = (int)Tools.log2(8); int ret = 0; for (int i = 0; i < numBits; i++) { ret = (ret<<1) + (index&1); index = index>>1; } return ret; } public static double[] magnitudeSpectrum(double[] realPart) { // length = realPart.length; // int localN; // // int numBits = (int)Tools.log2(length); // // for(int m = 1; m <= numBits; m++) { // // localN = 2^m; // localN = 1<<m; // } double[] imaginaryPart = new double[realPart.length]; for (int i = 0; i < imaginaryPart.length; i++) { imaginaryPart[i] = 0; } forwardFFT(realPart, imaginaryPart); for (int i = 0; i < realPart.length; i++) { realPart[i] = Math.sqrt( r_data[i]*r_data[i] + i_data[i]*i_data[i] ); } return realPart; } // swap Zi with Zj private static void swapInt(int i, int j) { double tempr; int ti; int tj; ti = i - 1; tj = j - 1; tempr = r_data[tj]; r_data[tj] = r_data[ti]; r_data[ti] = tempr; tempr = i_data[tj]; i_data[tj] = i_data[ti]; i_data[ti] = tempr; } private static void bitReverse2() { // System.out.println("fft: bit reversal"); /* bit reversal */ int n = r_data.length; int j = 1; int k; for (int i = 1; i < n; i++) { if (i < j) swapInt(i, j); k = n / 2; while (k >= 1 && k < j) { j = j - k; k = k / 2; } j = j + k; } } public static void forwardFFT(double in_r[], double in_i[]) { int id; int localN; double wtemp, Wjk_r, Wjk_i, Wj_r, Wj_i; double theta, tempr, tempi; // int ti, tj; int numBits = (int)Tools.log2(in_r.length); if (forward) { //centering(in_r); } // Truncate input data to a power of two int length = 1 << numBits; // length = 2**nu int n = length; int nby2; // Copy passed references to variables to be used within // fft routines & utilities r_data = in_r; i_data = in_i; bitReverse2(); for (int m = 1; m <= numBits; m++) { // localN = 2^m; localN = 1 << m; nby2 = localN / 2; Wjk_r = 1; Wjk_i = 0; theta = Math.PI / nby2; // for recursive comptutation of sine and cosine Wj_r = Math.cos(theta); Wj_i = -Math.sin(theta); if (forward == false) { Wj_i = -Wj_i; } for (int j = 0; j < nby2; j++) { // This is the FFT innermost loop // Any optimizations that can be made here will yield // great rewards. for (int k = j; k < n; k += localN) { id = k + nby2; tempr = Wjk_r * r_data[id] - Wjk_i * i_data[id]; tempi = Wjk_r * i_data[id] + Wjk_i * r_data[id]; // Zid = Zi -C r_data[id] = r_data[k] - tempr; i_data[id] = i_data[k] - tempi; r_data[k] += tempr; i_data[k] += tempi; } // (eq 6.23) and (eq 6.24) wtemp = Wjk_r; Wjk_r = Wj_r * Wjk_r - Wj_i * Wjk_i; Wjk_i = Wj_r * Wjk_i + Wj_i * wtemp; } } // normalize output of fft. // if (forward) if(false) for (int i = 0; i < r_data.length; i++) { r_data[i] = r_data[i] / (double) n; i_data[i] = i_data[i] / (double) n; } in_r = r_data; in_r = i_data; } public static Complex[] inverse(Complex[] c) { int N = c.length; Complex[] x = new Complex[N]; for(int i=0; i<N; i++) { x[i] = Complex.conjugate(c[i]); } x = forward(x); for(int i=0; i<N; i++) { x[i] = Complex.conjugate(x[i]); x[i] = Complex.scale(x[i], 1.0/N); } return x; } } public class Tools { public static final double TWO_PI = 2*Math.PI; public static final double LOG_OF_2_BASE_10 = 1/Math.log10(2); public static double log2(double x) { return Math.log10(x)/Math.log10(2.0); } public static final double[] lowpass(double[] signal, int nPoints) { int N = signal.length; double[] ret = new double[N]; for(int i=0; i<nPoints/2; i++) { ret[i] = signal[i]; } for(int i=nPoints/2; i<N-nPoints/2; i++) { for(int j=0; j<nPoints; j++) { ret[i]=0; ret[i]+=signal[i-nPoints/2+j]; ret[i]/=nPoints; } } for(int i=N-nPoints/2; i<N; i++) { ret[i]=signal[i]; } return ret; } public static final double[] addArrays(double[] x, double[] y) { double[] sum = new double[x.length]; for(int i=0; i<x.length; i++) { sum[i] = x[i] + y[i]; } return sum; } public static final Complex[] addArrays(Complex[] x, Complex[] y) { Complex[] sum = new Complex[x.length]; for(int i=0; i<x.length; i++) { sum[i] = Complex.add(x[i], y[i]); } return sum; } public static final Complex[] substractArrays(Complex[] x, Complex[] y) { Complex[] sum = new Complex[x.length]; for(int i=0; i<x.length; i++) { sum[i] = Complex.substract(x[i], y[i]); } return sum; } public static final double[] dotProduct(double[] x, double[] y) { double[] sum = new double[x.length]; for(int i=0; i<x.length; i++) { sum[i] = x[i] * y[i]; } return sum; } public static final Complex[] dotProduct(Complex[] x, Complex[] y) { Complex[] sum = new Complex[x.length]; for(int i=0; i<x.length; i++) { sum[i] = Complex.multiply(x[i], y[i]); } return sum; } public static Complex[] makeComplex(double[] x) { int N = x.length; Complex[] c = new Complex[N]; for(int i=0; i<N; i++) { c[i] = new Complex(x[i],0); } return c; } public static void printArray(double[] arr) { for (double d : arr) { System.out.format("%.4f ", d); } System.out.println(); } } Last edited 2012 |
| ||
Hey D4NM4N, that's some pretty impressive piece of code you provided me. I'll put it into Java along with the files I need to test, and let you know how it worked out for me :) |
| ||
Not mine it is some pd code i found. You need to split the classes into 4 sep. class files However I found it very useful in my recog lib. What I did was have 2 threads, one fetching the data from the hardware and putting the sample in a "BlockingQueue". Then had the second one unpacking it from said queue and doing the fourrier transform on it. PS: Have a look for "Android Open source DTMF recogniser" this project also uses the above maths lib and shows a similar technique to what i describe with the dual task block queue. I would give you the full lib i made, but it belongs to the company and i would get a whipping :/ Last edited 2012 |
| ||
This (http://www.csee.umbc.edu/~squire/download/fft1024.c) is the FFT i use. just drop it in and go. |