Speech recognition and sound comparison with musicg

2019-05-21 19:41发布

问题:

I'm trying to make an Android application with speech recognition, but unfortunately Google doesn't support my language (Macedonian), so I'm trying to compare two recorded sounds instead.

I'm using http://code.google.com/p/musicg/ to record and compare speech, and I'm stuck on initializing the settings for detecting speech. Can someone tell me how to rewrite this init function for speech detection, or suggest some other way to do it? It's very important to me.

this is the initialization for whistle detection

            // settings for detecting a whistle
            // NOTE(review): this is a fragment of an init method; the fields are declared
            // elsewhere and their units are not shown here — presumably the frequency and
            // pass-band values are in Hz; confirm against the musicg DetectionApi.

            // accepted frequency range; Double.MAX_VALUE leaves the upper end effectively open
            minFrequency = 600.0f;
            maxFrequency = Double.MAX_VALUE;

            // accepted intensity range
            minIntensity = 100.0f;
            maxIntensity = 100000.0f;

            // accepted standard-deviation range
            minStandardDeviation = 0.1f;
            maxStandardDeviation = 1.0f;

            // pass-band limits — presumably applied as a band-pass filter before analysis; verify
            highPass = 500;
            lowPass = 10000;

            // accepted range for the number of zero crossings
            minNumZeroCross = 50;
            maxNumZeroCross = 200;

            // NOTE(review): meaning of numRobust is not visible in this snippet — confirm in the API
            numRobust = 10;

回答1:

My understanding is that the musicg DetectionApi, as it stands, is only meant to analyse a single chunk of sound and tell you whether it contains a given type of sound, as in the included whistle and clap API examples, i.e. "is it a clap?" or "is it a whistle?".

With musicg, the best you could probably do would be recognise whether the sound is a voice or not.. though even this might be beyond the DetectionApi.

Since you said the google api doesn't support Macedonian, perhaps you could try Pocketsphinx, which is mentioned in this stackoverflow article.



回答2:

First off, all you have to do is save your recorded sound into wav and then it is easy to use fingerprint Class from their API https://code.google.com/p/musicg/source/browse/#git%2Fsrc%2Fcom%2Fmusicg%2Ffingerprint

Here is how I'm doing the comparison: one temporary recorded WAV sound is compared against all the WAV sounds stored in my database.

/**
 * Compares a recorded clip against every stored protocol sound and returns the
 * protocols the database reports as similar.
 *
 * <p>Steps:
 * <ol>
 *   <li>Read every protocol row from {@code getAllProtocolsForSoundCheck()} and
 *       reset its stored similarity to {@code -1}.</li>
 *   <li>Fingerprint-compare the recorded clip with each protocol's sound file
 *       and store the resulting similarity.</li>
 *   <li>Return the cursor produced by {@code getSimilarProtocols()}.</li>
 * </ol>
 *
 * @param recordedClip path of the recorded WAV file to compare
 * @param context      context handed to the {@code Baza} database helper
 * @return the cursor from {@code getSimilarProtocols()}, positioned on its first
 *         row when it has one; the caller is responsible for closing it
 */
public Cursor FP(String recordedClip, Context context) {

    Baza baza = new Baza(context);

    List<Protocol> protocols = new ArrayList<Protocol>();

    Cursor allSound = baza.getAllProtocolsForSoundCheck();
    if (allSound != null) {
        try {
            Log.d("broj", allSound.getCount() + "");

            // moveToNext() returns false past the last row, so an empty cursor is safe.
            while (allSound.moveToNext()) {
                Protocol protocol = new Protocol();
                protocol.setSoundPath(allSound.getString(4));
                protocol.setId(Integer.parseInt(allSound.getString(1)));
                protocols.add(protocol);

                Log.d("brojProtocol", allSound.getString(2) + " ");
                // Reset first so a stale score never survives a failed comparison.
                baza.updateProtocolsSoundSimilarity(protocol.getId(), (float) -1);
            }
        } finally {
            // This cursor is only used locally; release it instead of leaking it.
            allSound.close();
        }
    }

    Wave record = new Wave(recordedClip);

    for (Protocol protocol : protocols) {
        // Load one stored sound at a time rather than keeping every Wave in memory.
        Wave stored = new Wave(protocol.getSoundPath());

        // Fingerprint matching is the expensive step: compute it once and reuse it.
        float similarity = record.getFingerprintSimilarity(stored).getSimilarity();

        Log.d("similarity", similarity + "");
        baza.updateProtocolsSoundSimilarity(protocol.getId(), similarity);
    }

    Cursor similarCursor = baza.getSimilarProtocols();

    // Only read columns when there is a first row; an empty result is returned as-is.
    if (similarCursor != null && similarCursor.moveToFirst()) {
        TransferClass protocolForTransfer = new TransferClass();
        protocolForTransfer.setId(Integer.parseInt(similarCursor.getString(1)));
        protocolForTransfer.setName(similarCursor.getString(2));

        Log.d("passobj", protocolForTransfer.getName() + " " + protocolForTransfer.getId());
    }

    return similarCursor;
}


回答3:

and here is how I'm saving my temp recorded sound into the wav format:

/**
 * Records mono 16-bit PCM audio from the microphone at 8 kHz, draws a simple
 * spectrum of each captured block into an {@link ImageView}, and saves the
 * recording as a WAV file under {@code HQProtocol/sound} on external storage.
 *
 * <p>Usage: construct, call {@link #startRecording()}, then {@link #stopRecording()}.
 * While recording, raw PCM is streamed to a temporary file by a background
 * {@code AsyncTask}; {@code stopRecording()} waits for that task to finish and then
 * wraps the raw data in a WAV header.
 */
public class RecorderActivity {

    private static final int RECORDER_BPP = 16;
    private static final String AUDIO_RECORDER_FILE_EXT_WAV = ".wav";
    private static final String AUDIO_RECORDER_FOLDER = "HQProtocol/sound";
    private static final String AUDIO_RECORDER_TEMP_FILE = "record_temp.raw";
    private String AUDIO_RECORDER_FILE = "";
    private static final int RECORDER_SAMPLERATE = 8000;
    private static final int RECORDER_CHANNELS = AudioFormat.CHANNEL_IN_MONO;
    private static final int RECORDER_AUDIO_ENCODING = AudioFormat.ENCODING_PCM_16BIT;

    /** Bytes occupied by one PCM sample (16 bit -> 2 bytes). */
    private static final int BYTES_PER_SAMPLE = RECORDER_BPP / 8;

    private RealDoubleFFT transformer;

    EndPointDetection endpoint;

    /** Number of samples handed to the FFT for each drawn spectrum. */
    int blockSize = 256;

    private AudioRecord recorder = null;
    private int bufferSize = 0;
    private RecorderAsynctask recordingThread = null;

    /** Written by the UI thread, polled by the background capture loop. */
    private volatile boolean isRecording = false;

    float tempFloatBuffer[] = new float[3];
    int tempIndex = 0;
    int totalReadBytes = 0;

    ImageView imageView;
    Bitmap bitmap;
    Canvas canvas;
    Paint paint;

    Context con;

    /**
     * Creates a recorder that will save to {@code <file>.wav} and draw into {@code image}.
     *
     * @param file  base name (without extension) of the WAV file to produce
     * @param con   context kept for later use
     * @param image view that displays the spectrum bitmap
     */
    RecorderActivity(String file, Context con, ImageView image) {
        AUDIO_RECORDER_FILE = file;
        this.con = con;

        this.imageView = image;
        bitmap = Bitmap.createBitmap(256, 100, Bitmap.Config.ARGB_8888);
        canvas = new Canvas(bitmap);
        paint = new Paint();
        paint.setStrokeWidth(5);
        paint.setColor(Color.BLUE);
        imageView.setImageBitmap(bitmap);

        transformer = new RealDoubleFFT(blockSize);

        bufferSize = AudioRecord.getMinBufferSize(RECORDER_SAMPLERATE,
                RECORDER_CHANNELS, RECORDER_AUDIO_ENCODING);

    }

    /** Returns the recordings folder on external storage, creating it if needed. */
    private File recorderFolder() {
        File folder = new File(Environment.getExternalStorageDirectory().getPath(),
                AUDIO_RECORDER_FOLDER);

        if (!folder.exists()) {
            folder.mkdirs();
        }

        return folder;
    }

    /**
     * Returns the absolute path of the final WAV file.
     *
     * @return {@code <external storage>/HQProtocol/sound/<file>.wav}
     */
    public String getFilename() {
        return (recorderFolder().getAbsolutePath() + "/" + AUDIO_RECORDER_FILE + AUDIO_RECORDER_FILE_EXT_WAV);
    }

    /**
     * Returns the absolute path of the temporary raw PCM file.
     *
     * <p>This is a pure path lookup. It must not delete anything, because
     * {@link #stopRecording()} calls it to locate the data it is about to copy;
     * a stale temp file is truncated anyway when the capture loop reopens it.
     */
    private String getTempFilename() {
        return (recorderFolder().getAbsolutePath() + "/" + AUDIO_RECORDER_TEMP_FILE);
    }

    /** Opens the microphone and starts the background capture task. */
    public void startRecording() {
        recorder = new AudioRecord(MediaRecorder.AudioSource.MIC,
                RECORDER_SAMPLERATE, RECORDER_CHANNELS,
                RECORDER_AUDIO_ENCODING, bufferSize);

        recorder.startRecording();

        isRecording = true;

        recordingThread = new RecorderAsynctask();
        recordingThread.execute(this);

    }

    /** Background task that runs the capture loop and draws each published spectrum. */
    class RecorderAsynctask extends AsyncTask<RecorderActivity, double[], Void> {

        /** Reused for every drawn line instead of allocating a Paint per update. */
        private final Paint spectrumPaint = new Paint();

        /** Exposes {@code publishProgress} to the enclosing class. */
        public void shareLockedfuntionProgreesUpdate(double[] fttrezult) {

            publishProgress(fttrezult);

        }

        @Override
        protected Void doInBackground(RecorderActivity... params) {
            // Single implementation of the capture loop lives in the outer class.
            writeAudioDataToFile(this);
            return null;
        }

        @Override
        protected void onProgressUpdate(double[]... toTransform) {
            canvas.drawColor(Color.GRAY);
            double[] spectrum = toTransform[0];
            for (int i = 0; i < spectrum.length; i++) {

                int x = i;
                int downy = (int) (100 - (spectrum[i] * 10));
                int upy = 100;
                spectrumPaint.setColor(Color.rgb(downy % 256, i % 256, upy % 256));
                canvas.drawLine(x, upy, x, downy, spectrumPaint);

            }
            imageView.invalidate();
        }

    }

    /**
     * Capture loop: reads PCM from the recorder while {@code isRecording} is true,
     * appends exactly the bytes read to the temporary raw file, and publishes the
     * FFT of each block through {@code asyntask}.
     *
     * <p>Intended to run on a background thread. The loop ends when recording is
     * stopped or when the recorder reports an error.
     *
     * @param asyntask task used to publish each transformed block to the UI thread
     */
    public void writeAudioDataToFile(RecorderAsynctask asyntask) {
        // Snapshot the field: stopRecording() clears it only after this loop has ended.
        final AudioRecord source = recorder;
        if (null == source) {
            return;
        }

        FileOutputStream os;
        try {
            os = new FileOutputStream(getTempFilename());
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        }

        byte data[] = new byte[bufferSize];
        final int fftSize = blockSize;
        // One FFT block needs fftSize samples of 2 bytes each; never ask for more
        // than the buffer can hold.
        final int bytesPerBlock = Math.min(data.length, fftSize * BYTES_PER_SAMPLE);

        try {
            while (isRecording) {
                int bytesRead = source.read(data, 0, bytesPerBlock);

                if (bytesRead < 0) {
                    // AudioRecord error code (e.g. recorder stopped): leave the loop
                    // instead of spinning and writing stale buffer contents.
                    break;
                }
                if (bytesRead == 0) {
                    continue;
                }

                // Fresh array per block: the UI thread may still be drawing the
                // previously published one.
                double[] toTransform = new double[fftSize];
                int samples = Math.min(fftSize, bytesRead / BYTES_PER_SAMPLE);
                for (int i = 0; i < samples; i++) {
                    // Assumes little-endian 16-bit samples, matching the WAV data
                    // layout this class writes — confirm for exotic devices.
                    int low = data[2 * i] & 0xff;
                    int high = data[2 * i + 1]; // keeps the sign
                    toTransform[i] = (double) ((high << 8) | low) / 32768.0;
                }

                transformer.ft(toTransform);
                asyntask.shareLockedfuntionProgreesUpdate(toTransform);

                try {
                    // Write only what was actually read, not the whole buffer.
                    os.write(data, 0, bytesRead);
                    tempIndex++;
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        } finally {
            try {
                os.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Stops capturing, waits for the background task to finish writing the raw
     * file, then produces the final WAV file and removes the temporary one.
     *
     * <p>Blocks the calling thread until the capture loop has exited, so the WAV
     * file is complete when this method returns.
     */
    public void stopRecording() {
        // Tell the capture loop to exit after its current block.
        isRecording = false;

        if (null != recorder) {
            recorder.stop();
        }

        // The temp file must be fully written and closed before it is copied, and
        // the recorder must not be released while the loop may still be reading.
        if (null != recordingThread) {
            try {
                recordingThread.get();
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            } catch (java.util.concurrent.ExecutionException e) {
                e.printStackTrace();
            } catch (java.util.concurrent.CancellationException e) {
                // Task was cancelled via closeThreadIfisnot(); nothing to wait for.
                e.printStackTrace();
            }
            recordingThread = null;
        }

        if (null != recorder) {
            recorder.release();
            recorder = null;
        }

        copyWaveFile(getTempFilename(), getFilename());
        deleteTempFile();
    }

    /** Deletes the temporary raw PCM file. */
    private void deleteTempFile() {
        File file = new File(getTempFilename());

        file.delete();
    }

    /**
     * Copies raw PCM from {@code inFilename} to {@code outFilename}, prefixed with
     * a 44-byte WAV header describing mono 16-bit audio at the recorder sample rate.
     *
     * @param inFilename  path of the raw PCM input
     * @param outFilename path of the WAV file to create
     */
    private void copyWaveFile(String inFilename, String outFilename) {
        long longSampleRate = RECORDER_SAMPLERATE;
        int channels = 1;
        long byteRate = RECORDER_BPP * RECORDER_SAMPLERATE * channels / 8;

        // Never use a zero/negative-length buffer even if getMinBufferSize failed.
        byte[] data = new byte[Math.max(bufferSize, 4096)];

        FileInputStream in = null;
        FileOutputStream out = null;

        try {
            in = new FileInputStream(inFilename);
            out = new FileOutputStream(outFilename);
            long totalAudioLen = in.getChannel().size();
            long totalDataLen = totalAudioLen + 36;

            WriteWaveFileHeader(out, totalAudioLen, totalDataLen,
                    longSampleRate, channels, byteRate);

            int count;
            while ((count = in.read(data)) != -1) {
                // Copy only the bytes read so the data chunk matches the header length.
                out.write(data, 0, count);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeQuietly(in);
            closeQuietly(out);
        }
    }

    /** Closes {@code stream} if non-null, reporting (not propagating) any failure. */
    private static void closeQuietly(java.io.Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a canonical 44-byte RIFF/WAVE header for uncompressed PCM.
     *
     * @param out            stream positioned at the start of the file
     * @param totalAudioLen  size of the PCM data chunk in bytes
     * @param totalDataLen   RIFF chunk size ({@code totalAudioLen + 36})
     * @param longSampleRate sample rate in Hz
     * @param channels       number of channels
     * @param byteRate       bytes per second ({@code sampleRate * channels * bits / 8})
     * @throws IOException if the header cannot be written
     */
    private void WriteWaveFileHeader(FileOutputStream out, long totalAudioLen,
            long totalDataLen, long longSampleRate, int channels, long byteRate)
            throws IOException {

        // Bytes per sample frame across all channels (2 for mono 16-bit).
        int blockAlign = channels * RECORDER_BPP / 8;

        byte[] header = new byte[44];

        header[0] = 'R'; // RIFF/WAVE header
        header[1] = 'I';
        header[2] = 'F';
        header[3] = 'F';
        header[4] = (byte) (totalDataLen & 0xff);
        header[5] = (byte) ((totalDataLen >> 8) & 0xff);
        header[6] = (byte) ((totalDataLen >> 16) & 0xff);
        header[7] = (byte) ((totalDataLen >> 24) & 0xff);
        header[8] = 'W';
        header[9] = 'A';
        header[10] = 'V';
        header[11] = 'E';
        header[12] = 'f'; // 'fmt ' chunk
        header[13] = 'm';
        header[14] = 't';
        header[15] = ' ';
        header[16] = 16; // 4 bytes: size of 'fmt ' chunk
        header[17] = 0;
        header[18] = 0;
        header[19] = 0;
        header[20] = 1; // format = 1 (PCM)
        header[21] = 0;
        header[22] = (byte) channels;
        header[23] = 0;
        header[24] = (byte) (longSampleRate & 0xff);
        header[25] = (byte) ((longSampleRate >> 8) & 0xff);
        header[26] = (byte) ((longSampleRate >> 16) & 0xff);
        header[27] = (byte) ((longSampleRate >> 24) & 0xff);
        header[28] = (byte) (byteRate & 0xff);
        header[29] = (byte) ((byteRate >> 8) & 0xff);
        header[30] = (byte) ((byteRate >> 16) & 0xff);
        header[31] = (byte) ((byteRate >> 24) & 0xff);
        header[32] = (byte) (blockAlign & 0xff); // block align
        header[33] = (byte) ((blockAlign >> 8) & 0xff);
        header[34] = RECORDER_BPP; // bits per sample
        header[35] = 0;
        header[36] = 'd';
        header[37] = 'a';
        header[38] = 't';
        header[39] = 'a';
        header[40] = (byte) (totalAudioLen & 0xff);
        header[41] = (byte) ((totalAudioLen >> 8) & 0xff);
        header[42] = (byte) ((totalAudioLen >> 16) & 0xff);
        header[43] = (byte) ((totalAudioLen >> 24) & 0xff);

        out.write(header, 0, 44);
    }

    /** Cancels the background capture task if one is still referenced. */
    public void closeThreadIfisnot() {
        // stopRecording() clears the reference, so guard against a null task.
        if (null != recordingThread) {
            recordingThread.cancel(true);
        }
    }
}