How to use web audio api to get raw pcm audio?

2019-01-28 04:00发布

问题:

How usergetmedia to use the microphone in chrome and then stream to get raw audio? I need need to get the audio in linear 16.

回答1:

Unfortunately, the MediaRecorder doesn't support raw PCM capture. (A sad oversight, in my opinion.) Therefore, you'll need to get the raw samples and buffer/save them yourself.

You can do this with the ScriptProcessorNode. Normally, this Node is used to modify the audio data programmatically, for custom effects and what not. But, there's no reason you can't just use it as a capture point. Untested, but try something like this code:

const captureNode = audioContext.createScriptProcessor(8192, 1, 1);
captureNode.addEventListener('audioprocess', (e) => {
  const rawLeftChannelData = inputBuffer.getChannelData(0);
  // rawLeftChannelData is now a typed array with floating point samples
});

(You can find a more complete example on MDN.)

Those floating point samples are centered on zero 0 and will ideally be bound to -1 and 1. When converting to an integer range, you'll want to clamp values to this range, clipping anything beyond it. (The values can sometimes exceed -1 and 1 in the event loud sounds are mixed together in-browser. In theory, the browser can also record float32 samples from an external sound device which may also exceed that range, but I don't know of any browser/platform that does this.)

When converting to integer, it matters if the values are signed or unsigned. If signed, for 16-bit, the range is -32768 to 32767. For unsigned, it's 0 to 65535. Figure out what format you want to use and scale the -1 to 1 values up to that range.

One final note on this conversion... endianness can matter. See also: https://stackoverflow.com/a/7870190/362536



回答2:

You should look into MediaTrackConstraints.sampleSize property for the MediaDevices.getUserMedia() API. Using the sampleSize constraint, if your audio hardware permits you can set the sample size to 16 bits.

As far as the implementation goes, well that's what the links are and google are for...



回答3:

here is some Web Audio API where it uses the microphone to capture and playback raw audio (turn down your volume before running this page) ... to see snippets of raw audio in PCM format view the browser console ... for kicks it also sends this PCM into a call to FFT to obtain the frequency domain as well as the time domain of the audio curve

<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
<title>capture microphone then show time & frequency domain output</title>

<script type="text/javascript">

var webaudio_tooling_obj = function () {

    var audioContext = new AudioContext();

    console.log("audio is starting up ...");

    var BUFF_SIZE_RENDERER = 16384;
    var SIZE_SHOW = 3; // number of array elements to show in console output

    var audioInput = null,
    microphone_stream = null,
    gain_node = null,
    script_processor_node = null,
    script_processor_analysis_node = null,
    analyser_node = null;

    if (!navigator.getUserMedia)
        navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia ||
    navigator.mozGetUserMedia || navigator.msGetUserMedia;

    if (navigator.getUserMedia){

        navigator.getUserMedia({audio:true}, 
            function(stream) {
                start_microphone(stream);
            },
            function(e) {
                alert('Error capturing audio.');
            }
            );

    } else { alert('getUserMedia not supported in this browser.'); }

    // ---

    function show_some_data(given_typed_array, num_row_to_display, label) {

        var size_buffer = given_typed_array.length;
        var index = 0;

        console.log("__________ " + label);

        if (label === "time") {

            for (; index < num_row_to_display && index < size_buffer; index += 1) {

                var curr_value_time = (given_typed_array[index] / 128) - 1.0;

                console.log(curr_value_time);
            }

        } else if (label === "frequency") {

            for (; index < num_row_to_display && index < size_buffer; index += 1) {

                console.log(given_typed_array[index]);
            }

        } else {

            throw new Error("ERROR - must pass time or frequency");
        }
    }

    function process_microphone_buffer(event) {

        var i, N, inp, microphone_output_buffer;

        // not needed for basic feature set
        // microphone_output_buffer = event.inputBuffer.getChannelData(0); // just mono - 1 channel for now
    }

    function start_microphone(stream){

        gain_node = audioContext.createGain();
        gain_node.connect( audioContext.destination );

        microphone_stream = audioContext.createMediaStreamSource(stream);
        microphone_stream.connect(gain_node); 

        script_processor_node = audioContext.createScriptProcessor(BUFF_SIZE_RENDERER, 1, 1);
        script_processor_node.onaudioprocess = process_microphone_buffer;

        microphone_stream.connect(script_processor_node);

        // --- enable volume control for output speakers

        document.getElementById('volume').addEventListener('change', function() {

            var curr_volume = this.value;
            gain_node.gain.value = curr_volume;

            console.log("curr_volume ", curr_volume);
        });

        // --- setup FFT

        script_processor_analysis_node = audioContext.createScriptProcessor(2048, 1, 1);
        script_processor_analysis_node.connect(gain_node);

        analyser_node = audioContext.createAnalyser();
        analyser_node.smoothingTimeConstant = 0;
        analyser_node.fftSize = 2048;

        microphone_stream.connect(analyser_node);

        analyser_node.connect(script_processor_analysis_node);

        var buffer_length = analyser_node.frequencyBinCount;

        var array_freq_domain = new Uint8Array(buffer_length);
        var array_time_domain = new Uint8Array(buffer_length);

        console.log("buffer_length " + buffer_length);

        script_processor_analysis_node.onaudioprocess = function() {

            // get the average for the first channel
            analyser_node.getByteFrequencyData(array_freq_domain);
            analyser_node.getByteTimeDomainData(array_time_domain);

            // draw the spectrogram
            if (microphone_stream.playbackState == microphone_stream.PLAYING_STATE) {

                show_some_data(array_freq_domain, SIZE_SHOW, "frequency");
                show_some_data(array_time_domain, SIZE_SHOW, "time"); // store this to record to aggregate buffer/file
            }
        };
    }

}(); //  webaudio_tooling_obj = function()

</script>

</head>
<body>

    <p>Volume</p>
    <input id="volume" type="range" min="0" max="1" step="0.1" value="0.0"/>

</body>
</html>


回答4:

The only two examples I've found that are clear and make sense are the following:

AWS Labs: https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js

The AWS resource is very good. It shows you how to export your recorded audio to "WAV format encoded as PCM". Amazon Lex, which is a transcription service offered by AWS requires the audio to be PCM encoded and wrapped in a WAV container. You can merely adapt some of the code to make it work for you! AWS has some additional features such as "downsampling" which allows you to change the sample rate without affecting the recording.

RecordRTC: https://github.com/muaz-khan/RecordRTC/blob/master/simple-demos/raw-pcm.html

RecordRTC is a complete library. You can, once again, adapt their code or find the snippet of code that encodes the audio to raw PCM. You could also implement their library and use the code as-is. Using the "desiredSampleRate" option for audio config with this library negatively affects the recording.

They are both excellent resources and you'll definitely be able to solve your question.