I'm am currently using C# invokes to call the FFmpeg APIs to handle video and audio. I have the following code in place to extract the audio from a video and write it to a file.
while (ffmpeg.av_read_frame(formatContext, &packet) >= 0)
{
if (packet.stream_index == streamIndex)
{
while (packet.size > 0)
{
int frameDecoded;
int frameDecodedResult = ffmpeg.avcodec_decode_audio4(codecContext, frame, &frameDecoded, packet);
if (frameDecoded > 0 && frameDecodedResult >= 0)
{
//writeAudio.WriteFrame(frame);
packet.data += totalBytesDecoded;
packet.size -= totalBytesDecoded;
}
}
frameIndex++;
}
Avcodec.av_free_packet(&packet);
}
This is all working correctly. I'm currently using the FFmpeg.AutoGen project for the API access.
I want to be able to increase/decrease the volume of the audio before its written to the file, but I cannot seem to find a command or any help with this. Does it have to be done manually?
Update 1:
After receiving some help, this is the class layout I have:
public unsafe class FilterVolume
{
#region Private Member Variables
private AVFilterGraph* m_filterGraph = null;
private AVFilterContext* m_aBufferSourceFilterContext = null;
private AVFilterContext* m_aBufferSinkFilterContext = null;
#endregion
#region Private Constant Member Variables
private const int EAGAIN = 11;
#endregion
public FilterVolume(AVCodecContext* codecContext, AVStream* stream, float volume)
{
CodecContext = codecContext;
Stream = stream;
Volume = volume;
Initialise();
}
public AVFrame* Adjust(AVFrame* frame)
{
AVFrame* returnFilteredFrame = ffmpeg.av_frame_alloc();
if (m_aBufferSourceFilterContext != null && m_aBufferSinkFilterContext != null)
{
int bufferSourceAddFrameResult = ffmpeg.av_buffersrc_add_frame(m_aBufferSourceFilterContext, frame);
if (bufferSourceAddFrameResult < 0)
{
}
int bufferSinkGetFrameResult = ffmpeg.av_buffersink_get_frame(m_aBufferSinkFilterContext, returnFilteredFrame);
if (bufferSinkGetFrameResult < 0 && bufferSinkGetFrameResult != -EAGAIN)
{
}
}
return returnFilteredFrame;
}
public void Dispose()
{
Cleanup(m_filterGraph);
}
#region Private Properties
private AVCodecContext* CodecContext { get; set; }
private AVStream* Stream { get; set; }
private float Volume { get; set; }
#endregion
#region Private Setup Helper Functions
private void Initialise()
{
m_filterGraph = GetAllocatedFilterGraph();
string aBufferFilterArguments = string.Format("sample_fmt={0}:channel_layout={1}:sample_rate={2}:time_base={3}/{4}",
(int)CodecContext->sample_fmt,
CodecContext->channel_layout,
CodecContext->sample_rate,
Stream->time_base.num,
Stream->time_base.den);
AVFilterContext* aBufferSourceFilterContext = CreateFilter("abuffer", m_filterGraph, aBufferFilterArguments);
AVFilterContext* volumeFilterContext = CreateFilter("volume", m_filterGraph, string.Format("volume={0}", Volume));
AVFilterContext* aBufferSinkFilterContext = CreateFilter("abuffersink", m_filterGraph);
LinkFilter(aBufferSourceFilterContext, volumeFilterContext);
LinkFilter(volumeFilterContext, aBufferSinkFilterContext);
SetFilterGraphConfiguration(m_filterGraph, null);
m_aBufferSourceFilterContext = aBufferSourceFilterContext;
m_aBufferSinkFilterContext = aBufferSinkFilterContext;
}
#endregion
#region Private Cleanup Helper Functions
private static void Cleanup(AVFilterGraph* filterGraph)
{
if (filterGraph != null)
{
ffmpeg.avfilter_graph_free(&filterGraph);
}
}
#endregion
#region Provate Helpers
private AVFilterGraph* GetAllocatedFilterGraph()
{
AVFilterGraph* filterGraph = ffmpeg.avfilter_graph_alloc();
if (filterGraph == null)
{
}
return filterGraph;
}
private AVFilter* GetFilterByName(string name)
{
AVFilter* filter = ffmpeg.avfilter_get_by_name(name);
if (filter == null)
{
}
return filter;
}
private void SetFilterGraphConfiguration(AVFilterGraph* filterGraph, void* logContext)
{
int filterGraphConfigResult = ffmpeg.avfilter_graph_config(filterGraph, logContext);
if (filterGraphConfigResult < 0)
{
}
}
private AVFilterContext* CreateFilter(string filterName, AVFilterGraph* filterGraph, string filterArguments = null)
{
AVFilter* filter = GetFilterByName(filterName);
AVFilterContext* filterContext;
int aBufferFilterCreateResult = ffmpeg.avfilter_graph_create_filter(&filterContext, filter, filterName, filterArguments, null, filterGraph);
if (aBufferFilterCreateResult < 0)
{
}
return filterContext;
}
private void LinkFilter(AVFilterContext* source, AVFilterContext* destination)
{
int filterLinkResult = ffmpeg.avfilter_link(source, 0, destination, 0);
if (filterLinkResult < 0)
{
}
}
#endregion
}
The Adjust() function is called after a frame is decoded. I'm currently getting a -22 error when av_buffersrc_add_frame() is called. This indicates that a parameter is invalid, but after debugging, I cannot see anything that would be causing this.
This is how the code is called:
while (ffmpeg.av_read_frame(formatContext, &packet) >= 0)
{
if (packet.stream_index == streamIndex)
{
while (packet.size > 0)
{
int frameDecoded;
int frameDecodedResult = ffmpeg.avcodec_decode_audio4(codecContext, frame, &frameDecoded, packet);
if (frameDecoded > 0 && frameDecodedResult >= 0)
{
AVFrame* filteredFrame = m_filterVolume.Adjust(frame);
//writeAudio.WriteFrame(filteredFrame);
packet.data += totalBytesDecoded;
packet.size -= totalBytesDecoded;
}
}
frameIndex++;
}
Avcodec.av_free_packet(&packet);
}
Update 2:
Cracked it. The "channel_layout" option in the filter argument string is supposed to be a hexadecimal. This is what the string formatting should look like:
string aBufferFilterArguments = string.Format("sample_fmt={0}:channel_layout=0x{1}:sample_rate={2}:time_base={3}/{4}",
(int)CodecContext->sample_fmt,
CodecContext->channel_layout,
CodecContext->sample_rate,
Stream->time_base.num,
Stream->time_base.den);
What you need to do is build a filter graph and process the audio stream through that graph. In your case, the graph is just INPUT ("abuffer") -> VOLUME -> OUTPUT ("abuffersink").
Here is a sample console app that demonstrates that. It's loosely based on ffmpeg samples filtering_audio, filter_audio and remuxing.
You can use it like this:
ChangeVolume.exe http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4 bunny_half.mp4 0.5
And here is the code:
class Program
{
static unsafe void Main(string[] args)
{
Console.WriteLine(@"Current directory: " + Environment.CurrentDirectory);
Console.WriteLine(@"Running in {0}-bit mode.", Environment.Is64BitProcess ? @"64" : @"32");
// adapt this to your context
var ffmpegPath = string.Format(@"../../../FFmpeg/bin/{0}", Environment.Is64BitProcess ? @"x64" : @"x86");
InteropHelper.SetDllDirectory(ffmpegPath);
int ret, i;
if (args.Length < 3)
{
Console.WriteLine("usage: ChangeVolume input output <volume ratio>");
return;
}
string in_filename = args[0];
string out_filename = args[1];
double ratio = double.Parse(args[2]);
ffmpeg.av_register_all();
ffmpeg.avfilter_register_all();
// open input file
AVFormatContext* ifmt_ctx = null;
InteropHelper.Check(ffmpeg.avformat_open_input(&ifmt_ctx, in_filename, null, null));
// dump input
ffmpeg.av_dump_format(ifmt_ctx, 0, in_filename, 0);
// get streams info to determine audio stream index
InteropHelper.Check(ffmpeg.avformat_find_stream_info(ifmt_ctx, null));
// determine input decoder
AVCodec* dec;
int audio_stream_index = ffmpeg.av_find_best_stream(ifmt_ctx, AVMediaType.AVMEDIA_TYPE_AUDIO, -1, -1, &dec, 0);
AVCodecContext* dec_ctx = ifmt_ctx->streams[audio_stream_index]->codec;
// open input decoder
InteropHelper.Check(ffmpeg.avcodec_open2(dec_ctx, dec, null));
// build a filter graph
AVFilterContext* buffersrc_ctx;
AVFilterContext* buffersink_ctx;
AVFilterGraph* filter_graph = init_filter_graph(ifmt_ctx, dec_ctx, audio_stream_index, &buffersrc_ctx, &buffersink_ctx, ratio);
// prepare output
AVFormatContext* ofmt_ctx = null;
InteropHelper.Check(ffmpeg.avformat_alloc_output_context2(&ofmt_ctx, null, null, out_filename));
InteropHelper.Check(ofmt_ctx);
// create output streams
AVCodecContext* enc_ctx = null;
ofmt_ctx->oformat->flags |= InteropHelper.AVFMT_NOTIMESTAMPS;
for (i = 0; i < ifmt_ctx->nb_streams; i++)
{
AVStream* in_stream = ifmt_ctx->streams[i];
if (in_stream->codec->codec_type == AVMediaType.AVMEDIA_TYPE_DATA) // skip these
continue;
AVStream* out_stream = ffmpeg.avformat_new_stream(ofmt_ctx, in_stream->codec->codec);
InteropHelper.Check(out_stream);
InteropHelper.Check(ffmpeg.avcodec_copy_context(out_stream->codec, in_stream->codec));
out_stream->codec->codec_tag = 0;
if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_GLOBALHEADER) != 0)
{
out_stream->codec->flags |= InteropHelper.AV_CODEC_FLAG_GLOBAL_HEADER;
}
if (i == audio_stream_index)
{
// create audio encoder from audio decoder
AVCodec* enc = ffmpeg.avcodec_find_encoder(dec_ctx->codec_id);
InteropHelper.Check(enc);
enc_ctx = ffmpeg.avcodec_alloc_context3(enc);
InteropHelper.Check(enc_ctx);
enc_ctx->sample_rate = dec_ctx->sample_rate;
enc_ctx->channel_layout = dec_ctx->channel_layout;
enc_ctx->channels = ffmpeg.av_get_channel_layout_nb_channels(enc_ctx->channel_layout);
enc_ctx->sample_fmt = enc->sample_fmts[0];
enc_ctx->time_base.num = 1;
enc_ctx->time_base.den = enc_ctx->sample_rate;
InteropHelper.Check(ffmpeg.avcodec_open2(enc_ctx, enc, null));
}
}
// dump output
ffmpeg.av_dump_format(ofmt_ctx, 0, out_filename, 1);
if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_NOFILE) == 0)
{
// open output file
InteropHelper.Check(ffmpeg.avio_open(&ofmt_ctx->pb, out_filename, InteropHelper.AVIO_FLAG_WRITE));
}
// write output file header
InteropHelper.Check(ffmpeg.avformat_write_header(ofmt_ctx, null));
// read all packets and process
AVFrame* frame = ffmpeg.av_frame_alloc();
AVFrame* filt_frame = ffmpeg.av_frame_alloc();
while (true)
{
AVStream* in_stream;
AVStream* out_stream;
AVPacket pkt;
ret = ffmpeg.av_read_frame(ifmt_ctx, &pkt);
if (ret < 0)
break;
in_stream = ifmt_ctx->streams[pkt.stream_index];
if (in_stream->codec->codec_type == AVMediaType.AVMEDIA_TYPE_DATA)
continue;
// audio stream? we need to pass it through our filter graph
if (pkt.stream_index == audio_stream_index)
{
// decode audio (packet -> frame)
int got_frame = 0;
InteropHelper.Check(ffmpeg.avcodec_decode_audio4(dec_ctx, frame, &got_frame, &pkt));
if (got_frame > 0)
{
// add the frame into the filter graph
InteropHelper.Check(ffmpeg.av_buffersrc_add_frame(buffersrc_ctx, frame));
while (true)
{
// get the frame out from the filter graph
ret = ffmpeg.av_buffersink_get_frame(buffersink_ctx, filt_frame);
const int EAGAIN = 11;
if (ret == -EAGAIN)
break;
InteropHelper.Check(ret);
// encode audio (frame -> packet)
AVPacket enc_pkt = new AVPacket();
int got_packet = 0;
InteropHelper.Check(ffmpeg.avcodec_encode_audio2(enc_ctx, &enc_pkt, filt_frame, &got_packet));
enc_pkt.stream_index = pkt.stream_index;
InteropHelper.Check(ffmpeg.av_interleaved_write_frame(ofmt_ctx, &enc_pkt));
ffmpeg.av_frame_unref(filt_frame);
}
}
}
else
{
// write other (video) streams
out_stream = ofmt_ctx->streams[pkt.stream_index];
pkt.pts = ffmpeg.av_rescale_q_rnd(pkt.pts, in_stream->time_base, out_stream->time_base, AVRounding.AV_ROUND_NEAR_INF | AVRounding.AV_ROUND_PASS_MINMAX);
pkt.dts = ffmpeg.av_rescale_q_rnd(pkt.dts, in_stream->time_base, out_stream->time_base, AVRounding.AV_ROUND_NEAR_INF | AVRounding.AV_ROUND_PASS_MINMAX);
pkt.duration = ffmpeg.av_rescale_q(pkt.duration, in_stream->time_base, out_stream->time_base);
pkt.pos = -1;
InteropHelper.Check(ffmpeg.av_interleaved_write_frame(ofmt_ctx, &pkt));
}
ffmpeg.av_packet_unref(&pkt);
}
// write trailer, close file
ffmpeg.av_write_trailer(ofmt_ctx);
ffmpeg.avformat_close_input(&ifmt_ctx);
if ((ofmt_ctx->oformat->flags & InteropHelper.AVFMT_NOFILE) == 0)
{
ffmpeg.avio_closep(&ofmt_ctx->pb);
}
ffmpeg.avformat_free_context(ofmt_ctx);
ffmpeg.av_frame_free(&filt_frame);
ffmpeg.av_frame_free(&frame);
ffmpeg.avfilter_graph_free(&filter_graph);
return;
}
static unsafe AVFilterGraph* init_filter_graph(AVFormatContext* format, AVCodecContext* codec, int audio_stream_index, AVFilterContext** buffersrc_ctx, AVFilterContext** buffersink_ctx, double volumeRatio)
{
// create graph
var filter_graph = ffmpeg.avfilter_graph_alloc();
InteropHelper.Check(filter_graph);
// add input filter
var abuffersrc = ffmpeg.avfilter_get_by_name("abuffer");
if (abuffersrc == null) InteropHelper.CheckTag("\x00F8FIL");
string args = string.Format("sample_fmt={0}:channel_layout={1}:sample_rate={2}:time_base={3}/{4}",
(int)codec->sample_fmt,
codec->channel_layout,
codec->sample_rate,
format->streams[audio_stream_index]->time_base.num,
format->streams[audio_stream_index]->time_base.den);
InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(buffersrc_ctx, abuffersrc, "IN", args, null, filter_graph));
// add volume filter
var volume = ffmpeg.avfilter_get_by_name("volume");
if (volume == null) InteropHelper.CheckTag("\x00F8FIL");
AVFilterContext* volume_ctx;
InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(&volume_ctx, volume, "VOL", "volume=" + volumeRatio.ToString(CultureInfo.InvariantCulture), null, filter_graph));
// add output filter
var abuffersink = ffmpeg.avfilter_get_by_name("abuffersink");
if (abuffersink == null) InteropHelper.CheckTag("\x00F8FIL");
InteropHelper.Check(ffmpeg.avfilter_graph_create_filter(buffersink_ctx, abuffersink, "OUT", "", null, filter_graph));
// connect input -> volume -> output
InteropHelper.Check(ffmpeg.avfilter_link(*buffersrc_ctx, 0, volume_ctx, 0));
InteropHelper.Check(ffmpeg.avfilter_link(volume_ctx, 0, *buffersink_ctx, 0));
InteropHelper.Check(ffmpeg.avfilter_graph_config(filter_graph, null));
return filter_graph;
}
}
It uses a utility InteropHelper class derived from AutoGen's:
public class InteropHelper
{
[DllImport("kernel32", SetLastError = true)]
public static extern bool SetDllDirectory(string lpPathName);
public static readonly int AVERROR_EOF = -GetTag("EOF ");
public static readonly int AVERROR_UNKNOWN = -GetTag("UNKN");
public static readonly int AVFMT_GLOBALHEADER = 0x0040;
public static readonly int AVFMT_NOFILE = 0x0001;
public static readonly int AVIO_FLAG_WRITE = 2;
public static readonly int AV_CODEC_FLAG_GLOBAL_HEADER = (1 << 22);
public static readonly int AV_ROUND_ZERO = 0;
public static readonly int AV_ROUND_INF = 1;
public static readonly int AV_ROUND_DOWN = 2;
public static readonly int AV_ROUND_UP = 3;
public static readonly int AV_ROUND_PASS_MINMAX = 8192;
public static readonly int AV_ROUND_NEAR_INF = 5;
public static readonly int AVFMT_NOTIMESTAMPS = 0x0080;
public static unsafe void Check(void* ptr)
{
if (ptr != null)
return;
const int ENOMEM = 12;
Check(-ENOMEM);
}
public static unsafe void Check(IntPtr ptr)
{
if (ptr != IntPtr.Zero)
return;
Check((void*)null);
}
// example: "\x00F8FIL" is "Filter not found" (check libavutil/error.h)
public static void CheckTag(string tag)
{
Check(-GetTag(tag));
}
public static int GetTag(string tag)
{
var bytes = new byte[4];
for (int i = 0; i < 4; i++)
{
bytes[i] = (byte)tag[i];
}
return BitConverter.ToInt32(bytes, 0);
}
public static void Check(int res)
{
if (res >= 0)
return;
string err = "ffmpeg error " + res;
string text = GetErrorText(res);
if (!string.IsNullOrWhiteSpace(text))
{
err += ": " + text;
}
throw new Exception(err);
}
public static string GetErrorText(int res)
{
IntPtr err = Marshal.AllocHGlobal(256);
try
{
ffmpeg.av_strerror(res, err, 256);
return Marshal.PtrToStringAnsi(err);
}
finally
{
Marshal.FreeHGlobal(err);
}
}
}
I do not know what API you are using, but ffmpeg has a command that allows to increase or decrease audio:
Decrease to half:
ffmpeg -i input.wav -af "volume=0.5" output.wav
Increase 50%:
ffmpeg -i input.wav -af "volume=1.5" output.wav
or in dB:
ffmpeg -i input.wav -af "volume=10dB" output.wav
Hopes it helps you