mirror of https://github.com/davisking/dlib.git
155 lines
6.0 KiB
C++
155 lines
6.0 KiB
C++
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
|
|
/*
    This is an example illustrating the use of the ffmpeg wrappers,
    in this case the demuxer API.

    This is a pretty simple example. It loads a video file and displays
    the image frames in a GUI window.
*/
|
|
|
|
#include <cstdio>
|
|
#include <dlib/media.h>
|
|
#include <dlib/gui_widgets.h>
|
|
#include <dlib/cmd_line_parser.h>
|
|
|
|
using namespace std;
|
|
using namespace dlib;
|
|
|
|
int main(const int argc, const char** argv)
|
|
try
|
|
{
|
|
command_line_parser parser;
|
|
parser.add_option("i", "input video", 1);
|
|
parser.set_group_name("Help Options");
|
|
parser.add_option("h", "alias of --help");
|
|
parser.add_option("help", "display this message and exit");
|
|
|
|
parser.parse(argc, argv);
|
|
const char* one_time_opts[] = {"i"};
|
|
parser.check_one_time_options(one_time_opts);
|
|
|
|
if (parser.option("h") || parser.option("help"))
|
|
{
|
|
parser.print_options();
|
|
return 0;
|
|
}
|
|
|
|
const std::string filepath = get_option(parser, "i", "");
|
|
|
|
image_window win;
|
|
|
|
/*
|
|
For simplicity we use the constructor which takes a filepath only. All other parameters are defaulted or guessed.
|
|
Equivalently, we could have done:
|
|
|
|
ffmpeg::demuxer::args args;
|
|
args.filepath = filepath;
|
|
ffmpeg::demuxer cap(args);
|
|
|
|
Furthermore, we can set additional settings in args, for example:
|
|
|
|
// This disables extracting and decoding images.
|
|
// You may want to do this if you're only interesting in extracting audio
|
|
args.enable_image = false;
|
|
|
|
// This disables extracting and decoding audio.
|
|
// You may want to do this if you don't care about audio in your video.
|
|
// This saves processing time and you don't have to deal with audio frame objects in your code.
|
|
args.enable_audio = false;
|
|
|
|
// This will resize frames before presenting them to the user.
|
|
// I.e. frames "returned" by demuxer::read() will have this height.
|
|
// By default, the demuxer object does not resize frames.
|
|
args.image_options.h = SOME_HEIGHT;
|
|
|
|
// Same as above but for width.
|
|
args.image_options.w = SOME_WIDTH;
|
|
|
|
// By default, demuxer reformats frames from the default format in the encoded stream to RGB.
|
|
// You can set this to AV_PIX_FMT_NONE and demuxer will leave frames in their default format.
|
|
// This is likely to be AV_PIX_FMT_YUV420P.
|
|
// However, you can set it to anything that FFMPEG supports, and frames will be presented
|
|
// in that format.
|
|
args.image_options.fmt = SOME_OTHER_PIXEL_FORMAT;
|
|
|
|
// Same as above, by default, demuxer leaves audio frames in their default sample rate.
|
|
// But user can change this, and audio will be resampled to that rate.
|
|
// Note, reducing the sample rate reduces the quality of the audio.
|
|
// You can artificially upsample audio, but it won't make the quality any better.
|
|
args.audio_options.sample_rate = SOME_SAMPLE_RATE;
|
|
|
|
// You may want to do this if you want more or less channels.
|
|
// Note, dlib only has one audio object "audio_frame", which is stereo and uses int16_t sample format.
|
|
// So if you're going to use other layouts and sample formats, you won't be able to use audio_frame.
|
|
// You will have to use ffmpeg::frame directly. Use with care and please visit ffmpeg's documentation.
|
|
args.audio_options.channel_layout = SOME_OTHER_LAYOUT; // e.g. AV_CH_LAYOUT_MONO, AV_CH_LAYOUT_STEREO. See libavutil/channel_layout.h
|
|
|
|
// This changes the default sample format.
|
|
args.audio_options.fmt = SOME_OTHER_SAMPLE_FORMAT;
|
|
*/
|
|
|
|
ffmpeg::demuxer cap(filepath);
|
|
if (!cap.is_open())
|
|
{
|
|
printf("%s is not a valid video file\n", filepath.c_str());
|
|
return EXIT_FAILURE;
|
|
}
|
|
|
|
printf("Video properties:\n\n");
|
|
printf("Estimated duration : %f\n", cap.duration());
|
|
printf("Video contains images : %i\n", cap.video_enabled());
|
|
if (cap.video_enabled())
|
|
{
|
|
printf(" height : %i\n", cap.height());
|
|
printf(" width : %i\n", cap.width());
|
|
printf(" pixel format : %s\n", ffmpeg::get_pixel_fmt_str(cap.pixel_fmt()).c_str());
|
|
printf(" fps : %f\n", cap.fps());
|
|
printf(" nframes : %d\n", cap.estimated_nframes());
|
|
printf(" codec : %s\n", cap.get_video_codec_name().c_str());
|
|
}
|
|
printf("Video contains audio : %i\n", cap.audio_enabled());
|
|
if (cap.audio_enabled())
|
|
{
|
|
printf(" sample rate : %i\n", cap.sample_rate());
|
|
printf(" channel layout : %s\n", ffmpeg::get_channel_layout_str(cap.channel_layout()).c_str());
|
|
printf(" sample format : %s\n", ffmpeg::get_audio_fmt_str(cap.sample_fmt()).c_str());
|
|
printf(" nchannels : %i\n", cap.nchannels());
|
|
printf(" estimated samples : %i\n", cap.estimated_total_samples());
|
|
printf(" codec : %s\n", cap.get_audio_codec_name().c_str());
|
|
}
|
|
|
|
printf("\n\n");
|
|
printf("Video metadata:\n");
|
|
|
|
for (auto&& metadata : cap.get_metadata())
|
|
printf(" key : %-32s ; val : %-32s\n", metadata.first.c_str(), metadata.second.c_str());
|
|
|
|
ffmpeg::frame frame;
|
|
array2d<rgb_pixel> img;
|
|
size_t audio_samples{0};
|
|
|
|
while (cap.read(frame))
|
|
{
|
|
if (frame.is_image() && frame.pixfmt() == AV_PIX_FMT_RGB24)
|
|
{
|
|
convert(frame, img);
|
|
win.set_image(img);
|
|
}
|
|
|
|
if (frame.is_audio())
|
|
{
|
|
audio_samples += frame.nsamples();
|
|
printf("\r\tDecoding %zu samples", audio_samples); fflush(stdout);
|
|
}
|
|
}
|
|
|
|
printf("\n");
|
|
|
|
return EXIT_SUCCESS;
|
|
}
|
|
catch (const std::exception& e)
|
|
{
|
|
printf("%s\n", e.what());
|
|
return EXIT_FAILURE;
|
|
} |