KJB
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
video.h
Go to the documentation of this file.
1 /* $Id: video.h 18554 2015-02-13 20:43:09Z jguan1 $ */
2 /* {{{=========================================================================== *
3  |
4  | Copyright (c) 1994-2011 by Kobus Barnard (author)
5  |
6  | Personal and educational use of this code is granted, provided that this
7  | header is kept intact, and that the authorship is not misrepresented, that
8  | its use is acknowledged in publications, and relevant papers are cited.
9  |
10  | For other use contact the author (kobus AT cs DOT arizona DOT edu).
11  |
12  | Please note that the code in this file has not necessarily been adequately
13  | tested. Naturally, there is no guarantee of performance, support, or fitness
14  | for any particular task. Nonetheless, I am interested in hearing about
15  | problems that you encounter.
16  |
17  | Author: Kyle Simek
18  * =========================================================================== }}}*/
19 
20 // vim: tabstop=4 shiftwidth=4 foldmethod=marker
21 
22 #ifndef KJB_CPP_I_VIDEO_H
23 #define KJB_CPP_I_VIDEO_H
24 
31 #ifdef KJB_HAVE_FFMPEG
32 
33 // this is necessary to ensure stdint.h defines the UINT64_C macro,
34 // which is needed in libavutil/common.h :
35 #ifndef __STDC_CONSTANT_MACROS
36 #define __STDC_CONSTANT_MACROS
37 #endif
38 
39 extern "C" {
40 #include <libavcodec/avcodec.h>
41 #include <libswscale/swscale.h>
42 #include <libavformat/avformat.h>
43 }
44 
// AVFORMAT_IS_RECENT is 1 when libavformat is version 53.4 or newer and 0
// otherwise.  The value is computed here, with a real #if, for two reasons:
//   * a `defined` operator that only appears after macro expansion inside a
//     later #if is undefined behaviour, and
//   * the major and minor numbers must be compared lexicographically, so that
//     e.g. version 54.0 is still recognised as newer than 53.4.
#if defined(LIBAVFORMAT_VERSION_MAJOR) && \
    (LIBAVFORMAT_VERSION_MAJOR > 53 || \
     (LIBAVFORMAT_VERSION_MAJOR == 53 && LIBAVFORMAT_VERSION_MINOR >= 4))
#define AVFORMAT_IS_RECENT 1
#else
#define AVFORMAT_IS_RECENT 0
#endif
46 
47 #endif /* KJB_HAVE_FFMPEG */
48 
49 #include <boost/shared_array.hpp>
50 #include <i_cpp/i_image.h>
51 
52 namespace kjb
53 {
54 
56 {
57 public:
58  Video_frame(boost::shared_array<unsigned char> data, size_t width, size_t height) :
59  data_(data),
60  width_(width),
61  height_(height)
62  { }
63 private:
64  boost::shared_array<unsigned char> data_;
65  size_t width_;
66  size_t height_;
67 };
68 
69 // (eventually would like to have a lazy video class in addition to the video class.
70 // The lazy video would load frames only when needed. Hence the abstract base class here.
72 {
73 public:
74  virtual size_t size() const = 0;
75  virtual Video_frame operator[](size_t i) const = 0;
76 
77  virtual size_t get_width() const = 0;
78  virtual size_t get_height() const = 0;
79  virtual float get_frame_rate() const = 0;
80 };
81 
82 class Video : public Abstract_video
83 {
84 public:
85  Video() {}
86 
87  Video(const std::vector<std::string>& fnames, float frame_rate = 30.0)
88  {
89  load_images(fnames, frame_rate);
90  }
91 
92  Video(const std::string& fnames)
93  {
94  decode_video(fnames);
95  }
96 
97  void decode_video(const std::string& fname)
98  {
99 #if defined(KJB_HAVE_FFMPEG) && AVFORMAT_IS_RECENT
100  AVFormatContext *pFormatCtx = NULL;
101  int i, videoStream;
102  AVCodecContext *pCodecCtx = NULL;
103  AVCodec *pCodec = NULL;
104  AVFrame *pFrame = NULL;
105  AVFrame *pFrameRGB = NULL;
106  AVPacket packet;
107  int frameFinished;
108  int numBytes;
109  uint8_t *buffer = NULL;
110 
111  /* Register all formats and codecs */
112  if(!ffmpeg_registered_)
113  {
114  av_register_all();
115  }
116 
117 
118 
119  /* Open video file */
120  if(avformat_open_input(&pFormatCtx, fname.c_str(), NULL, NULL)!=0)
121  KJB_THROW_3(kjb::IO_error, "Couldn't open file %s", (fname.c_str()));
122 
123  /* Retrieve stream information */
124  if(avformat_find_stream_info(pFormatCtx, NULL)<0)
125  KJB_THROW_2(kjb::Runtime_error, "Failed to find stream info");
126 
127  // /* Dump information about file onto standard error */
128  // dump_format(pFormatCtx, 0, argv[1], false);
129 
130  /* Find the first video stream */
131  videoStream=-1;
132  for(i=0; i< (int) pFormatCtx->nb_streams; i++)
133  {
134  if(pFormatCtx->streams[i]->codec->codec_type==AVMEDIA_TYPE_VIDEO)
135  {
136  videoStream=i;
137  break;
138  }
139  }
140 
141  if(videoStream==-1)
142  KJB_THROW_2(kjb::Runtime_error, "Didn't find a video stream");
143 
144  /* Get a pointer to the codec context for the video stream */
145  pCodecCtx=pFormatCtx->streams[videoStream]->codec;
146  size_t num_frames = pFormatCtx->streams[videoStream]->nb_frames;
147 
148  frames_.resize(0);
149  frames_.reserve(num_frames);
150  AVRational fps = pFormatCtx->streams[videoStream]->r_frame_rate;
151  frame_rate_ = (float) fps.num / fps.den;
152 
153  if(frame_rate_ == 0)
154  {
155  fps = pFormatCtx->streams[videoStream]->avg_frame_rate;
156  frame_rate_ = (float) fps.num / fps.den;
157 
158  }
159 
160  assert(frame_rate_ > 0);
161 
162  /* Find the decoder for the video stream */
163  pCodec=avcodec_find_decoder(pCodecCtx->codec_id);
164  if(pCodec==NULL)
165  KJB_THROW_2(kjb::Runtime_error, "Codec not found");
166 
167  /* Open codec */
168  if(avcodec_open2(pCodecCtx, pCodec, NULL)<0)
169  KJB_THROW_2(kjb::Runtime_error, "Codec not found");
170 
171  /* Hack to correct wrong frame rates that seem to be generated by some codecs */
172  if(pCodecCtx->time_base.num>1000 && pCodecCtx->time_base.den==1)
173  pCodecCtx->time_base.den=1000;
174 
175  /* Allocate video frame */
176  pFrame=avcodec_alloc_frame();
177 
178  /* Allocate an AVFrame structure */
179  pFrameRGB=avcodec_alloc_frame();
180  if(pFrameRGB==NULL)
181  KJB_THROW_2(kjb::Runtime_error, "Failed to allocate AVFrame");
182 
183  /* Determine required buffer size and allocate buffer */
184  numBytes=avpicture_get_size(PIX_FMT_RGB24, pCodecCtx->width,
185  pCodecCtx->height);
186 
187  width_ = pCodecCtx->width;
188  height_ = pCodecCtx->height;
189 
190  buffer=(uint8_t*)malloc(numBytes);
191 
192  /* Assign appropriate parts of buffer to image planes in pFrameRGB */
193  avpicture_fill((AVPicture *)pFrameRGB, buffer, PIX_FMT_RGB24,
194  pCodecCtx->width, pCodecCtx->height);
195 
196  /* Read all frames */
197 
198  while(av_read_frame(pFormatCtx, &packet)>=0)
199  {
200  /* Is this a packet from the video stream? */
201  if(packet.stream_index==videoStream)
202  {
203  /* Decode video frame */
204  avcodec_decode_video2(pCodecCtx,
205  pFrame, &frameFinished, &packet);
206 
207  /* Did we get a video frame? */
208  if(frameFinished)
209  {
210  static struct SwsContext *img_convert_ctx;
211 
212  /* Convert the image into YUV format that SDL uses */
213  if(img_convert_ctx == NULL) {
214  int w = pCodecCtx->width;
215  int h = pCodecCtx->height;
216 
217  img_convert_ctx = sws_getContext(w, h,
218  pCodecCtx->pix_fmt,
219  w, h, PIX_FMT_RGB24, SWS_BICUBIC,
220  NULL, NULL, NULL);
221  if(img_convert_ctx == NULL) {
222  fprintf(stderr, "Cannot initialize the conversion context!\n");
223  exit(1);
224  }
225  }
226  sws_scale(img_convert_ctx, pFrame->data, pFrame->linesize, 0,
227  pCodecCtx->height, pFrameRGB->data, pFrameRGB->linesize);
228 
229  /* Save the frame to disk */
230  const size_t& width = pCodecCtx->width;
231  const size_t& height = pCodecCtx->height;
232 
233  boost::shared_array<unsigned char> buffer(new unsigned char[width * height * 3]);
234 
235  for(size_t row = 0; row < height; row++)
236  {
237  const uint8_t* source_begin = pFrameRGB->data[0] + row * pFrameRGB->linesize[0];
238  unsigned char* dest_begin = buffer.get() + (height - row - 1) * width * 3;
239  size_t row_length = width * 3;
240 
241  std::copy(source_begin,
242  source_begin + row_length,
243  dest_begin);
244  }
245 
246  frames_.push_back(buffer);
247  }
248  }
249 
250  /* Free the packet that was allocated by av_read_frame */
251  av_free_packet(&packet);
252  }
253 
254 // assert(num_frames == frames_.size());
255 
256  /* Free the RGB image */
257  free(buffer);
258  av_free(pFrameRGB);
259 
260  /* Free the YUV frame */
261  av_free(pFrame);
262 
263  /* Close the codec */
264  avcodec_close(pCodecCtx);
265 
266  /* Close the video file */
267  avformat_close_input(&pFormatCtx);
268 
269 #else
270  KJB_THROW_2(Missing_dependency, "ffmpeg");
271 #endif
272  }
273 
274  void load_images(const std::vector<Image>& images, float /* frame_rate = 30.0 */)
275  {
276  frames_.resize(images.size());
278  }
279 
280  template<class Iterator>
281  void load_images(Iterator first, Iterator last, float frame_rate = 30.0)
282  {
283  size_t nfrms = std::distance(first, last);
284 
285  frames_.resize(nfrms);
286  frame_rate_ = frame_rate;
287 
288  size_t i = 0;
289  for(; first != last; first++, i++)
290  {
291  Image img(*first);
292 
293  if(i == 0)
294  {
295  width_ = img.get_num_cols();
296  height_ = img.get_num_rows();
297  }
298  else
299  {
300  if((int) width_ != img.get_num_cols() ||
301  (int) height_ != img.get_num_rows())
302  {
303  KJB_THROW_2(Illegal_argument, "All images must be same size.");
304  }
305  }
306 
307  frames_[i] = image_to_frame_(img);
308  }
309  }
310 
311  void load_images(const std::vector<std::string>& fnames, float frame_rate = 30.0)
312  {
313  load_images(fnames.begin(), fnames.end(), frame_rate);
314  }
315 
316  virtual size_t size() const
317  {
318  return frames_.size();
319  }
320 
321  virtual Video_frame operator[](size_t i) const
322  {
323  if(i > size())
324  {
326  }
327 
328  return Video_frame(frames_[i], width_, height_);
329  }
330 
331  size_t get_width() const
332  {
333  return width_;
334  }
335 
336  size_t get_height() const
337  {
338  return height_;
339  }
340 
341  float get_frame_rate() const
342  {
343  return frame_rate_;
344  }
345 
346  const unsigned char* get_buffer(size_t i) const
347  {
348  return frames_[i].get();
349  }
350 
351  template <class Output_iterator>
352  void to_images(Output_iterator frame_it)
353  {
354  Image img;
355  size_t num_cols = width_;
356  size_t num_rows = height_;
357  img = Image(num_cols, num_rows);
358 
359  size_t stride = 3 * num_cols;
360  for(size_t f = 0; f < frames_.size(); f++)
361  {
362  size_t out_col = 0;
363  int out_row = num_rows - 1;
364  size_t offset = 0;
365 
366  for(size_t row = 0; row < num_rows; row++)
367  {
368  for(size_t col = 0; col < 3*num_cols; col += 3)
369  {
370  img(out_row, out_col, Image::RED) = (float) frames_[f][offset + col + 0 ];
371  img(out_row, out_col, Image::GREEN) = (float) frames_[f][offset + col + 1];
372  img(out_row, out_col, Image::BLUE) = (float) frames_[f][offset + col + 2];
373  ++out_col;
374  }
375  --out_row;
376  offset += stride;
377  }
378 
379  *frame_it++ = img;
380  }
381  }
382 
383 protected:
384  static boost::shared_array<unsigned char> image_to_frame_(const Image& img)
385  {
386  size_t num_cols = img.get_num_cols();
387  size_t num_rows = img.get_num_rows();
388 
389  boost::shared_array<unsigned char> buffer(new unsigned char[3 * num_rows * num_cols]);
390 
391  for(size_t row = 0; row < num_rows; row++)
392  {
393  for(size_t col = 0; col < 3*num_cols; col += 3)
394  {
395  size_t r = num_rows - row - 1;
396 
397  buffer[col + 0 + row*3*num_cols] = (unsigned char) img(r, col/3, Image::RED);
398  buffer[col + 1 + row*3*num_cols] = (unsigned char) img(r, col/3, Image::GREEN);
399  buffer[col + 2 + row*3*num_cols] = (unsigned char) img(r, col/3, Image::BLUE);
400  }
401  }
402 
403  return buffer;
404  }
405 private:
406  std::vector<boost::shared_array<unsigned char> > frames_;
407  size_t width_;
408  size_t height_;
409  float frame_rate_;
410 
411  static bool ffmpeg_registered_;
412 };
413 
414 }
415 #endif
416 
Video()
Definition: video.h:85
Definition: video.h:71
Definition: video.h:55
virtual size_t get_height() const =0
Object thrown when an index argument exceeds the size of a container.
Definition: l_exception.h:399
void load_images(const std::vector< std::string > &fnames, float frame_rate=30.0)
Definition: video.h:311
Definition: i_image.h:140
Definition: i_image.h:140
height
Definition: APPgetLargeConnectedEdges.m:33
r
Definition: APPgetLargeConnectedEdges.m:127
#define KJB_THROW(ex)
Definition: l_exception.h:46
void load_images(const std::vector< Image > &images, float)
Definition: video.h:274
virtual Video_frame operator[](size_t i) const
Definition: video.h:321
const unsigned char * get_buffer(size_t i) const
Definition: video.h:346
virtual size_t size() const =0
void load_images(Iterator first, Iterator last, float frame_rate=30.0)
Definition: video.h:281
static boost::shared_array< unsigned char > image_to_frame_(const Image &img)
Definition: video.h:384
Video(const std::vector< std::string > &fnames, float frame_rate=30.0)
Definition: video.h:87
Video(const std::string &fnames)
Definition: video.h:92
size_t get_height() const
Definition: video.h:336
int get_num_rows() const
Return the number of rows in the image.
Definition: i_image.h:256
Video_frame(boost::shared_array< unsigned char > data, size_t width, size_t height)
Definition: video.h:58
virtual size_t get_width() const =0
Definition: i_image.h:140
#define KJB_THROW_2(ex, msg)
Definition: l_exception.h:48
#define KJB_THROW_3(ex, fmt, params)
Definition: l_exception.h:56
virtual Video_frame operator[](size_t i) const =0
void to_images(Output_iterator frame_it)
Definition: video.h:352
size_t get_width() const
Definition: video.h:331
int get_num_cols() const
Return the number of columns in the image.
Definition: i_image.h:262
Object thrown when an argument to a function is not acceptable.
Definition: l_exception.h:377
Object thrown when attempting to use unimplemented functionality.
Definition: l_exception.h:281
void decode_video(const std::string &fname)
Definition: video.h:97
get the indices of edges in each direction for i
Definition: APPgetLargeConnectedEdges.m:48
float get_frame_rate() const
Definition: video.h:341
Object thrown when input or output fails.
Definition: l_exception.h:496
Code for a wrapper class around the C struct KJB_Image.
virtual float get_frame_rate() const =0
Wrapped version of the C struct KJB_image.
Definition: i_image.h:76
Object thrown when a program lacks required resources or libraries.
Definition: l_exception.h:539
virtual size_t size() const
Definition: video.h:316
Definition: video.h:82
Object thrown when computation fails somehow during execution.
Definition: l_exception.h:321