Discussion:
[Libva] [PATCH] H.264 encoder: add a simple VBR rate control mode
Mark Thompson
2017-01-01 14:28:24 UTC
Permalink
Signed-off-by: Mark Thompson <***@jkqxz.net>
---
From the comment in the code:

// This implements a simple reactive VBR rate controller for single-layer H.264.
// The main idea here is to try to keep the HRD buffer above the target level most of the time,
// so that when a large frame is generated (on a scene change) we have plenty of slack to be
// able to encode it without compromising quality on the following frames. It is optimistic
// about the complexity of future frames, so after generating a large frame on a significant
// change (particularly whole-screen transitions) it will try to keep the QP at its current
// level unless the HRD buffer bounds force a change to maintain the intended rate.

The primary aim of this is to avoid the problematic behaviour that the CBR rate controller has on scene changes, where the QP can get pushed up by a large amount and compromise the quality of following frames to a very visible degree.

To visualise the effect, here are the QP and frame sizes of the same sequence encoded with the same parameters in the CBR and VBR RC modes:

<http://ixia.jkqxz.net/~mrt/libva/rc/cbr.svg>
<http://ixia.jkqxz.net/~mrt/libva/rc/vbr.svg>

(The two graphs have identical scales. The sequence is the first 10000 frames of Big Buck Bunny (which usefully has very varied complexity): 1280x720 at 60fps, target bitrate 2Mbps, HRD buffer 12Mb, 250 frame GOP, 2 B frames, min QP 18, initial QP 32.)

Note in particular how the spikes in QP from the CBR rate controller are mostly avoided (around frames 1600, 3100, 6300, 9100 in the example), and how the VBR mode has much less variation in the QP level. Also note how the VBR mode often has higher average QP than the CBR mode does, particularly when complexity is decreasing - this is what is lost in the attempt to improve the worst-case behaviour.

Written and tested on gen9; hopefully it works on the older platforms too, though I haven't actually tested it there. It only works for single-layer video; I haven't considered multiple-layer video at all - it probably wants some code to at least reject that case, but I don't have any test setup for that so I've avoided it for now.

Thanks,

- Mark


src/gen6_mfc.c | 10 ++---
src/gen6_mfc_common.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/gen75_mfc.c | 10 ++---
src/gen8_mfc.c | 10 ++---
src/i965_drv_video.c | 5 ++-
5 files changed, 133 insertions(+), 20 deletions(-)

diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen6_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index 8907751..95afa36 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -218,9 +218,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
return BRC_NO_HRD_VIOLATION;
}

-int intel_mfc_brc_postpack(struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -366,6 +366,116 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
return sts;
}

+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ gen6_brc_status sts;
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+ int *qp = mfc_context->brc.qp_prime_y[0];
+ int qp_delta, large_frame_adjustment;
+
+ // This implements a simple reactive VBR rate controller for single-layer H.264.
+ // The main idea here is to try to keep the HRD buffer above the target level most of the time,
+ // so that when a large frame is generated (on a scene change) we have plenty of slack to be
+ // able to encode it without compromising quality on the following frames. It is optimistic
+ // about the complexity of future frames, so after generating a large frame on a significant
+ // change (particularly whole-screen transitions) it will try to keep the QP at its current
+ // level unless the HRD buffer bounds force a change to maintain the intended rate.
+
+ sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+ // This adjustment is applied to increase the QP by more than we normally would if a very
+ // large frame is encountered and we are in danger of running out of slack.
+ large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+ if (sts == BRC_UNDERFLOW) {
+ // The frame is far too big and we don't have the bits available to send it, so it will
+ // have to be re-encoded at a higher QP.
+ qp_delta = +2;
+ if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+ qp_delta += large_frame_adjustment;
+ } else if (sts == BRC_OVERFLOW) {
+ // The frame is very small and we are now overflowing the HRD buffer. Currently this case
+ // does not occur because we ignore overflow in VBR mode.
+ assert(0 && "Overflow in VBR mode");
+ } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+ // The frame is smaller than the average size expected for this frame type.
+ if (mfc_context->hrd.current_buffer_fullness[0] >
+ (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+ // We currently have lots of bits available, so decrease the QP slightly for the next
+ // frame.
+ qp_delta = -1;
+ } else {
+ // The HRD buffer fullness is increasing, so do nothing. (We may be under the target
+ // level here, but are moving in the right direction.)
+ qp_delta = 0;
+ }
+ } else {
+ // The frame is larger than the average size expected for this frame type.
+ if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+ // We are currently over the target level, so do nothing.
+ qp_delta = 0;
+ } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+ // We are under the target level, but not critically. Increase the QP by one step if
+ // continuing like this would underflow soon (currently within one second).
+ if (mfc_context->hrd.current_buffer_fullness[0] /
+ (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+ ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+ qp_delta = +1;
+ else
+ qp_delta = 0;
+ } else {
+ // We are a long way under the target level. Always increase the QP, possibly by a
+ // larger amount dependent on how big the frame we just made actually was.
+ qp_delta = +1 + large_frame_adjustment;
+ }
+ }
+
+ switch (slice_type) {
+ case SLICE_TYPE_I:
+ qp[SLICE_TYPE_I] += qp_delta;
+ qp[SLICE_TYPE_P] = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+ qp[SLICE_TYPE_B] = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+ break;
+ case SLICE_TYPE_P:
+ qp[SLICE_TYPE_P] += qp_delta;
+ qp[SLICE_TYPE_I] = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+ qp[SLICE_TYPE_B] = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+ break;
+ case SLICE_TYPE_B:
+ qp[SLICE_TYPE_B] += qp_delta;
+ qp[SLICE_TYPE_I] = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+ qp[SLICE_TYPE_P] = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+ break;
+ }
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], (int)encoder_context->brc.min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], (int)encoder_context->brc.min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], (int)encoder_context->brc.min_qp, 51);
+
+ if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+ sts = BRC_UNDERFLOW_WITH_MAX_QP;
+ if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
+ sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+ return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
+{
+ switch (encoder_context->rate_control_mode) {
+ case VA_RC_CBR:
+ return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+ case VA_RC_VBR:
+ return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+ }
+ assert(0 && "Invalid RC mode");
+}
+
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
{
@@ -425,7 +535,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
encoder_context->codec != CODEC_H264_MVC)
return;

- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
/*Programing bit rate control */
if (encoder_context->brc.need_reset) {
intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen75_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen8_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 51a708c..b5e4c17 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
profile != VAProfileMPEG2Simple)
attrib_list[i].value |= VA_RC_CBR;

- if (profile == VAProfileVP9Profile0)
+ if (profile == VAProfileVP9Profile0 ||
+ profile == VAProfileH264ConstrainedBaseline ||
+ profile == VAProfileH264Main ||
+ profile == VAProfileH264High)
attrib_list[i].value |= VA_RC_VBR;

break;
--
2.11.0
Xiang, Haihao
2017-01-06 07:09:50 UTC
Permalink
Post by Mark Thompson
---
// This implements a simple reactive VBR rate controller for single-layer H.264.
// The main idea here is to try to keep the HRD buffer above the target level
most of the time,
// so that when a large frame is generated (on a scene change) we have plenty
of slack to be
// able to encode it without compromising quality on the following frames.  It
is optimistic
// about the complexity of future frames, so after generating a large frame on
a significant
// change (particularly whole-screen transitions) it will try to keep the QP at its current
// level unless the HRD buffer bounds force a change to maintain the intended rate.
The primary aim of this is to avoid the problematic behaviour that the CBR
rate controller has on scene changes, where the QP can get pushed up by a
large amount and compromise the quality of following frames to a very visible
degree.
To visualise the effect of it, here is the QP and frame sizes of same sequence
encoded with the same parameters with the CBR and VBR RC modes:
<http://ixia.jkqxz.net/~mrt/libva/rc/cbr.svg>
<http://ixia.jkqxz.net/~mrt/libva/rc/vbr.svg>
(The two graphs have identical scales.  The sequence is the first 10000 frames
of Big Buck Bunny (which usefully has very varied complexity): 1280x720 at
60fps, target bitrate 2Mbps, HRD buffer 12Mb, 250 frame GOP, 2 B frames, min
QP 18, initial QP 32.)
Note in particular how the spikes in QP from the CBR rate controller are
mostly avoided (around frames 1600, 3100, 6300, 9100 in the example), and how
the VBR mode has much less variation in the QP level.  Also note how the VBR
mode often has higher average QP than the CBR mode does, particularly when
complexity is decreasing - this is what is lost in the attempt to improve the
worst-case behaviour.
Written and tested on gen9; hopefully it works on the older platforms too
though I haven't actually tested it.  It only works for single-layer video, I
haven't considered multiple-layer video at all - probably it wants some code
to at least reject that case, but I don't have any test setup for that so I've
avoided it for now.
Could you add the above message in your commit log? I don't see the message
after applying this patch to my local branch.

The expected target bitrate for VBR is (target_percentage * bits_per_second),
e.g. for vp9:

vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100

We should keep the same behavior for all codecs.

Thanks
Haihao
Post by Mark Thompson
Thanks,
- Mark
 src/gen6_mfc.c        |  10 ++---
 src/gen6_mfc_common.c | 118 ++++++++++++++++++++++++++++++++++++++++++++++++-
-
 src/gen75_mfc.c       |  10 ++---
 src/gen8_mfc.c        |  10 ++---
 src/i965_drv_video.c  |   5 ++-
 5 files changed, 133 insertions(+), 20 deletions(-)
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice,
slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice,
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
-                             (rate_control_mode == VA_RC_CBR),
+                             (rate_control_mode != VA_RC_CQP),
                              qp_slice,
                              slice_batch);
 
@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);
//filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR)
{
             gen6_mfc_stop(ctx, encode_state, encoder_context,
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context,
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index 8907751..95afa36 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -218,9 +218,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
     return BRC_NO_HRD_VIOLATION;
 }
 
-int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct intel_encoder_context *encoder_context,
-                           int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context
*encoder_context,
+                                      int frame_bits)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -366,6 +366,116 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
     return sts;
 }
 
+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context
*encoder_context,
+                                      int frame_bits)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    gen6_brc_status sts;
+    VAEncSliceParameterBufferH264 *pSliceParameter =
(VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int *qp = mfc_context->brc.qp_prime_y[0];
+    int qp_delta, large_frame_adjustment;
+
+    // This implements a simple reactive VBR rate controller for single-layer
H.264.
+    // The main idea here is to try to keep the HRD buffer above the target
level most of the time,
+    // so that when a large frame is generated (on a scene change) we have
plenty of slack to be
+    // able to encode it without compromising quality on the following
frames.  It is optimistic
+    // about the complexity of future frames, so after generating a large
frame on a significant
+    // change (particularly whole-screen transitions) it will try to keep the
QP at its current
+    // level unless the HRD buffer bounds force a change to maintain the
intended rate.
+
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    // This adjustment is applied to increase the QP by more than we normally
would if a very
+    // large frame is encountered and we are in danger of running out of
slack.
+    large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+    if (sts == BRC_UNDERFLOW) {
+        // The frame is far too big and we don't have the bits available to
send it, so it will
+        // have to be re-encoded at a higher QP.
+        qp_delta = +2;
+        if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+            qp_delta += large_frame_adjustment;
+    } else if (sts == BRC_OVERFLOW) {
+        // The frame is very small and we are now overflowing the HRD
buffer.  Currently this case
+        // does not occur because we ignore overflow in VBR mode.
+        assert(0 && "Overflow in VBR mode");
+    } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+        // The frame is smaller than the average size expected for this frame
type.
+        if (mfc_context->hrd.current_buffer_fullness[0] >
+            (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+            // We currently have lots of bits available, so decrease the QP
slightly for the next
+            // frame.
+            qp_delta = -1;
+        } else {
+            // The HRD buffer fullness is increasing, so do nothing.  (We may
be under the target
+            // level here, but are moving in the right direction.)
+            qp_delta = 0;
+        }
+    } else {
+        // The frame is larger than the average size expected for this frame
type.
+        if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+            // We are currently over the target level, so do nothing.
+            qp_delta = 0;
+        } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+            // We are under the target level, but not critically.  Increase
the QP by one step if
+            // continuing like this would underflow soon (currently within
one second).
+            if (mfc_context->hrd.current_buffer_fullness[0] /
+                (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+                ((double)encoder_context->brc.framerate[0].num /
(double)encoder_context->brc.framerate[0].den))
+                qp_delta = +1;
+            else
+                qp_delta = 0;
+        } else {
+            // We are a long way under the target level.  Always increase the
QP, possibly by a
+            // larger amount dependent on how big the frame we just made
actually was.
+            qp_delta = +1 + large_frame_adjustment;
+        }
+    }
+
+    switch (slice_type) {
+    case SLICE_TYPE_I:
+        qp[SLICE_TYPE_I] += qp_delta;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_P:
+        qp[SLICE_TYPE_P] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_B:
+        qp[SLICE_TYPE_B] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+        break;
+    }
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I],
(int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P],
(int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B],
(int)encoder_context->brc.min_qp, 51);
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
+        sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+    return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+                           struct intel_encoder_context *encoder_context,
+                           int frame_bits)
+{
+    switch (encoder_context->rate_control_mode) {
+    case VA_RC_CBR:
+        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context,
frame_bits);
+    case VA_RC_VBR:
+        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context,
frame_bits);
+    }
+    assert(0 && "Invalid RC mode");
+}
+
 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context
*encoder_context)
 {
@@ -425,7 +535,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
         encoder_context->codec != CODEC_H264_MVC)
         return;
 
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         /*Programing bit rate control */
         if (encoder_context->brc.need_reset) {
             intel_mfc_bit_rate_control_context_init(encode_state,
encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                               pPicParameter,
                               pSliceParameter,
                               encode_state, encoder_context,
-                              (rate_control_mode == VA_RC_CBR), qp_slice,
slice_batch);
+                              (rate_control_mode != VA_RC_CQP), qp_slice,
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen75_mfc_avc_pipeline_programing(ctx, encode_state,
encoder_context); //filling the pipeline
         gen75_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR)
{
             gen75_mfc_stop(ctx, encode_state, encoder_context,
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context,
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice,
slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice,
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);
//filling the pipeline
         gen8_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR)
{
             gen8_mfc_stop(ctx, encode_state, encoder_context,
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context,
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 51a708c..b5e4c17 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
                     profile != VAProfileMPEG2Simple)
                     attrib_list[i].value |= VA_RC_CBR;
 
-                if (profile == VAProfileVP9Profile0)
+                if (profile == VAProfileVP9Profile0 ||
+                    profile == VAProfileH264ConstrainedBaseline ||
+                    profile == VAProfileH264Main ||
+                    profile == VAProfileH264High)
                     attrib_list[i].value |= VA_RC_VBR;
 
                 break;
Mark Thompson
2017-01-08 22:37:02 UTC
Permalink
This implements a simple reactive VBR rate control mode for single-layer H.264.
The primary aim here is to avoid the problematic behaviour that the CBR rate
controller displays on scene changes, where the QP can get pushed up by a large
amount in a short period and compromise the quality of following frames to a
very visible degree.

The main idea, then, is to try to keep the HRD buffering above the target level
most of the time, so that when a large frame is generated (on a scene change or
when the stream complexity increases) we have plenty of slack to be able to
encode the more difficult region without compromising quality immediately on
the following frames. It is optimistic about the complexity of future frames,
so even after generating one or more large frames on a significant change it
will try to keep the QP at its current level until the HRD buffer bounds force
a change to maintain the intended rate.

Compared to the CBR rate controller, it keeps the quality level much more
stable - QP does not always spike up as large frames are generated when the
complexity of the stream increases transiently, but equally it does not reduce
as quickly when the complexity of the stream decreases.

Signed-off-by: Mark Thompson <***@jkqxz.net>
---
Post by Xiang, Haihao
Post by Mark Thompson
...
Could you add the above message in your commit log? I don't see the message
after applying this patch to my local branch.
It was more meant as a general explanation rather than a commit message. But yes, it should probably have something more. I've rewritten the comment in the code to be a bit more complete and also set it as the commit message above.
Post by Xiang, Haihao
The expected target bitrate for VBR is (target_percentage * bits_per_second),
e.g. for vp9
vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100
we should keep the same behavior for all codecs.
Hmm. I initially did do this, but decided it wasn't quite right because this rate controller doesn't really have a maximum bitrate - it is constrained only by the HRD buffering.

Still, the point about keeping the behaviour consistent is probably more important, so I've made the simple change to multiply by the percentage. It might be worth considering later how the maximum bitrate should actually be treated here, though.

Thanks,

- Mark


src/gen6_mfc.c | 10 ++--
src/gen6_mfc_common.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/gen75_mfc.c | 10 ++--
src/gen8_mfc.c | 10 ++--
src/i965_drv_video.c | 5 +-
5 files changed, 140 insertions(+), 20 deletions(-)

diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen6_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fbedc94..9e0bb55 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -127,6 +127,9 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
}

+ if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
+ bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;
+
if (i == encoder_context->layer.num_layers - 1)
factor = 1.0;
else {
@@ -219,9 +222,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
return BRC_NO_HRD_VIOLATION;
}

-int intel_mfc_brc_postpack(struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -368,6 +371,120 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
return sts;
}

+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ gen6_brc_status sts;
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+ int *qp = mfc_context->brc.qp_prime_y[0];
+ int qp_delta, large_frame_adjustment;
+
+ // This implements a simple reactive VBR rate control mode for single-layer H.264. The primary
+ // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
+ // scene changes, where the QP can get pushed up by a large amount in a short period and
+ // compromise the quality of following frames to a very visible degree.
+ // The main idea, then, is to try to keep the HRD buffering above the target level most of the
+ // time, so that when a large frame is generated (on a scene change or when the stream
+ // complexity increases) we have plenty of slack to be able to encode the more difficult region
+ // without compromising quality immediately on the following frames. It is optimistic about
+ // the complexity of future frames, so even after generating one or more large frames on a
+ // significant change it will try to keep the QP at its current level until the HRD buffer
+ // bounds force a change to maintain the intended rate.
+
+ sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+ // This adjustment is applied to increase the QP by more than we normally would if a very
+ // large frame is encountered and we are in danger of running out of slack.
+ large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+ if (sts == BRC_UNDERFLOW) {
+ // The frame is far too big and we don't have the bits available to send it, so it will
+ // have to be re-encoded at a higher QP.
+ qp_delta = +2;
+ if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+ qp_delta += large_frame_adjustment;
+ } else if (sts == BRC_OVERFLOW) {
+ // The frame is very small and we are now overflowing the HRD buffer. Currently this case
+ // does not occur because we ignore overflow in VBR mode.
+ assert(0 && "Overflow in VBR mode");
+ } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+ // The frame is smaller than the average size expected for this frame type.
+ if (mfc_context->hrd.current_buffer_fullness[0] >
+ (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+ // We currently have lots of bits available, so decrease the QP slightly for the next
+ // frame.
+ qp_delta = -1;
+ } else {
+ // The HRD buffer fullness is increasing, so do nothing. (We may be under the target
+ // level here, but are moving in the right direction.)
+ qp_delta = 0;
+ }
+ } else {
+ // The frame is larger than the average size expected for this frame type.
+ if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+ // We are currently over the target level, so do nothing.
+ qp_delta = 0;
+ } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+ // We are under the target level, but not critically. Increase the QP by one step if
+ // continuing like this would underflow soon (currently within one second).
+ if (mfc_context->hrd.current_buffer_fullness[0] /
+ (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+ ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+ qp_delta = +1;
+ else
+ qp_delta = 0;
+ } else {
+ // We are a long way under the target level. Always increase the QP, possibly by a
+ // larger amount dependent on how big the frame we just made actually was.
+ qp_delta = +1 + large_frame_adjustment;
+ }
+ }
+
+ switch (slice_type) {
+ case SLICE_TYPE_I:
+ qp[SLICE_TYPE_I] += qp_delta;
+ qp[SLICE_TYPE_P] = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+ qp[SLICE_TYPE_B] = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+ break;
+ case SLICE_TYPE_P:
+ qp[SLICE_TYPE_P] += qp_delta;
+ qp[SLICE_TYPE_I] = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+ qp[SLICE_TYPE_B] = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+ break;
+ case SLICE_TYPE_B:
+ qp[SLICE_TYPE_B] += qp_delta;
+ qp[SLICE_TYPE_I] = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+ qp[SLICE_TYPE_P] = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+ break;
+ }
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], (int)encoder_context->brc.min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], (int)encoder_context->brc.min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], (int)encoder_context->brc.min_qp, 51);
+
+ if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+ sts = BRC_UNDERFLOW_WITH_MAX_QP;
+ if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
+ sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+ return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
+{
+ switch (encoder_context->rate_control_mode) {
+ case VA_RC_CBR:
+ return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+ case VA_RC_VBR:
+ return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+ }
+ assert(0 && "Invalid RC mode");
+}
+
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
{
@@ -427,7 +544,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
encoder_context->codec != CODEC_H264_MVC)
return;

- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
/*Programing bit rate control */
if (encoder_context->brc.need_reset) {
intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen75_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen8_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 76cb915..cc37190 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
profile != VAProfileMPEG2Simple)
attrib_list[i].value |= VA_RC_CBR;

- if (profile == VAProfileVP9Profile0)
+ if (profile == VAProfileVP9Profile0 ||
+ profile == VAProfileH264ConstrainedBaseline ||
+ profile == VAProfileH264Main ||
+ profile == VAProfileH264High)
attrib_list[i].value |= VA_RC_VBR;

break;
--
2.11.0
Xiang, Haihao
2017-01-09 05:23:56 UTC
Permalink
Post by Mark Thompson
This implements a simple reactive VBR rate control mode for single-layer H.264.
The primary aim here is to avoid the problematic behaviour that the CBR rate
controller displays on scene changes, where the QP can get pushed up by a large
amount in a short period and compromise the quality of following frames to a
very visible degree.
The main idea, then, is to try to keep the HRD buffering above the target level
most of the time, so that when a large frame is generated (on a scene change or
when the stream complexity increases) we have plenty of slack to be able to
encode the more difficult region without compromising quality immediately on
the following frames.   It is optimistic about the complexity of future frames,
so even after generating one or more large frames on a significant change it
will try to keep the QP at its current level until the HRD buffer bounds force
a change to maintain the intended rate.
Compared to the CBR rate controller, it keeps the quality level much more
stable - QP does not always spike up as large frames are generated when the
complexity of the stream increases transiently, but equally it does not reduce
as quickly when the complexity of the stream decreases.
---
Post by Xiang, Haihao
Post by Mark Thompson
...
Could you add the above message in your commit log? I don't see the message
after applying this patch to my local branch.
It was more meant as a general explanation rather than a commit message.  But yes, it should probably have something more.  I've rewritten the comment in the code to be a bit more complete and also set it as the commit message above.
Post by Xiang, Haihao
The expected target bitrate for VBR is (target_percentage * bits_per_second),
e.g. for vp9
vp9_state->target_bit_rate = vp9_state->max_bit_rate * encoder_context->brc.target_percentage[0] / 100
we should keep the same behavior for all codecs.
Hmm.  I initially did do this, but decided it wasn't quite right because this rate controller doesn't really have a maximum bitrate - it is constrained only by the HRD buffering.
Still, the point about keeping the behaviour consistent is probably more important, so I've made the simple change to multiply by the percentage.  It might be worth considering later how the maximum bitrate should actually be treated here, though.
Thanks,
- Mark
 src/gen6_mfc.c        |  10 ++--
 src/gen6_mfc_common.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++--
 src/gen75_mfc.c       |  10 ++--
 src/gen8_mfc.c        |  10 ++--
 src/i965_drv_video.c  |   5 +-
 5 files changed, 140 insertions(+), 20 deletions(-)
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
-                             (rate_control_mode == VA_RC_CBR),
+                             (rate_control_mode != VA_RC_CQP),
                              qp_slice,
                              slice_batch);
 
@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fbedc94..9e0bb55 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -127,6 +127,9 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
                 ((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
         }
 
+        if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
+            bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;
+
         if (i == encoder_context->layer.num_layers - 1)
             factor = 1.0;
         else {
@@ -219,9 +222,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
     return BRC_NO_HRD_VIOLATION;
 }
 
-int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct intel_encoder_context *encoder_context,
-                           int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context *encoder_context,
+                                      int frame_bits)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -368,6 +371,120 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
     return sts;
 }
 
+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context *encoder_context,
+                                      int frame_bits)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    gen6_brc_status sts;
+    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int *qp = mfc_context->brc.qp_prime_y[0];
+    int qp_delta, large_frame_adjustment;
+
+    // This implements a simple reactive VBR rate control mode for single-layer H.264.  The primary
+    // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
+    // scene changes, where the QP can get pushed up by a large amount in a short period and
+    // compromise the quality of following frames to a very visible degree.
+    // The main idea, then, is to try to keep the HRD buffering above the target level most of the
+    // time, so that when a large frame is generated (on a scene change or when the stream
+    // complexity increases) we have plenty of slack to be able to encode the more difficult region
+    // without compromising quality immediately on the following frames.   It is optimistic about
+    // the complexity of future frames, so even after generating one or more large frames on a
+    // significant change it will try to keep the QP at its current level until the HRD buffer
+    // bounds force a change to maintain the intended rate.
+
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    // This adjustment is applied to increase the QP by more than we normally would if a very
+    // large frame is encountered and we are in danger of running out of slack.
+    large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+    if (sts == BRC_UNDERFLOW) {
+        // The frame is far too big and we don't have the bits available to send it, so it will
+        // have to be re-encoded at a higher QP.
+        qp_delta = +2;
+        if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+            qp_delta += large_frame_adjustment;
+    } else if (sts == BRC_OVERFLOW) {
+        // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
+        // does not occur because we ignore overflow in VBR mode.
+        assert(0 && "Overflow in VBR mode");
+    } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+        // The frame is smaller than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] >
+            (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+            // We currently have lots of bits available, so decrease the QP slightly for the next
+            // frame.
+            qp_delta = -1;
+        } else {
+            // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
+            // level here, but are moving in the right direction.)
+            qp_delta = 0;
+        }
+    } else {
+        // The frame is larger than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+            // We are currently over the target level, so do nothing.
+            qp_delta = 0;
+        } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+            // We are under the target level, but not critically.  Increase the QP by one step if
+            // continuing like this would underflow soon (currently within one second).
+            if (mfc_context->hrd.current_buffer_fullness[0] /
+                (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+                ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+                qp_delta = +1;
+            else
+                qp_delta = 0;
+        } else {
+            // We are a long way under the target level.  Always increase the QP, possibly by a
+            // larger amount dependent on how big the frame we just made actually was.
+            qp_delta = +1 + large_frame_adjustment;
+        }
+    }
+
+    switch (slice_type) {
+    case SLICE_TYPE_I:
+        qp[SLICE_TYPE_I] += qp_delta;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_P:
+        qp[SLICE_TYPE_P] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_B:
+        qp[SLICE_TYPE_B] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+        break;
+    }
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], (int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], (int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], (int)encoder_context->brc.min_qp, 51);
The lower bound is 1 when encoder_context->brc.min_qp is equal to 0.
Post by Mark Thompson
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
Same as above
Post by Mark Thompson
+        sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+    return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+                           struct intel_encoder_context *encoder_context,
+                           int frame_bits)
+{
+    switch (encoder_context->rate_control_mode) {
+    case VA_RC_CBR:
+        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+    case VA_RC_VBR:
+        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+    }
+    assert(0 && "Invalid RC mode");
+}
+
 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context)
 {
@@ -427,7 +544,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
         encoder_context->codec != CODEC_H264_MVC)
         return;
 
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         /*Programing bit rate control */
         if (encoder_context->brc.need_reset) {
             intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                               pPicParameter,
                               pSliceParameter,
                               encode_state, encoder_context,
-                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                              (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
         gen75_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
         gen8_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 76cb915..cc37190 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
                     profile != VAProfileMPEG2Simple)
                     attrib_list[i].value |= VA_RC_CBR;
 
-                if (profile == VAProfileVP9Profile0)
+                if (profile == VAProfileVP9Profile0 ||
+                    profile == VAProfileH264ConstrainedBaseline ||
+                    profile == VAProfileH264Main ||
+                    profile == VAProfileH264High)
                     attrib_list[i].value |= VA_RC_VBR;
 
                 break;
Mark Thompson
2017-01-10 00:04:43 UTC
Permalink
This implements a simple reactive VBR rate control mode for single-layer H.264.
The primary aim here is to avoid the problematic behaviour that the CBR rate
controller displays on scene changes, where the QP can get pushed up by a large
amount in a short period and compromise the quality of following frames to a
very visible degree.

The main idea, then, is to try to keep the HRD buffering above the target level
most of the time, so that when a large frame is generated (on a scene change or
when the stream complexity increases) we have plenty of slack to be able to
encode the more difficult region without compromising quality immediately on
the following frames. It is optimistic about the complexity of future frames,
so even after generating one or more large frames on a significant change it
will try to keep the QP at its current level until the HRD buffer bounds force
a change to maintain the intended rate.

Compared to the CBR rate controller, it keeps the quality level much more
stable - QP does not always spike up as large frames are generated when the
complexity of the stream increases transiently, but equally it does not reduce
as quickly when the complexity of the stream decreases.

Signed-off-by: Mark Thompson <***@jkqxz.net>
---
Post by Mark Thompson
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], (int)encoder_context->brc.min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], (int)encoder_context->brc.min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], (int)encoder_context->brc.min_qp, 51);
The lower bound is 1 when encoder_context->brc.min_qp is equal to 0.
Post by Mark Thompson
+
+ if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+ sts = BRC_UNDERFLOW_WITH_MAX_QP;
+ if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
Same as above
Apologies, I missed updating it to match 33a32935ac9e2622adc5c59045d565b4e5904749.

Fixed in the same way as that patch in this version.

Thanks,

- Mark


src/gen6_mfc.c | 10 ++--
src/gen6_mfc_common.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++--
src/gen75_mfc.c | 10 ++--
src/gen8_mfc.c | 10 ++--
src/i965_drv_video.c | 5 +-
5 files changed, 141 insertions(+), 20 deletions(-)

diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen6_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen6_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fbedc94..0d21a11 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -127,6 +127,9 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
((double)encoder_context->brc.framerate[i - 1].num / (double)encoder_context->brc.framerate[i - 1].den);
}

+ if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
+ bitrate = bitrate * encoder_context->brc.target_percentage[i] / 100;
+
if (i == encoder_context->layer.num_layers - 1)
factor = 1.0;
else {
@@ -219,9 +222,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
return BRC_NO_HRD_VIOLATION;
}

-int intel_mfc_brc_postpack(struct encode_state *encode_state,
- struct intel_encoder_context *encoder_context,
- int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
{
struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -368,6 +371,121 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
return sts;
}

+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
+{
+ struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+ gen6_brc_status sts;
+ VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+ int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+ int *qp = mfc_context->brc.qp_prime_y[0];
+ int min_qp = MAX(1, encoder_context->brc.min_qp);
+ int qp_delta, large_frame_adjustment;
+
+ // This implements a simple reactive VBR rate control mode for single-layer H.264. The primary
+ // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
+ // scene changes, where the QP can get pushed up by a large amount in a short period and
+ // compromise the quality of following frames to a very visible degree.
+ // The main idea, then, is to try to keep the HRD buffering above the target level most of the
+ // time, so that when a large frame is generated (on a scene change or when the stream
+ // complexity increases) we have plenty of slack to be able to encode the more difficult region
+ // without compromising quality immediately on the following frames. It is optimistic about
+ // the complexity of future frames, so even after generating one or more large frames on a
+ // significant change it will try to keep the QP at its current level until the HRD buffer
+ // bounds force a change to maintain the intended rate.
+
+ sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+ // This adjustment is applied to increase the QP by more than we normally would if a very
+ // large frame is encountered and we are in danger of running out of slack.
+ large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+ if (sts == BRC_UNDERFLOW) {
+ // The frame is far too big and we don't have the bits available to send it, so it will
+ // have to be re-encoded at a higher QP.
+ qp_delta = +2;
+ if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+ qp_delta += large_frame_adjustment;
+ } else if (sts == BRC_OVERFLOW) {
+ // The frame is very small and we are now overflowing the HRD buffer. Currently this case
+ // does not occur because we ignore overflow in VBR mode.
+ assert(0 && "Overflow in VBR mode");
+ } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+ // The frame is smaller than the average size expected for this frame type.
+ if (mfc_context->hrd.current_buffer_fullness[0] >
+ (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+ // We currently have lots of bits available, so decrease the QP slightly for the next
+ // frame.
+ qp_delta = -1;
+ } else {
+ // The HRD buffer fullness is increasing, so do nothing. (We may be under the target
+ // level here, but are moving in the right direction.)
+ qp_delta = 0;
+ }
+ } else {
+ // The frame is larger than the average size expected for this frame type.
+ if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+ // We are currently over the target level, so do nothing.
+ qp_delta = 0;
+ } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+ // We are under the target level, but not critically. Increase the QP by one step if
+ // continuing like this would underflow soon (currently within one second).
+ if (mfc_context->hrd.current_buffer_fullness[0] /
+ (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+ ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+ qp_delta = +1;
+ else
+ qp_delta = 0;
+ } else {
+ // We are a long way under the target level. Always increase the QP, possibly by a
+ // larger amount dependent on how big the frame we just made actually was.
+ qp_delta = +1 + large_frame_adjustment;
+ }
+ }
+
+ switch (slice_type) {
+ case SLICE_TYPE_I:
+ qp[SLICE_TYPE_I] += qp_delta;
+ qp[SLICE_TYPE_P] = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+ qp[SLICE_TYPE_B] = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+ break;
+ case SLICE_TYPE_P:
+ qp[SLICE_TYPE_P] += qp_delta;
+ qp[SLICE_TYPE_I] = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+ qp[SLICE_TYPE_B] = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+ break;
+ case SLICE_TYPE_B:
+ qp[SLICE_TYPE_B] += qp_delta;
+ qp[SLICE_TYPE_I] = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+ qp[SLICE_TYPE_P] = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+ break;
+ }
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
+ BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
+
+ if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+ sts = BRC_UNDERFLOW_WITH_MAX_QP;
+ if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
+ sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+ return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+ struct intel_encoder_context *encoder_context,
+ int frame_bits)
+{
+ switch (encoder_context->rate_control_mode) {
+ case VA_RC_CBR:
+ return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+ case VA_RC_VBR:
+ return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+ }
+ assert(0 && "Invalid RC mode");
+}
+
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
struct intel_encoder_context *encoder_context)
{
@@ -427,7 +545,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
encoder_context->codec != CODEC_H264_MVC)
return;

- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
/*Programing bit rate control */
if (encoder_context->brc.need_reset) {
intel_mfc_bit_rate_control_context_init(encode_state, encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen75_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
int qp_mb;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
pPicParameter,
pSliceParameter,
encode_state, encoder_context,
- (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
+ (rate_control_mode != VA_RC_CQP), qp_slice, slice_batch);

if ( slice_index == 0)
intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
int qp_slice;

qp_slice = qp;
- if (rate_control_mode == VA_RC_CBR) {
+ if (rate_control_mode != VA_RC_CQP) {
qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
if (encode_state->slice_header_index[slice_index] == 0) {
pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
pSliceParameter,
encode_state,
encoder_context,
- (rate_control_mode == VA_RC_CBR),
+ (rate_control_mode != VA_RC_CQP),
qp_slice,
slice_batch);

@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
/*Programing bcs pipeline*/
gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
gen8_mfc_run(ctx, encode_state, encoder_context);
- if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
+ if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
sts = intel_mfc_brc_postpack(encode_state, encoder_context, current_frame_bits_size);
if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 76cb915..cc37190 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
profile != VAProfileMPEG2Simple)
attrib_list[i].value |= VA_RC_CBR;

- if (profile == VAProfileVP9Profile0)
+ if (profile == VAProfileVP9Profile0 ||
+ profile == VAProfileH264ConstrainedBaseline ||
+ profile == VAProfileH264Main ||
+ profile == VAProfileH264High)
attrib_list[i].value |= VA_RC_VBR;

break;
--
2.11.0
Xiang, Haihao
2017-01-10 01:03:26 UTC
Permalink
Thanks for the new patch, applied.
Post by Mark Thompson
This implements a simple reactive VBR rate control mode for single-layer H.264.
The primary aim here is to avoid the problematic behaviour that the CBR rate
controller displays on scene changes, where the QP can get pushed up by a large
amount in a short period and compromise the quality of following frames to a
very visible degree.
The main idea, then, is to try to keep the HRD buffering above the target level
most of the time, so that when a large frame is generated (on a scene change or
when the stream complexity increases) we have plenty of slack to be able to
encode the more difficult region without compromising quality immediately on
the following frames.   It is optimistic about the complexity of future frames,
so even after generating one or more large frames on a significant change it
will try to keep the QP at its current level until the HRD buffer bounds force
a change to maintain the intended rate.
Compared to the CBR rate controller, it keeps the quality level much more
stable - QP does not always spike up as large frames are generated when the
complexity of the stream increases transiently, but equally it does not reduce
as quickly when the complexity of the stream decreases.
---
Post by Mark Thompson
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I],
(int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P],
(int)encoder_context->brc.min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B],
(int)encoder_context->brc.min_qp, 51);
The lower bound is 1 when encoder_context->brc.min_qp is equal to 0.
Post by Mark Thompson
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == encoder_context->brc.min_qp)
Same as above
Apologies, I missed updating it to match
33a32935ac9e2622adc5c59045d565b4e5904749.
Fixed in the same way as that patch in this version.
Thanks,
- Mark
 src/gen6_mfc.c        |  10 ++--
 src/gen6_mfc_common.c | 126 ++++++++++++++++++++++++++++++++++++++++++++++++-
-
 src/gen75_mfc.c       |  10 ++--
 src/gen8_mfc.c        |  10 ++--
 src/i965_drv_video.c  |   5 +-
 5 files changed, 141 insertions(+), 20 deletions(-)
diff --git a/src/gen6_mfc.c b/src/gen6_mfc.c
index 8077c14..1765530 100644
--- a/src/gen6_mfc.c
+++ b/src/gen6_mfc.c
@@ -798,7 +798,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -816,7 +816,7 @@ gen6_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice,
slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice,
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
encoder_context, slice_batch);
@@ -1188,7 +1188,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1209,7 +1209,7 @@ gen6_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
-                             (rate_control_mode == VA_RC_CBR),
+                             (rate_control_mode != VA_RC_CQP),
                              qp_slice,
                              slice_batch);
 
@@ -1368,7 +1368,7 @@ gen6_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen6_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);
//filling the pipeline
         gen6_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen6_mfc_stop(ctx, encode_state, encoder_context,
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context,
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fbedc94..0d21a11 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -127,6 +127,9 @@ static void intel_mfc_brc_init(struct encode_state *encode_state,
                 ((double)encoder_context->brc.framerate[i - 1].num /
(double)encoder_context->brc.framerate[i - 1].den);
         }
 
+        if (mfc_context->brc.mode == VA_RC_VBR && encoder_context->brc.target_percentage[i])
+            bitrate = bitrate * encoder_context->brc.target_percentage[i] /
100;
+
         if (i == encoder_context->layer.num_layers - 1)
             factor = 1.0;
         else {
@@ -219,9 +222,9 @@ int intel_mfc_update_hrd(struct encode_state *encode_state,
     return BRC_NO_HRD_VIOLATION;
 }
 
-int intel_mfc_brc_postpack(struct encode_state *encode_state,
-                           struct intel_encoder_context *encoder_context,
-                           int frame_bits)
+static int intel_mfc_brc_postpack_cbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context
*encoder_context,
+                                      int frame_bits)
 {
     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
@@ -368,6 +371,121 @@ int intel_mfc_brc_postpack(struct encode_state *encode_state,
     return sts;
 }
 
+static int intel_mfc_brc_postpack_vbr(struct encode_state *encode_state,
+                                      struct intel_encoder_context
*encoder_context,
+                                      int frame_bits)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    gen6_brc_status sts;
+    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
+    int *qp = mfc_context->brc.qp_prime_y[0];
+    int min_qp = MAX(1, encoder_context->brc.min_qp);
+    int qp_delta, large_frame_adjustment;
+
+    // This implements a simple reactive VBR rate control mode for single-layer H.264.  The primary
+    // aim here is to avoid the problematic behaviour that the CBR rate controller displays on
+    // scene changes, where the QP can get pushed up by a large amount in a short period and
+    // compromise the quality of following frames to a very visible degree.
+    // The main idea, then, is to try to keep the HRD buffering above the target level most of the
+    // time, so that when a large frame is generated (on a scene change or when the stream
+    // complexity increases) we have plenty of slack to be able to encode the more difficult region
+    // without compromising quality immediately on the following frames.   It is optimistic about
+    // the complexity of future frames, so even after generating one or more large frames on a
+    // significant change it will try to keep the QP at its current level until the HRD buffer
+    // bounds force a change to maintain the intended rate.
+
+    sts = intel_mfc_update_hrd(encode_state, encoder_context, frame_bits);
+
+    // This adjustment is applied to increase the QP by more than we normally would if a very
+    // large frame is encountered and we are in danger of running out of slack.
+    large_frame_adjustment = rint(2.0 * log(frame_bits / mfc_context->brc.target_frame_size[0][slice_type]));
+
+    if (sts == BRC_UNDERFLOW) {
+        // The frame is far too big and we don't have the bits available to send it, so it will
+        // have to be re-encoded at a higher QP.
+        qp_delta = +2;
+        if (frame_bits > mfc_context->brc.target_frame_size[0][slice_type])
+            qp_delta += large_frame_adjustment;
+    } else if (sts == BRC_OVERFLOW) {
+        // The frame is very small and we are now overflowing the HRD buffer.  Currently this case
+        // does not occur because we ignore overflow in VBR mode.
+        assert(0 && "Overflow in VBR mode");
+    } else if (frame_bits <= mfc_context->brc.target_frame_size[0][slice_type]) {
+        // The frame is smaller than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] >
+            (mfc_context->hrd.target_buffer_fullness[0] + mfc_context->hrd.buffer_size[0]) / 2.0) {
+            // We currently have lots of bits available, so decrease the QP slightly for the next
+            // frame.
+            qp_delta = -1;
+        } else {
+            // The HRD buffer fullness is increasing, so do nothing.  (We may be under the target
+            // level here, but are moving in the right direction.)
+            qp_delta = 0;
+        }
+    } else {
+        // The frame is larger than the average size expected for this frame type.
+        if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0]) {
+            // We are currently over the target level, so do nothing.
+            qp_delta = 0;
+        } else if (mfc_context->hrd.current_buffer_fullness[0] > mfc_context->hrd.target_buffer_fullness[0] / 2.0) {
+            // We are under the target level, but not critically.  Increase the QP by one step if
+            // continuing like this would underflow soon (currently within one second).
+            if (mfc_context->hrd.current_buffer_fullness[0] /
+                (double)(frame_bits - mfc_context->brc.target_frame_size[0][slice_type] + 1) <
+                ((double)encoder_context->brc.framerate[0].num / (double)encoder_context->brc.framerate[0].den))
+                qp_delta = +1;
+            else
+                qp_delta = 0;
+        } else {
+            // We are a long way under the target level.  Always increase the QP, possibly by a
+            // larger amount dependent on how big the frame we just made actually was.
+            qp_delta = +1 + large_frame_adjustment;
+        }
+    }
+
+    switch (slice_type) {
+    case SLICE_TYPE_I:
+        qp[SLICE_TYPE_I] += qp_delta;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_I] + BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_I] + BRC_I_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_P:
+        qp[SLICE_TYPE_P] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_P] - BRC_I_P_QP_DIFF;
+        qp[SLICE_TYPE_B]  = qp[SLICE_TYPE_P] + BRC_P_B_QP_DIFF;
+        break;
+    case SLICE_TYPE_B:
+        qp[SLICE_TYPE_B] += qp_delta;
+        qp[SLICE_TYPE_I]  = qp[SLICE_TYPE_B] - BRC_I_B_QP_DIFF;
+        qp[SLICE_TYPE_P]  = qp[SLICE_TYPE_B] - BRC_P_B_QP_DIFF;
+        break;
+    }
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_I], min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_P], min_qp, 51);
+    BRC_CLIP(mfc_context->brc.qp_prime_y[0][SLICE_TYPE_B], min_qp, 51);
+
+    if (sts == BRC_UNDERFLOW && qp[slice_type] == 51)
+        sts = BRC_UNDERFLOW_WITH_MAX_QP;
+    if (sts == BRC_OVERFLOW && qp[slice_type] == min_qp)
+        sts = BRC_OVERFLOW_WITH_MIN_QP;
+
+    return sts;
+}
+
+int intel_mfc_brc_postpack(struct encode_state *encode_state,
+                           struct intel_encoder_context *encoder_context,
+                           int frame_bits)
+{
+    switch (encoder_context->rate_control_mode) {
+    case VA_RC_CBR:
+        return intel_mfc_brc_postpack_cbr(encode_state, encoder_context, frame_bits);
+    case VA_RC_VBR:
+        return intel_mfc_brc_postpack_vbr(encode_state, encoder_context, frame_bits);
+    }
+    assert(0 && "Invalid RC mode");
+}
+
 static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
                                        struct intel_encoder_context
*encoder_context)
 {
@@ -427,7 +545,7 @@ void intel_mfc_brc_prepare(struct encode_state *encode_state,
         encoder_context->codec != CODEC_H264_MVC)
         return;
 
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         /*Programing bit rate control */
         if (encoder_context->brc.need_reset) {
             intel_mfc_bit_rate_control_context_init(encode_state,
encoder_context);
diff --git a/src/gen75_mfc.c b/src/gen75_mfc.c
index 0fbbe76..7b76b99 100644
--- a/src/gen75_mfc.c
+++ b/src/gen75_mfc.c
@@ -1174,7 +1174,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1192,7 +1192,7 @@ gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                               pPicParameter,
                               pSliceParameter,
                               encode_state, encoder_context,
-                              (rate_control_mode == VA_RC_CBR), qp_slice,
slice_batch);
+                              (rate_control_mode != VA_RC_CQP), qp_slice,
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
encoder_context, slice_batch);
@@ -1521,7 +1521,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1540,7 +1540,7 @@ gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1702,7 +1702,7 @@ gen75_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen75_mfc_avc_pipeline_programing(ctx, encode_state,
encoder_context); //filling the pipeline
         gen75_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen75_mfc_stop(ctx, encode_state, encoder_context,
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context,
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/gen8_mfc.c b/src/gen8_mfc.c
index 90119d7..8e68c7c 100644
--- a/src/gen8_mfc.c
+++ b/src/gen8_mfc.c
@@ -1177,7 +1177,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
     int qp_mb;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1195,7 +1195,7 @@ gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state, encoder_context,
-                             (rate_control_mode == VA_RC_CBR), qp_slice,
slice_batch);
+                             (rate_control_mode != VA_RC_CQP), qp_slice,
slice_batch);
 
     if ( slice_index == 0)
         intel_mfc_avc_pipeline_header_programing(ctx, encode_state,
encoder_context, slice_batch);
@@ -1534,7 +1534,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
     int qp_slice;
 
     qp_slice = qp;
-    if (rate_control_mode == VA_RC_CBR) {
+    if (rate_control_mode != VA_RC_CQP) {
         qp = mfc_context->brc.qp_prime_y[encoder_context->layer.curr_frame_layer_id][slice_type];
         if (encode_state->slice_header_index[slice_index] == 0) {
             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
@@ -1553,7 +1553,7 @@ gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               pSliceParameter,
                               encode_state,
                               encoder_context,
-                              (rate_control_mode == VA_RC_CBR),
+                              (rate_control_mode != VA_RC_CQP),
                               qp_slice,
                               slice_batch);
 
@@ -1729,7 +1729,7 @@ gen8_mfc_avc_encode_picture(VADriverContextP ctx,
         /*Programing bcs pipeline*/
         gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);
//filling the pipeline
         gen8_mfc_run(ctx, encode_state, encoder_context);
-        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode ==
VA_RC_VBR*/) {
+        if (rate_control_mode == VA_RC_CBR || rate_control_mode == VA_RC_VBR) {
             gen8_mfc_stop(ctx, encode_state, encoder_context,
&current_frame_bits_size);
             sts = intel_mfc_brc_postpack(encode_state, encoder_context,
current_frame_bits_size);
             if (sts == BRC_NO_HRD_VIOLATION) {
diff --git a/src/i965_drv_video.c b/src/i965_drv_video.c
index 76cb915..cc37190 100644
--- a/src/i965_drv_video.c
+++ b/src/i965_drv_video.c
@@ -936,7 +936,10 @@ i965_GetConfigAttributes(VADriverContextP ctx,
                     profile != VAProfileMPEG2Simple)
                     attrib_list[i].value |= VA_RC_CBR;
 
-                if (profile == VAProfileVP9Profile0)
+                if (profile == VAProfileVP9Profile0 ||
+                    profile == VAProfileH264ConstrainedBaseline ||
+                    profile == VAProfileH264Main ||
+                    profile == VAProfileH264High)
                     attrib_list[i].value |= VA_RC_VBR;
 
                 break;
Loading...