Discussion:
[libav-devel] [PATCH 1/2] get_bits: make buffer_ptr const.
Ronald S. Bultje
2011-12-15 18:09:01 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

---
libavcodec/get_bits.h | 14 +++++++-------
1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 7980284..4057cd2 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -55,7 +55,7 @@ typedef struct GetBitContext {
#ifdef ALT_BITSTREAM_READER
int index;
#elif defined A32_BITSTREAM_READER
- uint32_t *buffer_ptr;
+ const uint32_t *buffer_ptr;
uint32_t cache0;
uint32_t cache1;
int bit_count;
@@ -178,11 +178,11 @@ static inline void skip_bits_long(GetBitContext *s, int n){

# define MIN_CACHE_BITS 32

-# define OPEN_READER(name, gb) \
- int name##_bit_count = (gb)->bit_count; \
- uint32_t name##_cache0 = (gb)->cache0; \
- uint32_t name##_cache1 = (gb)->cache1; \
- uint32_t *name##_buffer_ptr = (gb)->buffer_ptr
+# define OPEN_READER(name, gb) \
+ int name##_bit_count = (gb)->bit_count; \
+ uint32_t name##_cache0 = (gb)->cache0; \
+ uint32_t name##_cache1 = (gb)->cache1; \
+ const uint32_t *name##_buffer_ptr = (gb)->buffer_ptr

# define CLOSE_READER(name, gb) do { \
(gb)->bit_count = name##_bit_count; \
@@ -224,7 +224,7 @@ static inline void skip_bits_long(GetBitContext *s, int n){
# define GET_CACHE(name, gb) name##_cache0

static inline int get_bits_count(const GetBitContext *s) {
- return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+ return ((const uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
}

static inline void skip_bits_long(GetBitContext *s, int n){
--
1.7.6
Ronald S. Bultje
2011-12-15 18:09:02 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 43 ++++++++++++++++++++++++++++++++++++++++---
libavcodec/wmavoice.c | 2 ++
3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..6013e11 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -60,7 +77,7 @@ typedef struct GetBitContext {
uint32_t cache1;
int bit_count;
#endif
- int size_in_bits;
+ int size_in_bits, size_in_bits_plus1;
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +161,13 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +194,12 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +224,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -230,7 +260,12 @@ static inline int get_bits_count(const GetBitContext *s) {
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = FFMIN(re_buffer_ptr + (re_bit_count >> 5),
+ (const uint32_t *) s->buffer_end);
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +346,8 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+ if (UNCHECKED_BITSTREAM_READER || s->index < s->size_in_bits_plus1)
+ index++;
s->index = index;

return result;
@@ -391,6 +427,7 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+ s->size_in_bits_plus1 = bit_size + 1;
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.6
Ronald S. Bultje
2011-12-15 18:39:15 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/wmavoice.c | 2 ++
3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..137742f 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -61,6 +78,9 @@ typedef struct GetBitContext {
int bit_count;
#endif
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +164,13 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +197,12 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +227,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = FFMIN(re_buffer_ptr + (re_bit_count >> 5),
+ (const uint32_t *) s->buffer_end);
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +349,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -391,6 +432,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.6
Måns Rullgård
2011-12-15 19:26:20 UTC
Permalink
Post by Ronald S. Bultje
When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/wmavoice.c | 2 ++
3 files changed, 50 insertions(+), 2 deletions(-)
[...]
Post by Ronald S. Bultje
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Why +1? Is this for the hypothetical decoders that require overreading?
Which are those, and why don't we fix them?
Post by Ronald S. Bultje
# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +197,12 @@ static inline int get_bits_count(const GetBitContext *s){
}
static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}
#elif defined A32_BITSTREAM_READER
@@ -196,7 +227,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = FFMIN(re_buffer_ptr + (re_bit_count >> 5),
+ (const uint32_t *) s->buffer_end);
+#endif
This addition can overflow.
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-15 19:33:19 UTC
Permalink
Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 48
++++++++++++++++++++++++++++++++++++++++++++++--
Post by Ronald S. Bultje
libavcodec/wmavoice.c | 2 ++
3 files changed, 50 insertions(+), 2 deletions(-)
[...]
Post by Ronald S. Bultje
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Why +1? Is this for the hypothetical decoders that require overreading?
Which are those, and why don't we fix them?
It's so get_bits_left() returns -1 on overread. I agree we should fix them
but there's so many of them that I lost track.
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = FFMIN(re_buffer_ptr + (re_bit_count >> 5),
+ (const uint32_t *) s->buffer_end);
+#endif
This addition can overflow.
Changed locally to:

re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *) s->buffer_end
- re_buffer_ptr ?
re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t
*) s->buffer_end;

Ronald
Måns Rullgård
2011-12-15 19:38:28 UTC
Permalink
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = FFMIN(re_buffer_ptr + (re_bit_count >> 5),
+ (const uint32_t *) s->buffer_end);
+#endif
This addition can overflow.
re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *) s->buffer_end
- re_buffer_ptr ?
re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t
*) s->buffer_end;
Since nothing else uses buffer_end, why not change its type and avoid
all the casting?
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-15 19:40:36 UTC
Permalink
Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const
GetBitContext
Post by Ronald S. Bultje
Post by Ronald S. Bultje
*s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = FFMIN(re_buffer_ptr + (re_bit_count >> 5),
+ (const uint32_t *) s->buffer_end);
+#endif
This addition can overflow.
re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end
Post by Ronald S. Bultje
- re_buffer_ptr ?
re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t
*) s->buffer_end;
Since nothing else uses buffer_end, why not change its type and avoid
all the casting?
It's accessed directly in some of the old mpegvideo-alike codecs.

Ronald
Måns Rullgård
2011-12-15 19:48:37 UTC
Permalink
Post by Ronald S. Bultje
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
BTW, does index += FFMIN(num, size_in_bits_plus1 - index) generate worse
code, or did you just not like the look of it?
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-15 19:50:16 UTC
Permalink
Hi,
Post by Måns Rullgård
Post by Ronald S. Bultje
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
BTW, does index += FFMIN(num, size_in_bits_plus1 - index) generate worse
code, or did you just not like the look of it?
Generates the same code on x86. Is it better on arm? I don't mind either
way.

Ronald
Måns Rullgård
2011-12-15 20:01:35 UTC
Permalink
Post by Ronald S. Bultje
Hi,
Post by Måns Rullgård
Post by Ronald S. Bultje
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
BTW, does index += FFMIN(num, size_in_bits_plus1 - index) generate worse
code, or did you just not like the look of it?
Generates the same code on x86. Is it better on arm? I don't mind either
way.
I think it looks nicer.
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-15 20:12:25 UTC
Permalink
Hi,
Post by Måns Rullgård
Post by Ronald S. Bultje
Hi,
Post by Måns Rullgård
Post by Ronald S. Bultje
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
BTW, does index += FFMIN(num, size_in_bits_plus1 - index) generate worse
code, or did you just not like the look of it?
Generates the same code on x86. Is it better on arm? I don't mind either
way.
I think it looks nicer.
I take it back, old gcc-4.2 (Mac) barf on it...

FFMIN (17 instructions):
0x0000000100310220 <get_bits+0>: mov %edi,%eax
0x0000000100310222 <get_bits+2>: shr $0x3,%eax
0x0000000100310225 <get_bits+5>: mov %eax,%eax
0x0000000100310227 <get_bits+7>: mov (%r10,%rax,1),%esi
0x000000010031022b <av_bswap32+0>: bswap %esi
0x000000010031022d <get_bits+13>: mov %edi,%ecx
0x000000010031022f <get_bits+15>: and $0x7,%ecx
0x0000000100310232 <get_bits+18>: shl %cl,%esi
0x0000000100310234 <NEG_USR32+0>: shr $0xfe,%esi
0x0000000100310237 <get_bits+23>: mov %r8d,%eax
0x000000010031023a <get_bits+26>: sub %edi,%eax
0x000000010031023c <get_bits+28>: cmp $0x2,%eax
0x000000010031023f <get_bits+31>: mov $0x2,%edx
0x0000000100310244 <get_bits+36>: cmova %edx,%eax
0x0000000100310247 <get_bits+39>: lea (%rax,%rdi,1),%ecx
0x000000010031024a <get_bits+42>: mov %ecx,%edi
0x000000010031024c <get_bits+44>: mov %ecx,0x1768(%rbx)

current patch (15 instructions):
0x000000010030fefb <get_bits+0>: mov %edx,%eax
0x000000010030fefd <get_bits+2>: shr $0x3,%eax
0x000000010030ff00 <get_bits+5>: mov %eax,%eax
0x000000010030ff02 <get_bits+7>: mov (%r8,%rax,1),%r10d
0x000000010030ff06 <av_bswap32+0>: bswap %r10d
0x000000010030ff09 <get_bits+14>: mov %esi,%ecx
0x000000010030ff0b <get_bits+16>: shl %cl,%r10d
0x000000010030ff0e <NEG_USR32+0>: shr $0xfe,%r10d
0x000000010030ff12 <get_bits+23>: mov %edi,%esi
0x000000010030ff14 <get_bits+25>: mov %edi,%eax
0x000000010030ff16 <get_bits+27>: sub %edx,%eax
0x000000010030ff18 <get_bits+29>: add $0x2,%edx
0x000000010030ff1b <get_bits+32>: cmp $0x2,%eax
0x000000010030ff1e <get_bits+35>: cmovae %edx,%esi
0x000000010030ff21 <get_bits+38>: mov %esi,0x1768(%rbx)

unsafe bitstream reader (12 instructions - note lack of mov/cmovae/cmp):
0x00000001003022d0 <get_bits+0>: mov %esi,%edx
0x00000001003022d2 <get_bits+2>: mov %esi,%eax
0x00000001003022d4 <get_bits+4>: shr $0x3,%eax
0x00000001003022d7 <get_bits+7>: mov %eax,%eax
0x00000001003022d9 <get_bits+9>: mov (%rdi,%rax,1),%eax
0x00000001003022dc <av_bswap32+0>: bswap %eax
0x00000001003022de <get_bits+14>: mov %esi,%ecx
0x00000001003022e0 <get_bits+16>: and $0x7,%ecx
0x00000001003022e3 <get_bits+19>: shl %cl,%eax
0x00000001003022e5 <NEG_USR32+0>: shr $0xfe,%eax
0x00000001003022e8 <get_bits+24>: lea 0x2(%rdx),%esi
0x00000001003022eb <get_bits+27>: mov %esi,0x1768(%rbx)

So maybe I'll keep it. Not sure why it barfs, silly gcc...

Ronald
Ronald S. Bultje
2011-12-15 20:30:38 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/wmavoice.c | 2 ++
3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..d72fcfa 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -61,6 +78,9 @@ typedef struct GetBitContext {
int bit_count;
#endif
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +164,13 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +197,12 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +227,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *) s->buffer_end - re_buffer_ptr ?
+ re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t *) s->buffer_end;
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +349,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -391,6 +432,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.6
Måns Rullgård
2011-12-15 21:05:21 UTC
Permalink
Post by Ronald S. Bultje
When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/wmavoice.c | 2 ++
3 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
The help output should list the flag needed to change the default. As
written, it gives the impression it is on by default.
Post by Ronald S. Bultje
@@ -144,7 +164,13 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Thinking more carefully about it, this addition is overflow-safe since
the skip amount must be small and we require end padding. Your old
version with FFMIN(size, index + num) is thus safe here after all,
should it result in better code.
Post by Ronald S. Bultje
# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +197,12 @@ static inline int get_bits_count(const GetBitContext *s){
}
static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}
This one allows an arbitrary n, so it must be done safely, e.g. like this.
Post by Ronald S. Bultje
#elif defined A32_BITSTREAM_READER
@@ -196,7 +227,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *) s->buffer_end - re_buffer_ptr ?
+ re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t *) s->buffer_end;
+#endif
This is again guaranteed to fall within the padding region, so whatever
variant gives better code is OK to use here.
Post by Ronald S. Bultje
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +349,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;
Why #if rather than if (UNCHECKED_BITSTREAM_READER || ...) here?
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-15 21:18:28 UTC
Permalink
Hi,
Post by Måns Rullgård
Post by Ronald S. Bultje
@@ -311,7 +349,10 @@ static inline unsigned int get_bits1(GetBitContext
*s){
Post by Ronald S. Bultje
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;
Why #if rather than if (UNCHECKED_BITSTREAM_READER || ...) here?
That way I can put the s->size_in_bits_plus1 declaration in the struct
under the #if, so as to safe 4 bytes of unused data in the struct for
people preferring the unsafe variant.

Ronald
Ronald S. Bultje
2011-12-15 21:26:21 UTC
Permalink
Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 48
++++++++++++++++++++++++++++++++++++++++++++++--
Post by Ronald S. Bultje
libavcodec/wmavoice.c | 2 ++
3 files changed, 50 insertions(+), 2 deletions(-)
diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger
binary)
Post by Ronald S. Bultje
--enable-hardcoded-tables use hardcoded tables instead of runtime
generation
Post by Ronald S. Bultje
+ --disable-safe-bitstream-reader disable buffer boundary checking in
bitreaders (faster, but may crash)
Post by Ronald S. Bultje
--enable-memalign-hack emulate memalign, interferes with memory
debuggers
Post by Ronald S. Bultje
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
The help output should list the flag needed to change the default. As
written, it gives the impression it is on by default.
Post by Ronald S. Bultje
@@ -144,7 +164,13 @@ for examples see get_bits, show_bits, skip_bits,
get_vlc
Post by Ronald S. Bultje
# endif
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Thinking more carefully about it, this addition is overflow-safe since
the skip amount must be small and we require end padding. Your old
version with FFMIN(size, index + num) is thus safe here after all,
should it result in better code.
Thinking about it, I don't think that's right. Padding can't overflow size
of int of buffersize in bytes, but size_in_bits is - well - in bits, so if
bits is MAXINT, then the addition can overflow even though the size of the
padded buffer is (1<<28) + padding_size.

(Does that make sense?)

Ronald
Måns Rullgård
2011-12-15 21:44:46 UTC
Permalink
Post by Ronald S. Bultje
Post by Måns Rullgård
Post by Ronald S. Bultje
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Thinking more carefully about it, this addition is overflow-safe since
the skip amount must be small and we require end padding. Your old
version with FFMIN(size, index + num) is thus safe here after all,
should it result in better code.
Thinking about it, I don't think that's right. Padding can't overflow size
of int of buffersize in bytes, but size_in_bits is - well - in bits, so if
bits is MAXINT, then the addition can overflow even though the size of the
padded buffer is (1<<28) + padding_size.
(Does that make sense?)
You're right, if the size_in_bits is greater than INT_MAX-32, it can
overflow. If we require the _padded_ buffer to be at most INT_MAX/8
bytes, it's all safe however. We already (implicitly, probably should
document) impose this maximum on the unpadded size. Reducing it by a
few bytes seems acceptable if this allows faster code here.

So, does the FFMIN() variant produce better code?
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-16 00:00:16 UTC
Permalink
Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -144,7 +164,13 @@ for examples see get_bits, show_bits, skip_bits,
get_vlc
Post by Ronald S. Bultje
# endif
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Thinking more carefully about it, this addition is overflow-safe since
the skip amount must be small and we require end padding. Your old
version with FFMIN(size, index + num) is thus safe here after all,
should it result in better code.
It does actually (one mov less, so only an added cmp and cmova), and
lightly faster on CAVLC/H264 (0.72s -> 0.83s instead of 0.86s on the
super-high-bitrate sample), so we're keeping the FFMIN() then.
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end - re_buffer_ptr ?
Post by Ronald S. Bultje
+ re_buffer_ptr + (re_bit_count >> 5) : (const
uint32_t *) s->buffer_end;
Post by Ronald S. Bultje
+#endif
This is again guaranteed to fall within the padding region, so whatever
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not terribly
important, but I'll stick to this:

[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif

New patch attached. Documentation not yet updated until we decide whether
to enable by default or not.

Ronald
Måns Rullgård
2011-12-16 00:38:08 UTC
Permalink
Post by Ronald S. Bultje
Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -144,7 +164,13 @@ for examples see get_bits, show_bits, skip_bits,
get_vlc
Post by Ronald S. Bultje
# endif
// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ (gb)->size_in_bits_plus1 - name##_index < (num) ? \
+ (gb)->size_in_bits_plus1 : name##_index + (num)
+#endif
Thinking more carefully about it, this addition is overflow-safe since
the skip amount must be small and we require end padding. Your old
version with FFMIN(size, index + num) is thus safe here after all,
should it result in better code.
It does actually (one mov less, so only an added cmp and cmova), and
lightly faster on CAVLC/H264 (0.72s -> 0.83s instead of 0.86s on the
super-high-bitrate sample), so we're keeping the FFMIN() then.
Great.
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const GetBitContext *s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *) s->buffer_end - re_buffer_ptr ?
+ re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t *) s->buffer_end;
+#endif
This is again guaranteed to fall within the padding region, so whatever
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not terribly
[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif
Gah, I'm stupid. This is skip_bits_long(), which can be called with any
value. It must protect against overflow.
Post by Ronald S. Bultje
New patch attached. Documentation not yet updated until we decide whether
to enable by default or not.
[...]
Post by Ronald S. Bultje
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = FFMIN((gb)->size_in_bits, \
+ name##_index + num)
+#endif
Those lines are awfully long. Please do something about that, e.g. like
this:

# define SKIP_COUNTER(name, gb, num) \
name##_index = FFMIN((gb)->size_in_bits, name##_index + num)
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-16 01:03:49 UTC
Permalink
Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const
GetBitContext
Post by Ronald S. Bultje
Post by Ronald S. Bultje
*s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end - re_buffer_ptr ?
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+ re_buffer_ptr + (re_bit_count >> 5) : (const
uint32_t *) s->buffer_end;
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+#endif
This is again guaranteed to fall within the padding region, so whatever
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not terribly
[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif
Gah, I'm stupid. This is skip_bits_long(), which can be called with any
value. It must protect against overflow.
No you're actually right, but differently; re_bit_count is never >31.
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index =
FFMIN((gb)->size_in_bits, \
Post by Ronald S. Bultje
+
name##_index + num)
Post by Ronald S. Bultje
+#endif
Those lines are awfully long. Please do something about that, e.g. like
# define SKIP_COUNTER(name, gb, num) \
name##_index = FFMIN((gb)->size_in_bits, name##_index + num)
Will fix and resend.

Ronald
Måns Rullgård
2011-12-16 01:59:56 UTC
Permalink
"Ronald S. Bultje" <***@gmail.com> writes:

f> Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const
GetBitContext
Post by Ronald S. Bultje
Post by Ronald S. Bultje
*s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end - re_buffer_ptr ?
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+ re_buffer_ptr + (re_bit_count >> 5) : (const
uint32_t *) s->buffer_end;
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+#endif
This is again guaranteed to fall within the padding region, so whatever
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not terribly
[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif
Gah, I'm stupid. This is skip_bits_long(), which can be called with any
value. It must protect against overflow.
No you're actually right, but differently; re_bit_count is never >31.
The &= 31 is after the addition, so it can have any value. If that were
not the case, skip_bits_long() would not work at all.
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-16 02:02:08 UTC
Permalink
Hi,
Post by Måns Rullgård
f> Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const
GetBitContext
Post by Ronald S. Bultje
Post by Ronald S. Bultje
*s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end - re_buffer_ptr ?
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+ re_buffer_ptr + (re_bit_count >> 5) : (const
uint32_t *) s->buffer_end;
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+#endif
This is again guaranteed to fall within the padding region, so
whatever
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not
terribly
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif
Gah, I'm stupid. This is skip_bits_long(), which can be called with any
value. It must protect against overflow.
No you're actually right, but differently; re_bit_count is never >31.
The &= 31 is after the addition, so it can have any value. If that were
not the case, skip_bits_long() would not work at all.
re_bit_count is the bit offset within the uint32_t. It's never >31, also
not before the addition. So for re_bit_count < 31 and n any value, it
should mostly work.

Anyway, since there's no performance impact I can revert it to what it was
before if you prefer that.

Ronald
Måns Rullgård
2011-12-16 02:07:34 UTC
Permalink
Post by Ronald S. Bultje
Hi,
Post by Måns Rullgård
f> Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const
GetBitContext
Post by Ronald S. Bultje
Post by Ronald S. Bultje
*s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end - re_buffer_ptr ?
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+ re_buffer_ptr + (re_bit_count >> 5) : (const
uint32_t *) s->buffer_end;
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+#endif
This is again guaranteed to fall within the padding region, so
whatever
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not
terribly
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif
Gah, I'm stupid. This is skip_bits_long(), which can be called with any
value. It must protect against overflow.
No you're actually right, but differently; re_bit_count is never >31.
The &= 31 is after the addition, so it can have any value. If that were
not the case, skip_bits_long() would not work at all.
re_bit_count is the bit offset within the uint32_t. It's never >31, also
not before the addition. So for re_bit_count < 31 and n any value, it
should mostly work.
31 + INT_MAX overflows.

Do we trust callers of skip_bits_long() validate the length, e.g. when
it is taken from the bitstream itself (perhaps to skip a length-prefixed
block we don't care about)?
--
Måns Rullgård
***@mansr.com
Måns Rullgård
2011-12-16 02:09:45 UTC
Permalink
Post by Måns Rullgård
Post by Ronald S. Bultje
Hi,
Post by Måns Rullgård
f> Hi,
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
@@ -230,7 +263,12 @@ static inline int get_bits_count(const
GetBitContext
Post by Ronald S. Bultje
Post by Ronald S. Bultje
*s) {
Post by Ronald S. Bultje
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *)
s->buffer_end - re_buffer_ptr ?
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+ re_buffer_ptr + (re_bit_count >> 5) : (const
uint32_t *) s->buffer_end;
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
+#endif
This is again guaranteed to fall within the padding region, so
whatever
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
variant gives better code is OK to use here.
gcc messes up and does 4 more instructions. Fortunately it's not
terribly
Post by Ronald S. Bultje
Post by Ronald S. Bultje
Post by Ronald S. Bultje
[..]
re_buffer_ptr += re_bit_count>>5;
+ #if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+ #endif
Gah, I'm stupid. This is skip_bits_long(), which can be called with any
value. It must protect against overflow.
No you're actually right, but differently; re_bit_count is never >31.
The &= 31 is after the addition, so it can have any value. If that were
not the case, skip_bits_long() would not work at all.
re_bit_count is the bit offset within the uint32_t. It's never >31, also
not before the addition. So for re_bit_count < 31 and n any value, it
should mostly work.
31 + INT_MAX overflows.
Since this is comparing pointers, overflowing the padding is sufficient
for it to become unsafe (if the (padded) buffer ends near the top of the
address space).
--
Måns Rullgård
***@mansr.com
Uoti Urpala
2011-12-16 04:26:05 UTC
Permalink
Post by Måns Rullgård
Post by Ronald S. Bultje
re_bit_count is the bit offset within the uint32_t. It's never >31, also
not before the addition. So for re_bit_count < 31 and n any value, it
should mostly work.
31 + INT_MAX overflows.
Do we trust callers of skip_bits_long() validate the length, e.g. when
it is taken from the bitstream itself (perhaps to skip a length-prefixed
block we don't care about)?
If the caller doesn't validate it and it can be INT_MAX then it can
likely also be negative. So if you want to handle that case then the
test should probably cast the value to an unsigned type.
Ronald S. Bultje
2011-12-16 04:29:03 UTC
Permalink
Hi,
Post by Uoti Urpala
Post by Måns Rullgård
Post by Ronald S. Bultje
re_bit_count is the bit offset within the uint32_t. It's never >31,
also
Post by Måns Rullgård
Post by Ronald S. Bultje
not before the addition. So for re_bit_count < 31 and n any value, it
should mostly work.
31 + INT_MAX overflows.
Do we trust callers of skip_bits_long() validate the length, e.g. when
it is taken from the bitstream itself (perhaps to skip a length-prefixed
block we don't care about)?
If the caller doesn't validate it and it can be INT_MAX then it can
likely also be negative. So if you want to handle that case then the
test should probably cast the value to an unsigned type.
The pointers are unsigned, doesn't C dictate that a comparison between
signed and unsigned will be unsigned?

Ronald
Uoti Urpala
2011-12-16 04:39:05 UTC
Permalink
Post by Uoti Urpala
If the caller doesn't validate it and it can be INT_MAX then it can
likely also be negative. So if you want to handle that case
then the test should probably cast the value to an unsigned
type.
The pointers are unsigned, doesn't C dictate that a comparison between
signed and unsigned will be unsigned?
The data type the pointers *point to* may be unsigned, but that's
irrelevant here. The value actually being compared with is the result of
subtracting two pointers, which has type ptrdiff_t (a signed integer
type).
Ronald S. Bultje
2011-12-16 05:12:14 UTC
Permalink
Hi,
Post by Uoti Urpala
Post by Uoti Urpala
If the caller doesn't validate it and it can be INT_MAX then it can
likely also be negative. So if you want to handle that case
then the test should probably cast the value to an unsigned
type.
The pointers are unsigned, doesn't C dictate that a comparison between
signed and unsigned will be unsigned?
The data type the pointers *point to* may be unsigned, but that's
irrelevant here. The value actually being compared with is the result of
subtracting two pointers, which has type ptrdiff_t (a signed integer
type).
Hm, right. I must admit I'm not terribly happy about the state of
skip_bits_long in the A32 case if I were to have to check n for overflow
before adding it to bits_count, it'd get very messy.

But maybe we don't care, since it's skip_bits_long()...

Ronald
Ronald S. Bultje
2011-12-16 01:05:27 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 45 +++++++++++++++++++++++++++++++++++++++++++--
libavcodec/wmavoice.c | 2 ++
3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..20370e3 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -61,6 +78,9 @@ typedef struct GetBitContext {
int bit_count;
#endif
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +164,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ FFMIN((gb)->size_in_bits, name##_index + num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +196,12 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +226,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -231,6 +263,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
re_buffer_ptr += re_bit_count>>5;
+#if !UNCHECKED_BITSTREAM_READER
+ re_buffer_ptr = FFMIN(re_buffer_ptr, s->buffer_end);
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +346,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -391,6 +429,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.6
Ronald S. Bultje
2011-12-16 04:16:21 UTC
Permalink
Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 47 +++++++++++++++++++++++++++++++++++++++++++++--
libavcodec/wmavoice.c | 2 ++
3 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..0c8f384 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -61,6 +78,9 @@ typedef struct GetBitContext {
int bit_count;
#endif
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +164,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ FFMIN((gb)->size_in_bits, name##_index + num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +196,12 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index = s->size_in_bits_plus1 - s->index < n ?
+ s->size_in_bits_plus1 : s->index + n;
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +226,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -230,7 +262,12 @@ static inline int get_bits_count(const GetBitContext *s) {
static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
re_bit_count += n;
+#if UNCHECKED_BITSTREAM_READER
re_buffer_ptr += re_bit_count>>5;
+#else
+ re_buffer_ptr = (re_bit_count >> 5) < (const uint32_t *) s->buffer_end - re_buffer_ptr ?
+ re_buffer_ptr + (re_bit_count >> 5) : (const uint32_t *) s->buffer_end;
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +348,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -391,6 +431,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.2.1
Ronald S. Bultje
2011-12-16 18:15:28 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 +
libavcodec/get_bits.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
libavcodec/wmavoice.c | 2 +
3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..6080d33 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -61,6 +78,9 @@ typedef struct GetBitContext {
int bit_count;
#endif
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +164,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ FFMIN((gb)->size_in_bits, name##_index + num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +196,11 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index += av_clip(n, -s->index, s->size_in_bits_plus1 - s->index);
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +225,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -229,8 +260,18 @@ static inline int get_bits_count(const GetBitContext *s) {

static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
+#if UNCHECKED_BITSTREAM_READER
re_bit_count += n;
re_buffer_ptr += re_bit_count>>5;
+#else
+ {
+ int offset_pos = (re_bit_count >> 5) + (n >> 5);
+ re_bit_count = (re_bit_count & 31) + (n & 31);
+ re_buffer_ptr += av_clip(offset_pos + (re_bit_count >> 5),
+ s->buffer - re_buffer_ptr,
+ s->buffer_end - re_buffer_ptr);
+ }
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +352,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -391,6 +435,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.6
Måns Rullgård
2011-12-16 18:26:10 UTC
Permalink
Post by Ronald S. Bultje
When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 +
libavcodec/get_bits.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
libavcodec/wmavoice.c | 2 +
3 files changed, 53 insertions(+), 2 deletions(-)
[...]
Post by Ronald S. Bultje
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ FFMIN((gb)->size_in_bits, name##_index + num)
+#endif
Not +1 any more?
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-16 18:28:09 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 +
libavcodec/get_bits.h | 51 +++++++++++++++++++++++++++++++++++++++++++++++-
libavcodec/wmavoice.c | 2 +
3 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index b3af2e9..056f409 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 4057cd2..897136c 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -48,6 +48,23 @@
# endif
#endif

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
@@ -61,6 +78,9 @@ typedef struct GetBitContext {
int bit_count;
#endif
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -144,7 +164,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ FFMIN((gb)->size_in_bits_plus1, name##_index + num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -171,7 +196,11 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index += av_clip(n, -s->index, s->size_in_bits_plus1 - s->index);
+#endif
}

#elif defined A32_BITSTREAM_READER
@@ -196,7 +225,9 @@ static inline void skip_bits_long(GetBitContext *s, int n){
const uint32_t next = av_be2ne32(*name##_buffer_ptr); \
name##_cache0 |= NEG_USR32(next, name##_bit_count); \
name##_cache1 |= next << name##_bit_count; \
- name##_buffer_ptr++; \
+ if (UNCHECKED_BITSTREAM_READER || \
+ name##_buffer_ptr < (const uint32_t *)(gb)->buffer_end) \
+ name##_buffer_ptr++; \
name##_bit_count -= 32; \
} \
} while (0)
@@ -229,8 +260,18 @@ static inline int get_bits_count(const GetBitContext *s) {

static inline void skip_bits_long(GetBitContext *s, int n){
OPEN_READER(re, s);
+#if UNCHECKED_BITSTREAM_READER
re_bit_count += n;
re_buffer_ptr += re_bit_count>>5;
+#else
+ {
+ int offset_pos = (re_bit_count >> 5) + (n >> 5);
+ re_bit_count = (re_bit_count & 31) + (n & 31);
+ re_buffer_ptr += av_clip(offset_pos + (re_bit_count >> 5),
+ s->buffer - re_buffer_ptr,
+ s->buffer_end - re_buffer_ptr);
+ }
+#endif
re_bit_count &= 31;
re_cache0 = av_be2ne32(re_buffer_ptr[-1]) << re_bit_count;
re_cache1 = 0;
@@ -311,7 +352,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -391,6 +435,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
#ifdef ALT_BITSTREAM_READER
s->index = 0;
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.6
Laurent Aimar
2011-12-16 00:22:12 UTC
Permalink
Post by Ronald S. Bultje
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
One extra bit will ensure that get_bit_count() will return < 0 value,
but it does not ensure that get_bits/show_bits like functions or macros
return 0 on overread, and so it is incompatible with get_vlc* functions
(you will have an infinite loop, at least it was like that when I last
check this solution).
--
fenrir
Ronald S. Bultje
2011-12-16 00:24:05 UTC
Permalink
Hi,
Post by Laurent Aimar
Post by Ronald S. Bultje
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
One extra bit will ensure that get_bit_count() will return < 0 value,
but it does not ensure that get_bits/show_bits like functions or macros
return 0 on overread, and so it is incompatible with get_vlc* functions
(you will have an infinite loop, at least it was like that when I last
check this solution).
Why not?

Ronald
Laurent Aimar
2011-12-16 00:31:11 UTC
Permalink
Post by Laurent Aimar
Hi,
Post by Ronald S. Bultje
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
One extra bit will ensure that get_bit_count() will return < 0 value,
but it does not ensure that get_bits/show_bits like functions or macros
return 0 on overread, and so it is incompatible with get_vlc* functions
(you will have an infinite loop, at least it was like that when I last
check this solution).
Why not?
Please, read the thread
"[libav-devel] [PATCH] Checked get_bits.h functions to prevent overread"
as it contains some explanations and warnings about making a suitable
safe get_bits implementation (for existing usage of it).

For the get_vlc* case, the functions don't check for overread as it expects
the buffer to be padded with enough 0 bits (FF_INPUT_BUFFER_PADDING_SIZE
bytes), and those zeros will ensure that get_vlc() will abort on reading
an invalid VLC code.

Regards,
--
fenrir
Ronald S. Bultje
2011-12-16 01:02:55 UTC
Permalink
Hi,
Post by Laurent Aimar
Post by Laurent Aimar
Post by Ronald S. Bultje
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
One extra bit will ensure that get_bit_count() will return < 0
value,
Post by Laurent Aimar
but it does not ensure that get_bits/show_bits like functions or
macros
Post by Laurent Aimar
return 0 on overread, and so it is incompatible with get_vlc*
functions
Post by Laurent Aimar
(you will have an infinite loop, at least it was like that when I
last
Post by Laurent Aimar
check this solution).
Why not?
Please, read the thread
"[libav-devel] [PATCH] Checked get_bits.h functions to prevent overread"
as it contains some explanations and warnings about making a suitable
safe get_bits implementation (for existing usage of it).
For the get_vlc* case, the functions don't check for overread as it expects
the buffer to be padded with enough 0 bits (FF_INPUT_BUFFER_PADDING_SIZE
bytes), and those zeros will ensure that get_vlc() will abort on reading
an invalid VLC code.
The buffer is padded with 32 bytes of zeroes and index never goes more than
1 bit beyond. You're telling me get_vlc() will read 32 bytes in a single
iteration?

Ronald
Laurent Aimar
2011-12-16 09:17:08 UTC
Permalink
Post by Laurent Aimar
Hi,
Post by Laurent Aimar
Post by Ronald S. Bultje
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
One extra bit will ensure that get_bit_count() will return < 0
value,
Post by Laurent Aimar
but it does not ensure that get_bits/show_bits like functions or
macros
Post by Laurent Aimar
return 0 on overread, and so it is incompatible with get_vlc*
functions
Post by Laurent Aimar
(you will have an infinite loop, at least it was like that when I
last
Post by Laurent Aimar
check this solution).
Why not?
Please, read the thread
"[libav-devel] [PATCH] Checked get_bits.h functions to prevent overread"
as it contains some explanations and warnings about making a suitable
safe get_bits implementation (for existing usage of it).
For the get_vlc* case, the functions don't check for overread as it expects
the buffer to be padded with enough 0 bits (FF_INPUT_BUFFER_PADDING_SIZE
bytes), and those zeros will ensure that get_vlc() will abort on reading
an invalid VLC code.
The buffer is padded with 32 bytes of zeroes and index never goes more than 1
bit beyond. You're telling me get_vlc() will read 32 bytes in a single
iteration?
No. But your implementation when reaching the end may not return 0 if you
read/show more than 1 bit. You need to let the index goes beyong the max
by at least the number of bits the cache can hold (in the case where size_in_bits
is a multiple of 8, otherwise probably a bit more but it may depend on the
actual implementation of the reader) to allow reading into the padding which
will then allow returning 0.
--
fenrir
Måns Rullgård
2011-12-16 12:00:19 UTC
Permalink
Post by Laurent Aimar
Post by Laurent Aimar
Hi,
Post by Laurent Aimar
Post by Ronald S. Bultje
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
One extra bit will ensure that get_bit_count() will return
< 0 value, but it does not ensure that get_bits/show_bits
like functions or macros return 0 on overread, and so it is
incompatible with get_vlc* functions (you will have an
infinite loop, at least it was like that when I last check
this solution).
Why not?
Please, read the thread
"[libav-devel] [PATCH] Checked get_bits.h functions to prevent overread"
as it contains some explanations and warnings about making a suitable
safe get_bits implementation (for existing usage of it).
For the get_vlc* case, the functions don't check for overread as
it expects the buffer to be padded with enough 0 bits
(FF_INPUT_BUFFER_PADDING_SIZE bytes), and those zeros will ensure
that get_vlc() will abort on reading an invalid VLC code.
The buffer is padded with 32 bytes of zeroes and index never goes more than 1
bit beyond. You're telling me get_vlc() will read 32 bytes in a single
iteration?
No. But your implementation when reaching the end may not return 0 if
you read/show more than 1 bit. You need to let the index goes beyong
the max by at least the number of bits the cache can hold (in the case
where size_in_bits is a multiple of 8, otherwise probably a bit more
but it may depend on the actual implementation of the reader) to allow
reading into the padding which will then allow returning 0.
You are not making any sense at all. The bitstream readers never return
bits *before* the current position. There is also no possibility of an
infinite loop, for the simple reason that get_vlc() does not contain a
loop.

It is probably a good idea to extend the cap to a position beyond the
last data byte, so callers don't need to make sure any extra bits in the
last byte are zeroed. Adding 8 instead of 1 will do this.
--
Måns Rullgård
***@mansr.com
Måns Rullgård
2011-12-15 18:10:44 UTC
Permalink
Post by Ronald S. Bultje
---
libavcodec/get_bits.h | 14 +++++++-------
1 files changed, 7 insertions(+), 7 deletions(-)
OK
--
Måns Rullgård
***@mansr.com
Mans Rullgard
2011-12-16 21:54:31 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
configure | 2 ++
libavcodec/get_bits.h | 37 ++++++++++++++++++++++++++++++++++++-
libavcodec/wmavoice.c | 2 ++
3 files changed, 40 insertions(+), 1 deletions(-)

diff --git a/configure b/configure
index 974e75f..0349862 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,7 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 684cc99..91afe34 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -35,12 +35,32 @@
#include "libavutil/log.h"
#include "mathops.h"

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
const uint8_t *buffer, *buffer_end;
int index;
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus1;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -137,7 +157,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) name##_index = \
+ FFMIN((gb)->size_in_bits_plus1, name##_index + num)
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -164,7 +189,11 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index += av_clip(n, -s->index, s->size_in_bits_plus1 - s->index);
+#endif
}

/**
@@ -237,7 +266,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus1)
+#endif
+ index++;
s->index = index;

return result;
@@ -314,6 +346,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus1 = bit_size + 1;
+#endif
s->buffer_end = buffer + buffer_size;
s->index = 0;
}
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.8
Martin Storsjö
2011-12-16 21:57:35 UTC
Permalink
Post by Ronald S. Bultje
diff --git a/configure b/configure
index 974e75f..0349862 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
Doesn't this still need to be enabled somewhere, or the help text changed
into --enable?

// Martin
Måns Rullgård
2011-12-16 22:02:29 UTC
Permalink
Post by Martin Storsjö
Post by Ronald S. Bultje
diff --git a/configure b/configure
index 974e75f..0349862 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
Doesn't this still need to be enabled somewhere, or the help text
changed into --enable?
Yes, I'm running some benchmarks now to determine whether to enable this
by default. We'll update the patch depending on the outcome.
--
Måns Rullgård
***@mansr.com
Måns Rullgård
2011-12-16 22:59:47 UTC
Permalink
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
diff --git a/configure b/configure
index 974e75f..0349862 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader disable buffer boundary checking in bitreaders (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
Doesn't this still need to be enabled somewhere, or the help text
changed into --enable?
Yes, I'm running some benchmarks now to determine whether to enable this
by default. We'll update the patch depending on the outcome.
The performance penalty on ARM (both Cortex-A8 and A9) is 0-2% for most
files, 4% in extreme cases. In many cases, the change is within the
error margin of the measurement.

I will not object to turning this on by default.
--
Måns Rullgård
***@mansr.com
Ronald S. Bultje
2011-12-17 00:18:48 UTC
Permalink
Hi,
Post by Ronald S. Bultje
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
diff --git a/configure b/configure
index 974e75f..0349862 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime
(bigger binary)
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
--enable-hardcoded-tables use hardcoded tables instead of runtime
generation
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
+ --disable-safe-bitstream-reader disable buffer boundary checking in
bitreaders (faster, but may crash)
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
--enable-memalign-hack emulate memalign, interferes with memory
debuggers
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
Doesn't this still need to be enabled somewhere, or the help text
changed into --enable?
Yes, I'm running some benchmarks now to determine whether to enable this
by default. We'll update the patch depending on the outcome.
The performance penalty on ARM (both Cortex-A8 and A9) is 0-2% for most
files, 4% in extreme cases. In many cases, the change is within the
error margin of the measurement.
I will not object to turning this on by default.
Awesome, if you can add the proper line to configure, patch is OK with me.
Feel free to push directly.

Ronald
Måns Rullgård
2011-12-17 00:23:06 UTC
Permalink
Post by Ronald S. Bultje
Hi,
Post by Ronald S. Bultje
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
diff --git a/configure b/configure
index 974e75f..0349862 100755
--- a/configure
+++ b/configure
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime
(bigger binary)
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
--enable-hardcoded-tables use hardcoded tables instead of runtime
generation
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
+ --disable-safe-bitstream-reader disable buffer boundary checking in
bitreaders (faster, but may crash)
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
--enable-memalign-hack emulate memalign, interferes with memory
debuggers
Post by Måns Rullgård
Post by Martin Storsjö
Post by Ronald S. Bultje
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +977,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
Doesn't this still need to be enabled somewhere, or the help text
changed into --enable?
Yes, I'm running some benchmarks now to determine whether to enable this
by default. We'll update the patch depending on the outcome.
The performance penalty on ARM (both Cortex-A8 and A9) is 0-2% for most
files, 4% in extreme cases. In many cases, the change is within the
error margin of the measurement.
I will not object to turning this on by default.
Awesome, if you can add the proper line to configure, patch is OK with me.
Feel free to push directly.
What about changing +1 to +8?
--
Måns Rullgård
***@mansr.com
Mans Rullgard
2011-12-17 01:45:29 UTC
Permalink
From: "Ronald S. Bultje" <***@gmail.com>

When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().

Based on work (for GCI) by Aneesh Dogra <***@gmail.com>, and
inspired by patch in Chromium by Chris Evans <***@chromium.org>.
---
Now with safe reader enabled by default and plus8 instead of plus1
as discussed.

Are the comments above still accurate? We've been through a few
revisions of this...
---
configure | 5 +++++
libavcodec/get_bits.h | 37 ++++++++++++++++++++++++++++++++++++-
libavcodec/wmavoice.c | 2 ++
3 files changed, 43 insertions(+), 1 deletions(-)

diff --git a/configure b/configure
index 974e75f..22542af 100755
--- a/configure
+++ b/configure
@@ -113,6 +113,9 @@ Configuration options:
--disable-dxva2 disable DXVA2 code
--enable-runtime-cpudetect detect cpu capabilities at runtime (bigger binary)
--enable-hardcoded-tables use hardcoded tables instead of runtime generation
+ --disable-safe-bitstream-reader
+ disable buffer boundary checking in bitreaders
+ (faster, but may crash)
--enable-memalign-hack emulate memalign, interferes with memory debuggers
--disable-everything disable all components listed below
--disable-encoder=NAME disable encoder NAME
@@ -976,6 +979,7 @@ CONFIG_LIST="
rdft
rtpdec
runtime_cpudetect
+ safe_bitstream_reader
shared
sinewin
small
@@ -1679,6 +1683,7 @@ enable doc
enable fastdiv
enable network
enable optimizations
+enable safe_bitstream_reader
enable static
enable swscale_alpha

diff --git a/libavcodec/get_bits.h b/libavcodec/get_bits.h
index 684cc99..33ddb4b 100644
--- a/libavcodec/get_bits.h
+++ b/libavcodec/get_bits.h
@@ -35,12 +35,32 @@
#include "libavutil/log.h"
#include "mathops.h"

+/*
+ * Safe bitstream reading:
+ * optionally, the get_bits API can check to ensure that we
+ * don't read past input buffer boundaries. This is protected
+ * with CONFIG_SAFE_BITSTREAM_READER at the global level, and
+ * then below that with UNCHECKED_BITSTREAM_READER at the per-
+ * decoder level. This means that decoders that check internally
+ * can "#define UNCHECKED_BITSTREAM_READER 1" to disable
+ * overread checks.
+ * Boundary checking causes a minor performance penalty so for
+ * applications that won't want/need this, it can be disabled
+ * globally using "#define CONFIG_SAFE_BITSTREAM_READER 0".
+ */
+#ifndef UNCHECKED_BITSTREAM_READER
+#define UNCHECKED_BITSTREAM_READER !CONFIG_SAFE_BITSTREAM_READER
+#endif
+
/* bit input */
/* buffer, buffer_end and size_in_bits must be present and used by every reader */
typedef struct GetBitContext {
const uint8_t *buffer, *buffer_end;
int index;
int size_in_bits;
+#if !UNCHECKED_BITSTREAM_READER
+ int size_in_bits_plus8;
+#endif
} GetBitContext;

#define VLC_TYPE int16_t
@@ -137,7 +157,12 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
# endif

// FIXME name?
+#if UNCHECKED_BITSTREAM_READER
# define SKIP_COUNTER(name, gb, num) name##_index += (num)
+#else
+# define SKIP_COUNTER(name, gb, num) \
+ name##_index = FFMIN((gb)->size_in_bits_plus8, name##_index + (num))
+#endif

# define SKIP_BITS(name, gb, num) do { \
SKIP_CACHE(name, gb, num); \
@@ -164,7 +189,11 @@ static inline int get_bits_count(const GetBitContext *s){
}

static inline void skip_bits_long(GetBitContext *s, int n){
+#if UNCHECKED_BITSTREAM_READER
s->index += n;
+#else
+ s->index += av_clip(n, -s->index, s->size_in_bits_plus8 - s->index);
+#endif
}

/**
@@ -237,7 +266,10 @@ static inline unsigned int get_bits1(GetBitContext *s){
result <<= index & 7;
result >>= 8 - 1;
#endif
- index++;
+#if !UNCHECKED_BITSTREAM_READER
+ if (s->index < s->size_in_bits_plus8)
+#endif
+ index++;
s->index = index;

return result;
@@ -314,6 +346,9 @@ static inline void init_get_bits(GetBitContext *s,

s->buffer = buffer;
s->size_in_bits = bit_size;
+#if !UNCHECKED_BITSTREAM_READER
+ s->size_in_bits_plus8 = bit_size + 8;
+#endif
s->buffer_end = buffer + buffer_size;
s->index = 0;
}
diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c
index 00e985d..8854e35 100644
--- a/libavcodec/wmavoice.c
+++ b/libavcodec/wmavoice.c
@@ -25,6 +25,8 @@
* @author Ronald S. Bultje <***@gmail.com>
*/

+#define UNCHECKED_BITSTREAM_READER 1
+
#include <math.h>
#include "avcodec.h"
#include "get_bits.h"
--
1.7.8
Janne Grunau
2011-12-17 14:02:31 UTC
Permalink
Post by Ronald S. Bultje
When turned on, H264/CAVLC gets ~15% (CVPCMNL1_SVA_C.264) slower for
ultra-high-bitrate files, or ~2.5% (CVFI1_SVA_C.264) for lower-bitrate
files. Other codecs are affected to a lesser extent because they are
less optimized; e.g., VC-1 slows down by less than 1% (all on x86).
The patch generated 3 extra instructions (cmp, cmovae and mov) per
call to get_bits().
---
Now with safe reader enabled by default and plus8 instead of plus1
as discussed.
Are the comments above still accurate? We've been through a few
revisions of this...
performance stats are still accurate, extra instructions on x86_64
verified at one place.

Please add a sentence with your ARM numbers.

Patch itself is ok.

Janne

Loading...