diff options
author | Scott LaVarnway <slavarnway@google.com> | 2018-06-06 06:29:16 -0700 |
---|---|---|
committer | Scott LaVarnway <slavarnway@google.com> | 2018-06-12 12:31:13 +0000 |
commit | 589b7a1f4ab5e4665ddb4c2090d2266dd40da304 (patch) | |
tree | ecf6570db9226a7717d08155bb44db6242d36765 /aom_dsp | |
parent | e29094a3b4980413d56268970e48bc260a5c70f5 (diff) |
Change blend function param order from h,w to w,h (and suby,subx to subx,suby)
Follow-up to 59721
Change-Id: I272551ab78a0efdcdb8e7297e890f06693ebf3f7
Diffstat (limited to 'aom_dsp')
-rwxr-xr-x | aom_dsp/aom_dsp_rtcd_defs.pl | 16 | ||||
-rw-r--r-- | aom_dsp/blend_a64_hmask.c | 4 | ||||
-rw-r--r-- | aom_dsp/blend_a64_mask.c | 10 | ||||
-rw-r--r-- | aom_dsp/blend_a64_vmask.c | 4 | ||||
-rw-r--r-- | aom_dsp/x86/blend_a64_hmask_sse4.c | 8 | ||||
-rw-r--r-- | aom_dsp/x86/blend_a64_mask_sse4.c | 104 | ||||
-rw-r--r-- | aom_dsp/x86/blend_a64_vmask_sse4.c | 36 |
7 files changed, 91 insertions, 91 deletions
diff --git a/aom_dsp/aom_dsp_rtcd_defs.pl b/aom_dsp/aom_dsp_rtcd_defs.pl index 934646a6c..7a8c20dee 100755 --- a/aom_dsp/aom_dsp_rtcd_defs.pl +++ b/aom_dsp/aom_dsp_rtcd_defs.pl @@ -501,19 +501,19 @@ if (aom_config("CONFIG_AV1_ENCODER") eq "yes") { # # Alpha blending with mask # -add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params"; +add_proto qw/void aom_lowbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params"; specialize qw/aom_lowbd_blend_a64_d16_mask sse4_1/; -add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, ConvolveParams *conv_params, const int bd"; -add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx"; -add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w"; -add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w"; +add_proto qw/void aom_highbd_blend_a64_d16_mask/, "uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const 
CONV_BUF_TYPE *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, ConvolveParams *conv_params, const int bd"; +add_proto qw/void aom_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby"; +add_proto qw/void aom_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; +add_proto qw/void aom_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h"; specialize "aom_blend_a64_mask", qw/sse4_1/; specialize "aom_blend_a64_hmask", qw/sse4_1/; specialize "aom_blend_a64_vmask", qw/sse4_1/; -add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int h, int w, int suby, int subx, int bd"; -add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd"; -add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int h, int w, int bd"; +add_proto qw/void aom_highbd_blend_a64_mask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, int w, int h, int subx, int suby, int bd"; +add_proto qw/void aom_highbd_blend_a64_hmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t 
src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; +add_proto qw/void aom_highbd_blend_a64_vmask/, "uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, int w, int h, int bd"; specialize "aom_highbd_blend_a64_mask", qw/sse4_1/; specialize "aom_highbd_blend_a64_hmask", qw/sse4_1/; specialize "aom_highbd_blend_a64_vmask", qw/sse4_1/; diff --git a/aom_dsp/blend_a64_hmask.c b/aom_dsp/blend_a64_hmask.c index 516094313..0554b43d1 100644 --- a/aom_dsp/blend_a64_hmask.c +++ b/aom_dsp/blend_a64_hmask.c @@ -21,7 +21,7 @@ void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { int i, j; assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); @@ -43,7 +43,7 @@ void aom_blend_a64_hmask_c(uint8_t *dst, uint32_t dst_stride, void aom_highbd_blend_a64_hmask_c(uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, - const uint8_t *mask, int h, int w, int bd) { + const uint8_t *mask, int w, int h, int bd) { int i, j; uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8); diff --git a/aom_dsp/blend_a64_mask.c b/aom_dsp/blend_a64_mask.c index 7b5c8dc27..992cc5c0c 100644 --- a/aom_dsp/blend_a64_mask.c +++ b/aom_dsp/blend_a64_mask.c @@ -36,7 +36,7 @@ void aom_lowbd_blend_a64_d16_mask_c( uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w, int subh, int subw, + const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params) { int i, j; const int bd = 8; @@ -123,7 +123,7 @@ void 
aom_lowbd_blend_a64_d16_mask_c( void aom_highbd_blend_a64_d16_mask_c( uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w, int subh, int subw, + const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int subh, ConvolveParams *conv_params, const int bd) { const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; const int round_offset = (1 << (offset_bits - conv_params->round_1)) + @@ -227,8 +227,8 @@ void aom_highbd_blend_a64_d16_mask_c( void aom_blend_a64_mask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, - int w, int subh, int subw) { + const uint8_t *mask, uint32_t mask_stride, int w, + int h, int subw, int subh) { int i, j; assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); @@ -285,7 +285,7 @@ void aom_highbd_blend_a64_mask_c(uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, - int h, int w, int subh, int subw, int bd) { + int w, int h, int subw, int subh, int bd) { int i, j; uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8); diff --git a/aom_dsp/blend_a64_vmask.c b/aom_dsp/blend_a64_vmask.c index 20528618a..4f222e17f 100644 --- a/aom_dsp/blend_a64_vmask.c +++ b/aom_dsp/blend_a64_vmask.c @@ -21,7 +21,7 @@ void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { int i, j; assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); @@ -44,7 +44,7 @@ void aom_blend_a64_vmask_c(uint8_t *dst, uint32_t dst_stride, void 
aom_highbd_blend_a64_vmask_c(uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, - const uint8_t *mask, int h, int w, int bd) { + const uint8_t *mask, int w, int h, int bd) { int i, j; uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); const uint16_t *src0 = CONVERT_TO_SHORTPTR(src0_8); diff --git a/aom_dsp/x86/blend_a64_hmask_sse4.c b/aom_dsp/x86/blend_a64_hmask_sse4.c index 07765a605..4f5e3f8c1 100644 --- a/aom_dsp/x86/blend_a64_hmask_sse4.c +++ b/aom_dsp/x86/blend_a64_hmask_sse4.c @@ -19,16 +19,16 @@ void aom_blend_a64_hmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { aom_blend_a64_mask_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, 0, h, w, 0, 0); + src1_stride, mask, 0, w, h, 0, 0); } void aom_highbd_blend_a64_hmask_sse4_1( uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, - const uint8_t *mask, int h, int w, int bd) { + const uint8_t *mask, int w, int h, int bd) { aom_highbd_blend_a64_mask_sse4_1(dst_8, dst_stride, src0_8, src0_stride, - src1_8, src1_stride, mask, 0, h, w, 0, 0, + src1_8, src1_stride, mask, 0, w, h, 0, 0, bd); } diff --git a/aom_dsp/x86/blend_a64_mask_sse4.c b/aom_dsp/x86/blend_a64_mask_sse4.c index 22569b5f5..49c20b467 100644 --- a/aom_dsp/x86/blend_a64_mask_sse4.c +++ b/aom_dsp/x86/blend_a64_mask_sse4.c @@ -31,7 +31,7 @@ static void blend_a64_mask_w4_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, - int h, int w) { + int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); (void)w; @@ -58,7 +58,7 @@ static void blend_a64_mask_w8_sse4_1(uint8_t *dst, uint32_t dst_stride, 
const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, const uint8_t *mask, uint32_t mask_stride, - int h, int w) { + int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); (void)w; @@ -84,7 +84,7 @@ static void blend_a64_mask_w8_sse4_1(uint8_t *dst, uint32_t dst_stride, static void blend_a64_mask_w16n_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); do { @@ -119,7 +119,7 @@ static void blend_a64_mask_w16n_sse4_1( static void blend_a64_mask_sx_w4_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -149,7 +149,7 @@ static void blend_a64_mask_sx_w4_sse4_1( static void blend_a64_mask_sx_w8_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -179,7 +179,7 @@ static void blend_a64_mask_sx_w8_sse4_1( static void blend_a64_mask_sx_w16n_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t 
mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -219,7 +219,7 @@ static void blend_a64_mask_sx_w16n_sse4_1( static void blend_a64_mask_sy_w4_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); (void)w; @@ -248,7 +248,7 @@ static void blend_a64_mask_sy_w4_sse4_1( static void blend_a64_mask_sy_w8_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); (void)w; @@ -277,7 +277,7 @@ static void blend_a64_mask_sy_w8_sse4_1( static void blend_a64_mask_sy_w16n_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zero = _mm_setzero_si128(); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -315,7 +315,7 @@ static void blend_a64_mask_sy_w16n_sse4_1( static void blend_a64_mask_sx_sy_w4_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 
0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -350,7 +350,7 @@ static void blend_a64_mask_sx_sy_w4_sse4_1( static void blend_a64_mask_sx_sy_w8_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -385,7 +385,7 @@ static void blend_a64_mask_sx_sy_w8_sse4_1( static void blend_a64_mask_sx_sy_w16n_sse4_1( uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -435,12 +435,12 @@ static void blend_a64_mask_sx_sy_w16n_sse4_1( void aom_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, - int w, int suby, int subx) { + const uint8_t *mask, uint32_t mask_stride, int w, + int h, int subx, int suby) { typedef void (*blend_fn)( uint8_t * dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w); + const uint8_t *mask, uint32_t mask_stride, int w, int h); // Dimensions are: width_index X subx X suby static const blend_fn blend[3][2][2] = { @@ -465,11 +465,11 @@ void aom_blend_a64_mask_sse4_1(uint8_t *dst, uint32_t dst_stride, if 
(UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2) aom_blend_a64_mask_c(dst, dst_stride, src0, src0_stride, src1, src1_stride, - mask, mask_stride, h, w, suby, subx); + mask, mask_stride, w, h, subx, suby); } else { blend[(w >> 2) & 3][subx != 0][suby != 0](dst, dst_stride, src0, src0_stride, src1, src1_stride, - mask, mask_stride, h, w); + mask, mask_stride, w, h); } } @@ -502,7 +502,7 @@ static INLINE void blend_a64_mask_bn_w4_sse4_1( static void blend_a64_mask_b10_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, blend_4_b10); @@ -511,7 +511,7 @@ static void blend_a64_mask_b10_w4_sse4_1( static void blend_a64_mask_b12_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, blend_4_b12); @@ -520,7 +520,7 @@ static void blend_a64_mask_b12_w4_sse4_1( static INLINE void blend_a64_mask_bn_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w, + const uint8_t *mask, uint32_t mask_stride, int w, int h, blend_unit_fn blend) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -545,18 +545,18 @@ static INLINE void blend_a64_mask_bn_w8n_sse4_1( static void blend_a64_mask_b10_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, 
uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b10); } static void blend_a64_mask_b12_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b12); } @@ -593,7 +593,7 @@ static INLINE void blend_a64_mask_bn_sx_w4_sse4_1( static void blend_a64_mask_b10_sx_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, @@ -603,7 +603,7 @@ static void blend_a64_mask_b10_sx_w4_sse4_1( static void blend_a64_mask_b12_sx_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_sx_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, @@ -613,7 +613,7 @@ static void blend_a64_mask_b12_sx_w4_sse4_1( static INLINE void blend_a64_mask_bn_sx_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t 
mask_stride, int h, int w, + const uint8_t *mask, uint32_t mask_stride, int w, int h, blend_unit_fn blend) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); @@ -642,18 +642,18 @@ static INLINE void blend_a64_mask_bn_sx_w8n_sse4_1( static void blend_a64_mask_b10_sx_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b10); } static void blend_a64_mask_b12_sx_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_sx_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b12); } @@ -689,7 +689,7 @@ static INLINE void blend_a64_mask_bn_sy_w4_sse4_1( static void blend_a64_mask_b10_sy_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, @@ -699,7 +699,7 @@ static void blend_a64_mask_b10_sy_w4_sse4_1( static void blend_a64_mask_b12_sy_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + 
const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, @@ -709,7 +709,7 @@ static void blend_a64_mask_b12_sy_w4_sse4_1( static INLINE void blend_a64_mask_bn_sy_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w, + const uint8_t *mask, uint32_t mask_stride, int w, int h, blend_unit_fn blend) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); @@ -737,18 +737,18 @@ static INLINE void blend_a64_mask_bn_sy_w8n_sse4_1( static void blend_a64_mask_b10_sy_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b10); } static void blend_a64_mask_b12_sy_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b12); } @@ -790,7 +790,7 @@ static INLINE void blend_a64_mask_bn_sx_sy_w4_sse4_1( static void blend_a64_mask_b10_sx_sy_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { 
(void)w; blend_a64_mask_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, @@ -800,7 +800,7 @@ static void blend_a64_mask_b10_sx_sy_w4_sse4_1( static void blend_a64_mask_b12_sx_sy_w4_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { (void)w; blend_a64_mask_bn_sx_sy_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, mask_stride, h, @@ -810,7 +810,7 @@ static void blend_a64_mask_b12_sx_sy_w4_sse4_1( static INLINE void blend_a64_mask_bn_sx_sy_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w, + const uint8_t *mask, uint32_t mask_stride, int w, int h, blend_unit_fn blend) { const __m128i v_zmask_b = _mm_set_epi8(0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff, 0, 0xff); @@ -844,18 +844,18 @@ static INLINE void blend_a64_mask_bn_sx_sy_w8n_sse4_1( static void blend_a64_mask_b10_sx_sy_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b10); } static void blend_a64_mask_b12_sx_sy_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w) { + const uint8_t *mask, uint32_t mask_stride, int w, int h) { blend_a64_mask_bn_sx_sy_w8n_sse4_1(dst, dst_stride, src0, 
src0_stride, src1, - src1_stride, mask, mask_stride, h, w, + src1_stride, mask, mask_stride, w, h, blend_8_b12); } @@ -868,12 +868,12 @@ void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst_8, uint32_t dst_stride, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, const uint8_t *mask, - uint32_t mask_stride, int h, int w, - int suby, int subx, int bd) { + uint32_t mask_stride, int w, int h, + int subx, int suby, int bd) { typedef void (*blend_fn)( uint16_t * dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w); + const uint8_t *mask, uint32_t mask_stride, int w, int h); // Dimensions are: bd_index X width_index X subx X suby static const blend_fn blend[2][2][2][2] = { @@ -908,8 +908,8 @@ void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst_8, uint32_t dst_stride, assert(bd == 8 || bd == 10 || bd == 12); if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2) aom_highbd_blend_a64_mask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8, - src1_stride, mask, mask_stride, h, w, suby, - subx, bd); + src1_stride, mask, mask_stride, w, h, subx, + suby, bd); } else { uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8); const uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8); @@ -917,7 +917,7 @@ void aom_highbd_blend_a64_mask_sse4_1(uint8_t *dst_8, uint32_t dst_stride, blend[bd == 12][(w >> 2) & 1][subx != 0][suby != 0]( dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, - mask_stride, h, w); + mask_stride, w, h); } } @@ -944,7 +944,7 @@ static INLINE void blend_a64_d16_mask(uint8_t *dst, const CONV_BUF_TYPE *src0, void aom_lowbd_blend_a64_d16_mask_sse4_1( uint8_t *dst, uint32_t dst_stride, const CONV_BUF_TYPE *src0, uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, - const uint8_t *mask, uint32_t mask_stride, int h, int w, int subh, int subw, + const uint8_t *mask, uint32_t mask_stride, int w, int h, int subw, int 
subh, ConvolveParams *conv_params) { const int bd = 8; const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; diff --git a/aom_dsp/x86/blend_a64_vmask_sse4.c b/aom_dsp/x86/blend_a64_vmask_sse4.c index e2e464dc6..59506bdfe 100644 --- a/aom_dsp/x86/blend_a64_vmask_sse4.c +++ b/aom_dsp/x86/blend_a64_vmask_sse4.c @@ -30,7 +30,7 @@ static void blend_a64_vmask_w4_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); (void)w; @@ -55,7 +55,7 @@ static void blend_a64_vmask_w4_sse4_1(uint8_t *dst, uint32_t dst_stride, static void blend_a64_vmask_w8_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); (void)w; @@ -82,7 +82,7 @@ static void blend_a64_vmask_w16n_sse4_1(uint8_t *dst, uint32_t dst_stride, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); do { @@ -112,11 +112,11 @@ static void blend_a64_vmask_w16n_sse4_1(uint8_t *dst, uint32_t dst_stride, void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { typedef void (*blend_fn)(uint8_t * dst, uint32_t dst_stride, const uint8_t *src0, uint32_t src0_stride, const uint8_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w); + const uint8_t *mask, int w, int h); // Dimension: width_index static const blend_fn blend[9] = { @@ -139,8 +139,8 
@@ void aom_blend_a64_vmask_sse4_1(uint8_t *dst, uint32_t dst_stride, assert(IS_POWER_OF_TWO(h)); assert(IS_POWER_OF_TWO(w)); - blend[w & 0xf](dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, h, - w); + blend[w & 0xf](dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, w, + h); } ////////////////////////////////////////////////////////////////////////////// @@ -173,7 +173,7 @@ static void blend_a64_vmask_b10_w4_sse4_1(uint16_t *dst, uint32_t dst_stride, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { (void)w; blend_a64_vmask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, h, blend_4_b10); @@ -184,7 +184,7 @@ static void blend_a64_vmask_b12_w4_sse4_1(uint16_t *dst, uint32_t dst_stride, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { (void)w; blend_a64_vmask_bn_w4_sse4_1(dst, dst_stride, src0, src0_stride, src1, src1_stride, mask, h, blend_4_b12); @@ -193,7 +193,7 @@ static void blend_a64_vmask_b12_w4_sse4_1(uint16_t *dst, uint32_t dst_stride, static INLINE void blend_a64_vmask_bn_w8n_sse4_1( uint16_t *dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w, blend_unit_fn blend) { + const uint8_t *mask, int w, int h, blend_unit_fn blend) { const __m128i v_maxval_w = _mm_set1_epi16(AOM_BLEND_A64_MAX_ALPHA); do { @@ -217,9 +217,9 @@ static void blend_a64_vmask_b10_w8n_sse4_1(uint16_t *dst, uint32_t dst_stride, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { blend_a64_vmask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, h, w, blend_8_b10); + src1_stride, mask, w, h, blend_8_b10); } static void 
blend_a64_vmask_b12_w8n_sse4_1(uint16_t *dst, uint32_t dst_stride, @@ -227,9 +227,9 @@ static void blend_a64_vmask_b12_w8n_sse4_1(uint16_t *dst, uint32_t dst_stride, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w) { + const uint8_t *mask, int w, int h) { blend_a64_vmask_bn_w8n_sse4_1(dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, h, w, blend_8_b12); + src1_stride, mask, w, h, blend_8_b12); } ////////////////////////////////////////////////////////////////////////////// @@ -239,11 +239,11 @@ static void blend_a64_vmask_b12_w8n_sse4_1(uint16_t *dst, uint32_t dst_stride, void aom_highbd_blend_a64_vmask_sse4_1( uint8_t *dst_8, uint32_t dst_stride, const uint8_t *src0_8, uint32_t src0_stride, const uint8_t *src1_8, uint32_t src1_stride, - const uint8_t *mask, int h, int w, int bd) { + const uint8_t *mask, int w, int h, int bd) { typedef void (*blend_fn)(uint16_t * dst, uint32_t dst_stride, const uint16_t *src0, uint32_t src0_stride, const uint16_t *src1, uint32_t src1_stride, - const uint8_t *mask, int h, int w); + const uint8_t *mask, int w, int h); // Dimensions are: bd_index X width_index static const blend_fn blend[2][2] = { @@ -271,13 +271,13 @@ void aom_highbd_blend_a64_vmask_sse4_1( if (UNLIKELY((h | w) & 3)) { // if (w <= 2 || h <= 2) aom_highbd_blend_a64_vmask_c(dst_8, dst_stride, src0_8, src0_stride, src1_8, - src1_stride, mask, h, w, bd); + src1_stride, mask, w, h, bd); } else { uint16_t *const dst = CONVERT_TO_SHORTPTR(dst_8); const uint16_t *const src0 = CONVERT_TO_SHORTPTR(src0_8); const uint16_t *const src1 = CONVERT_TO_SHORTPTR(src1_8); blend[bd == 12][(w >> 2) & 1](dst, dst_stride, src0, src0_stride, src1, - src1_stride, mask, h, w); + src1_stride, mask, w, h); } } |