diff options
author | Cherma Rajan A <cherma.rajan@ittiam.com> | 2018-03-20 10:00:51 +0530 |
---|---|---|
committer | Yunqing Wang <yunqingwang@google.com> | 2018-03-21 16:34:04 +0000 |
commit | a7be3686a141dfe805eef41f33e29994cc604c9d (patch) | |
tree | 34ecaf631d6a5306a0a8b8d798f98c099c5a030d /aom_dsp/blend_a64_mask.c | |
parent | 3a7b5c283f5c1aa67fad33c9405bcde8de83c5fd (diff) |
Change the convolve output from 32-bit to 16-bit
CONV_BUF_TYPE is changed from int32_t to uint16_t, and the rounding stage is
removed.
The Warp, Blend, Convolve, and Convolve scale functions are modified to output
with 16-bit precision.
The relevant intrinsic functions and unit tests are modified.
Change-Id: I2dbcf4612cf42efdad0defeb74a44bc5153ed78e
Diffstat (limited to 'aom_dsp/blend_a64_mask.c')
-rw-r--r-- | aom_dsp/blend_a64_mask.c | 178 |
1 files changed, 178 insertions, 0 deletions
diff --git a/aom_dsp/blend_a64_mask.c b/aom_dsp/blend_a64_mask.c index 4c42274ab..559f44df4 100644 --- a/aom_dsp/blend_a64_mask.c +++ b/aom_dsp/blend_a64_mask.c @@ -33,6 +33,183 @@ // In contrast, the output of the non-d32 functions will not be further rounded, // so we *should* use ROUND_POWER_OF_TWO there. +#if CONFIG_LOWPRECISION_BLEND +void aom_lowbd_blend_a64_d16_mask(uint8_t *dst, uint32_t dst_stride, + const CONV_BUF_TYPE *src0, + uint32_t src0_stride, + const CONV_BUF_TYPE *src1, + uint32_t src1_stride, const uint8_t *mask, + uint32_t mask_stride, int h, int w, int subh, + int subw, ConvolveParams *conv_params) { + int i, j; + const int bd = 8; + const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; + const int round_offset = (1 << (offset_bits - conv_params->round_1)) + + (1 << (offset_bits - conv_params->round_1 - 1)); + const int round_bits = + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; + + assert(IMPLIES((void *)src0 == dst, src0_stride == dst_stride)); + assert(IMPLIES((void *)src1 == dst, src1_stride == dst_stride)); + + assert(h >= 1); + assert(w >= 1); + assert(IS_POWER_OF_TWO(h)); + assert(IS_POWER_OF_TWO(w)); + + if (subw == 0 && subh == 0) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = mask[i * mask_stride + j]; + res = ((m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); + } + } + } else if (subw == 1 && subh == 1) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = ROUND_POWER_OF_TWO( + mask[(2 * i) * mask_stride + (2 * j)] + + mask[(2 * i + 1) * mask_stride + (2 * j)] + + mask[(2 * i) * mask_stride + (2 * j + 1)] + + mask[(2 * i + 1) * mask_stride + (2 * j + 1)], + 2); + res = ((m * (int32_t)src0[i * src0_stride + j] + + 
(AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); + } + } + } else if (subw == 1 && subh == 0) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], + mask[i * mask_stride + (2 * j + 1)]); + res = ((m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); + } + } + } else { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], + mask[(2 * i + 1) * mask_stride + j]); + res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel(ROUND_POWER_OF_TWO(res, round_bits)); + } + } + } +} + +void aom_highbd_blend_a64_d16_mask_c( + uint8_t *dst_8, uint32_t dst_stride, const CONV_BUF_TYPE *src0, + uint32_t src0_stride, const CONV_BUF_TYPE *src1, uint32_t src1_stride, + const uint8_t *mask, uint32_t mask_stride, int h, int w, int subh, int subw, + ConvolveParams *conv_params, const int bd) { + int i, j; + const int offset_bits = bd + 2 * FILTER_BITS - conv_params->round_0; + const int round_offset = (1 << (offset_bits - conv_params->round_1)) + + (1 << (offset_bits - conv_params->round_1 - 1)); + const int round_bits = + 2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1; + uint16_t *dst = CONVERT_TO_SHORTPTR(dst_8); + + assert(IMPLIES(src0 == dst, src0_stride == dst_stride)); + assert(IMPLIES(src1 == dst, src1_stride == dst_stride)); + + assert(h >= 1); + assert(w >= 1); + assert(IS_POWER_OF_TWO(h)); + 
assert(IS_POWER_OF_TWO(w)); + + if (subw == 0 && subh == 0) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = mask[i * mask_stride + j]; + res = ((m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel_highbd(ROUND_POWER_OF_TWO(res, round_bits), bd); + } + } + } else if (subw == 1 && subh == 1) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = ROUND_POWER_OF_TWO( + mask[(2 * i) * mask_stride + (2 * j)] + + mask[(2 * i + 1) * mask_stride + (2 * j)] + + mask[(2 * i) * mask_stride + (2 * j + 1)] + + mask[(2 * i + 1) * mask_stride + (2 * j + 1)], + 2); + res = ((m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel_highbd(ROUND_POWER_OF_TWO(res, round_bits), bd); + } + } + } else if (subw == 1 && subh == 0) { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = AOM_BLEND_AVG(mask[i * mask_stride + (2 * j)], + mask[i * mask_stride + (2 * j + 1)]); + res = ((m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel_highbd(ROUND_POWER_OF_TWO(res, round_bits), bd); + } + } + } else { + for (i = 0; i < h; ++i) { + for (j = 0; j < w; ++j) { + int32_t res; + const int m = AOM_BLEND_AVG(mask[(2 * i) * mask_stride + j], + mask[(2 * i + 1) * mask_stride + j]); + res = ((int32_t)(m * (int32_t)src0[i * src0_stride + j] + + (AOM_BLEND_A64_MAX_ALPHA - m) * + (int32_t)src1[i * src1_stride + j]) >> + AOM_BLEND_A64_ROUND_BITS); + res -= round_offset; + dst[i * dst_stride + j] = + clip_pixel_highbd(ROUND_POWER_OF_TWO(res, 
round_bits), bd); + } + } + } +} +#else // CONFIG_LOWPRECISION_BLEND void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, const int32_t *src0, uint32_t src0_stride, const int32_t *src1, uint32_t src1_stride, @@ -97,6 +274,7 @@ void aom_blend_a64_d32_mask_c(int32_t *dst, uint32_t dst_stride, } } } +#endif // CONFIG_LOWPRECISION_BLEND // Blending with alpha mask. Mask values come from the range [0, 64], // as described for AOM_BLEND_A64 in aom_dsp/blend.h. src0 or src1 can |