summaryrefslogtreecommitdiff
path: root/aom_dsp/aom_convolve.c
diff options
context:
space:
mode:
author David Barker <david.barker@argondesign.com> 2017-05-18 15:12:50 +0100
committer Debargha Mukherjee <debargha@google.com> 2017-05-23 13:22:36 +0000
commit 5d34e6a738984c6efd3884d7703680687cfdc00c (patch)
tree fa5de29dad405b30a42c0e10f0f04693b8907718 /aom_dsp/aom_convolve.c
parent 0f3c94e134ab4d358be43c39f2fed82fb8cdcdf5 (diff)
Vectorize high-precision convolve filter
Add SSE2 lowbd and SSSE3 highbd versions of the filters introduced in https://aomedia-review.googlesource.com/c/11962/. These filters are equivalent in speed to the SSE2 implementations of the regular convolve filter.

The average time to filter a 64x64 block is:
  lowbd C: 52us
  lowbd SSE2: 5.6us
  highbd C: 53us
  highbd SSSE3: 5.8us

Also add a correctness test based on the warp filter tests.

Change-Id: Ia0d81100e8a414bbfc2b5f664d751cf24765299e
Diffstat (limited to 'aom_dsp/aom_convolve.c')
-rw-r--r--aom_dsp/aom_convolve.c2
1 files changed, 0 insertions, 2 deletions
diff --git a/aom_dsp/aom_convolve.c b/aom_dsp/aom_convolve.c
index 7182d7b3a..1abd9a241 100644
--- a/aom_dsp/aom_convolve.c
+++ b/aom_dsp/aom_convolve.c
@@ -447,8 +447,6 @@ void aom_convolve8_add_src_c(const uint8_t *src, ptrdiff_t src_stride,
x_step_q4, filters_y, y0_q4, y_step_q4, w, h);
}
-#define EXTRAPREC_BITS 2
-#define EXTRAPREC_CLAMP_LIMIT (512 << EXTRAPREC_BITS)
static void convolve_add_src_horiz_hip(const uint8_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
const InterpKernel *x_filters, int x0_q4,