/* babl - dynamically extendable universal pixel conversion library.
 * Copyright (C) 2013 Daniel Sabo
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General
 * Public License along with this library; if not, see
 * <https://www.gnu.org/licenses/>.
 */

#include "config.h"

#if defined(USE_SSE4_1)

/* SSE 4 */
#include <smmintrin.h>

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

#include "babl.h"
#include "babl-cpuaccel.h"
#include "extensions/util.h"

/* Convert `samples` unsigned 8-bit values at `src` into floats at `dst`,
 * scaling each value by 1/255.
 *
 * Full groups of four values are widened and converted with SSE4.1
 * instructions; any remaining values (and any count below four) are
 * handled one at a time.  A zero or negative `samples` converts nothing.
 * `conversion` is not used by this routine.
 */
static inline void
conv_y8_yF (const Babl    *conversion,
            const uint8_t *src,
            float         *dst,
            long           samples)
{
  const float  factor     = 1.0f / 255.0f;
  const __m128 factor_vec = _mm_set1_ps (factor);
  long         n          = samples;

  (void) conversion;

  while (n >= 4)
    {
      uint32_t packed;
      __m128i  widened;

      /* Fetch four bytes with memcpy rather than through a casted
       * uint32_t pointer: `src` is only byte-aligned, and reading it as
       * a uint32_t object would be undefined behaviour.
       */
      memcpy (&packed, src, sizeof packed);

      /* Place the four bytes in the low lane (upper lanes zeroed), then
       * zero-extend each byte to its own 32-bit lane.
       */
      widened = _mm_cvtepu8_epi32 (_mm_cvtsi32_si128 ((int) packed));

      /* Unaligned store: `dst` carries no 16-byte alignment guarantee. */
      _mm_storeu_ps (dst, _mm_mul_ps (_mm_cvtepi32_ps (widened), factor_vec));

      src += 4;
      dst += 4;
      n   -= 4;
    }

  /* Scalar tail for the last 0..3 values. */
  while (n > 0)
    {
      *dst++ = (float) (*src++) * factor;
      n--;
    }
}

/* Two components per sample: forwards to conv_y8_yF with samples * 2. */
static void
conv_ya8_yaF (const Babl    *conversion,
              const uint8_t *src,
              float         *dst,
              long           samples)
{
  conv_y8_yF (conversion, src, dst, samples * 2);
}

/* Three components per sample: forwards to conv_y8_yF with samples * 3. */
static void
conv_rgb8_rgbF (const Babl    *conversion,
                const uint8_t *src,
                float         *dst,
                long           samples)
{
  conv_y8_yF (conversion, src, dst, samples * 3);
}

/* Four components per sample: forwards to conv_y8_yF with samples * 4. */
static void
conv_rgba8_rgbaF (const Babl    *conversion,
                  const uint8_t *src,
                  float         *dst,
                  long           samples)
{
  conv_y8_yF (conversion, src, dst, samples * 4);
}

#endif

int init (void);

/* Extension entry point.
 *
 * When built with USE_SSE4_1, creates the u8 and float formats for the
 * RGBA, RGB, YA and Y models (plain and primed variants) and, if
 * babl_cpu_accel_get_support() reports BABL_CPU_ACCEL_X86_SSE4_1,
 * registers the u8 -> float conversions defined above for each pair.
 * Always returns 0.
 */
int
init (void)
{
#if defined(USE_SSE4_1)

  const Babl *rgbaF_linear = babl_format_new (
    babl_model ("RGBA"),
    babl_type ("float"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    babl_component ("A"),
    NULL);
  const Babl *rgba8_linear = babl_format_new (
    babl_model ("RGBA"),
    babl_type ("u8"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    babl_component ("A"),
    NULL);
  const Babl *rgbaF_gamma = babl_format_new (
    babl_model ("R'G'B'A"),
    babl_type ("float"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    babl_component ("A"),
    NULL);
  const Babl *rgba8_gamma = babl_format_new (
    babl_model ("R'G'B'A"),
    babl_type ("u8"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    babl_component ("A"),
    NULL);
  const Babl *rgbF_linear = babl_format_new (
    babl_model ("RGB"),
    babl_type ("float"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    NULL);
  const Babl *rgb8_linear = babl_format_new (
    babl_model ("RGB"),
    babl_type ("u8"),
    babl_component ("R"),
    babl_component ("G"),
    babl_component ("B"),
    NULL);
  const Babl *rgbF_gamma = babl_format_new (
    babl_model ("R'G'B'"),
    babl_type ("float"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    NULL);
  const Babl *rgb8_gamma = babl_format_new (
    babl_model ("R'G'B'"),
    babl_type ("u8"),
    babl_component ("R'"),
    babl_component ("G'"),
    babl_component ("B'"),
    NULL);
  const Babl *yaF_linear = babl_format_new (
    babl_model ("YA"),
    babl_type ("float"),
    babl_component ("Y"),
    babl_component ("A"),
    NULL);
  const Babl *ya8_linear = babl_format_new (
    babl_model ("YA"),
    babl_type ("u8"),
    babl_component ("Y"),
    babl_component ("A"),
    NULL);
  const Babl *yaF_gamma = babl_format_new (
    babl_model ("Y'A"),
    babl_type ("float"),
    babl_component ("Y'"),
    babl_component ("A"),
    NULL);
  const Babl *ya8_gamma = babl_format_new (
    babl_model ("Y'A"),
    babl_type ("u8"),
    babl_component ("Y'"),
    babl_component ("A"),
    NULL);
  const Babl *yF_linear = babl_format_new (
    babl_model ("Y"),
    babl_type ("float"),
    babl_component ("Y"),
    NULL);
  const Babl *y8_linear = babl_format_new (
    babl_model ("Y"),
    babl_type ("u8"),
    babl_component ("Y"),
    NULL);
  const Babl *yF_gamma = babl_format_new (
    babl_model ("Y'"),
    babl_type ("float"),
    babl_component ("Y'"),
    NULL);
  const Babl *y8_gamma = babl_format_new (
    babl_model ("Y'"),
    babl_type ("u8"),
    babl_component ("Y'"),
    NULL);

/* Register conv_<src>_<dst> for both the *_linear and the *_gamma format
 * pair; the same routine serves both.  Wrapped in do/while (0) so that
 * `CONV (...);` expands to exactly one statement.
 */
#define CONV(src, dst) \
  do \
    { \
      babl_conversion_new (src ## _linear, dst ## _linear, "linear", conv_ ## src ## _ ## dst, NULL); \
      babl_conversion_new (src ## _gamma, dst ## _gamma, "linear", conv_ ## src ## _ ## dst, NULL); \
    } \
  while (0)

  /* Only register when the running CPU reports SSE4.1 support. */
  if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE4_1))
    {
      CONV (rgba8, rgbaF);
      CONV (rgb8,  rgbF);
      CONV (ya8,   yaF);
      CONV (y8,    yF);
    }

#undef CONV

#endif

  return 0;
}