diff options
Diffstat (limited to 'babl/base')
-rw-r--r-- | babl/base/babl-base.c | 26 | ||||
-rw-r--r-- | babl/base/babl-base.h | 42 | ||||
-rw-r--r-- | babl/base/babl-rgb-converter.c | 536 | ||||
-rw-r--r-- | babl/base/babl-trc.c | 610 | ||||
-rw-r--r-- | babl/base/babl-trc.h | 110 | ||||
-rw-r--r-- | babl/base/formats.c | 4 | ||||
-rw-r--r-- | babl/base/meson.build | 35 | ||||
-rw-r--r-- | babl/base/model-cmyk.c | 2 | ||||
-rw-r--r-- | babl/base/model-gray.c | 3 | ||||
-rw-r--r-- | babl/base/model-rgb.c | 2 | ||||
-rw-r--r-- | babl/base/model-ycbcr.c | 2 | ||||
-rw-r--r-- | babl/base/pow-24.h | 7 | ||||
-rw-r--r-- | babl/base/type-float.c | 2 | ||||
-rw-r--r-- | babl/base/type-half.c | 2 | ||||
-rw-r--r-- | babl/base/type-u15.c | 2 | ||||
-rw-r--r-- | babl/base/type-u16.c | 2 | ||||
-rw-r--r-- | babl/base/type-u32.c | 6 | ||||
-rw-r--r-- | babl/base/type-u8.c | 2 | ||||
-rw-r--r-- | babl/base/util.h | 18 |
19 files changed, 1355 insertions, 58 deletions
diff --git a/babl/base/babl-base.c b/babl/base/babl-base.c index 1d93341..8b9cdde 100644 --- a/babl/base/babl-base.c +++ b/babl/base/babl-base.c @@ -25,19 +25,19 @@ static void types (void); static void models (void); void -babl_base_init (void) +BABL_SIMD_SUFFIX(babl_base_init) (void) { babl_hmpf_on_name_lookups++; types (); models (); - babl_formats_init (); + BABL_SIMD_SUFFIX (babl_formats_init) (); babl_hmpf_on_name_lookups--; } void -babl_base_destroy (void) +BABL_SIMD_SUFFIX(babl_base_destroy) (void) { /* done by the destruction of the elemental babl clases */ } @@ -50,12 +50,12 @@ babl_base_destroy (void) static void types (void) { - babl_base_type_float (); - babl_base_type_u15 (); - babl_base_type_half (); - babl_base_type_u8 (); - babl_base_type_u16 (); - babl_base_type_u32 (); + BABL_SIMD_SUFFIX (babl_base_type_float) (); + BABL_SIMD_SUFFIX (babl_base_type_u15) (); + BABL_SIMD_SUFFIX (babl_base_type_half) (); + BABL_SIMD_SUFFIX (babl_base_type_u8) (); + BABL_SIMD_SUFFIX (babl_base_type_u16) (); + BABL_SIMD_SUFFIX (babl_base_type_u32) (); } /* @@ -67,9 +67,9 @@ static void models (void) { babl_hmpf_on_name_lookups--; - babl_base_model_rgb (); - babl_base_model_gray (); - babl_base_model_cmyk (); + BABL_SIMD_SUFFIX (babl_base_model_rgb) (); + BABL_SIMD_SUFFIX (babl_base_model_gray) (); + BABL_SIMD_SUFFIX (babl_base_model_cmyk) (); babl_hmpf_on_name_lookups++; - babl_base_model_ycbcr (); + BABL_SIMD_SUFFIX (babl_base_model_ycbcr) (); } diff --git a/babl/base/babl-base.h b/babl/base/babl-base.h index 64f1667..291697b 100644 --- a/babl/base/babl-base.h +++ b/babl/base/babl-base.h @@ -19,22 +19,36 @@ #ifndef _BABL_BASE_H #define _BABL_BASE_H +#ifdef ARM_NEON +#define BABL_SIMD_SUFFIX(symbol) symbol##_arm_neon +#else +#ifdef X86_64_V2 +#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v2 +#else +#ifdef X86_64_V3 +#define BABL_SIMD_SUFFIX(symbol) symbol##_x86_64_v3 +#else +#define BABL_SIMD_SUFFIX(symbol) symbol##_generic +#endif +#endif +#endif + +extern void (*babl_base_init) (void); -void babl_base_init (void); -void babl_base_destroy (void); -void babl_formats_init (void); +void BABL_SIMD_SUFFIX(babl_base_init) (void); +void BABL_SIMD_SUFFIX(babl_base_destroy) (void); +void BABL_SIMD_SUFFIX(babl_formats_init) (void); -void babl_base_type_half (void); -void babl_base_type_float (void); -void babl_base_type_u8 (void); -void babl_base_type_u16 (void); -void babl_base_type_u15 (void); -void babl_base_type_u32 (void); +void BABL_SIMD_SUFFIX(babl_base_type_half) (void); +void BABL_SIMD_SUFFIX(babl_base_type_float) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u8) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u16) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u15) (void); +void BABL_SIMD_SUFFIX(babl_base_type_u32) (void); -void babl_base_model_pal (void); -void babl_base_model_rgb (void); -void babl_base_model_cmyk (void); -void babl_base_model_gray (void); -void babl_base_model_ycbcr (void); +void BABL_SIMD_SUFFIX(babl_base_model_rgb) (void); +void BABL_SIMD_SUFFIX(babl_base_model_cmyk) (void); +void BABL_SIMD_SUFFIX(babl_base_model_gray) (void); +void BABL_SIMD_SUFFIX(babl_base_model_ycbcr) (void); #endif diff --git a/babl/base/babl-rgb-converter.c b/babl/base/babl-rgb-converter.c new file mode 100644 index 0000000..e0ba7c3 --- /dev/null +++ b/babl/base/babl-rgb-converter.c @@ -0,0 +1,536 @@ +#include "config.h" +#include "babl-internal.h" +#include "base/util.h" +#include "babl-trc.h" +#include "babl-base.h" + +static void +prep_conversion (const Babl *babl) +{ + Babl *conversion = (void*) babl; + const Babl *source_space = babl_conversion_get_source_space (conversion); + float *matrixf; + unsigned int i; + float *lut_red; + float *lut_green; + float *lut_blue; + + double matrix[9]; + babl_matrix_mul_matrix ( + (conversion->conversion.destination)->format.space->space.XYZtoRGB, + (conversion->conversion.source)->format.space->space.RGBtoXYZ, + matrix); + + matrixf = babl_calloc (sizeof (float), 9 + 256 * 3); // we leak this matrix , which is a singleton + babl_matrix_to_float (matrix, matrixf); + conversion->conversion.data = matrixf; + + lut_red = matrixf + 9; + lut_green = lut_red + 256; + lut_blue = lut_green + 256; + for (i = 0; i < 256; i++) + { + lut_red[i] = babl_trc_to_linear (source_space->space.trc[0], i/255.0); + lut_green[i] = babl_trc_to_linear (source_space->space.trc[1], i/255.0); + lut_blue[i] = babl_trc_to_linear (source_space->space.trc[2], i/255.0); + } +} + +#define TRC_IN(rgba_in, rgba_out) do{ int i;\ + for (i = 0; i < samples; i++) \ + { \ + rgba_out[i*4+3] = rgba_in[i*4+3]; \ + } \ + if ((source_space->space.trc[0] == source_space->space.trc[1]) && \ + (source_space->space.trc[1] == source_space->space.trc[2])) \ + { \ + const Babl *trc = (void*)source_space->space.trc[0]; \ + babl_trc_to_linear_buf(trc, rgba_in, rgba_out, 4, 4, 3, samples); \ + } \ + else \ + { \ + unsigned int c; \ + for (c = 0; c < 3; c ++) \ + { \ + const Babl *trc = (void*)source_space->space.trc[c]; \ + babl_trc_to_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \ + } \ + } \ +}while(0) + +#define TRC_OUT(rgba_in, rgba_out) do{\ + { \ + if ((destination_space->space.trc[0] == destination_space->space.trc[1]) && \ + (destination_space->space.trc[1] == destination_space->space.trc[2])) \ + { \ + const Babl *trc = (void*)destination_space->space.trc[0]; \ + babl_trc_from_linear_buf(trc, rgba_in, rgba_out, 4, 4, 3, samples); \ + } \ + else \ + { \ + unsigned int c; \ + for (c = 0; c < 3; c ++) \ + { \ + const Babl *trc = (void*)destination_space->space.trc[c]; \ + babl_trc_from_linear_buf(trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \ + } \ + } \ + }\ +} while(0) + + +static inline void +universal_nonlinear_rgba_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + const Babl *destination_space = babl_conversion_get_destination_space (conversion); + + float * matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + TRC_IN(rgba_in, rgba_out); + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); + + TRC_OUT(rgba_out, rgba_out); +} + +static inline void +universal_nonlinear_rgb_linear_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + float * matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + TRC_IN(rgba_in, rgba_out); + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); +} + +static inline void +universal_linear_rgb_nonlinear_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *destination_space = conversion->conversion.destination->format.space; + float * matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples); + + TRC_OUT(rgba_out, rgba_out); +} + +static inline void +universal_rgba_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + float *matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_in, rgba_out, samples); +} + +static inline void +universal_rgb_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + float *matrixf = data; + float *rgb_in = (void*)src_char; + float *rgb_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf3 (matrixf, rgb_in, rgb_out, samples); +} + + +static inline void +universal_ya_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + memcpy (dst_char, src_char, samples * 4 * 2); +} + +static inline void +universal_y_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + memcpy (dst_char, src_char, samples * 4); +} + + +static inline void +universal_nonlinear_rgb_u8_converter (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *destination_space = conversion->conversion.destination->format.space; + + float * matrixf = data; + float * in_trc_lut_red = matrixf + 9; + float * in_trc_lut_green = in_trc_lut_red + 256; + float * in_trc_lut_blue = in_trc_lut_green + 256; + unsigned int i; + uint8_t *rgb_in_u8 = (void*)src_char; + uint8_t *rgb_out_u8 = (void*)dst_char; + + float rgba_out[4*samples]; + + for (i = 0; i < samples; i++) + { + rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]]; + rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]]; + rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]]; + rgba_out[i*4+3]=rgb_in_u8[i*3+2] * 255.0f; + } + + babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); + + { + TRC_OUT(rgba_out, rgba_out); + + for (i = 0; i < samples; i++) + for (unsigned int c = 0; c < 3; c ++) + rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.0f; + } + +} + + +#if defined(USE_SSE2) + +#define m(matr, j, i) matr[j*3+i] + +#include <emmintrin.h> + +static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat, + const float *v_in, + float *v_out, + unsigned int samples) +{ + const __v4sf m___0 = {m(mat, 0, 0), m(mat, 1, 0), m(mat, 2, 0), 0}; + const __v4sf m___1 = {m(mat, 0, 1), m(mat, 1, 1), m(mat, 2, 1), 0}; + const __v4sf m___2 = {m(mat, 0, 2), m(mat, 1, 2), m(mat, 2, 2), 1}; + unsigned int i; + for (i = 0; i < samples; i ++) + { + __v4sf a, b, c = _mm_load_ps(&v_in[0]); + a = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(0,0,0,0)); + b = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(1,1,1,1)); + c = (__v4sf) _mm_shuffle_epi32((__m128i)c, _MM_SHUFFLE(3,2,2,2)); + _mm_store_ps (v_out, m___0 * a + m___1 * b + m___2 * c); + v_out += 4; + v_in += 4; + } + _mm_empty (); +} + +#undef m + +static inline void +universal_nonlinear_rgba_converter_sse2 (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + const Babl *destination_space = babl_conversion_get_destination_space (conversion); + float * matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + TRC_IN(rgba_in, rgba_out); + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); + + TRC_OUT(rgba_out, rgba_out); +} + + +static inline void +universal_rgba_converter_sse2 (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + float *matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples); +} + +static inline void +universal_nonlinear_rgb_u8_converter_sse2 (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *destination_space = conversion->conversion.destination->format.space; + + float * matrixf = data; + float * in_trc_lut_red = matrixf + 9; + float * in_trc_lut_green = in_trc_lut_red + 256; + float * in_trc_lut_blue = in_trc_lut_green + 256; + unsigned int i; + uint8_t *rgb_in_u8 = (void*)src_char; + uint8_t *rgb_out_u8 = (void*)dst_char; + + // The alignment is necessary for SIMD intrinsics in babl_matrix_mul_vectorff_buf4_sse2() + float __attribute__ ((aligned (16))) rgba_out[4*samples]; + + for (i = 0; i < samples; i++) + { + rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]]; + rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]]; + rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]]; + } + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); + + { + TRC_OUT(rgba_out, rgba_out); + + for (i = 0; i < samples; i++) + for (unsigned c = 0; c < 3; c ++) + rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255 + 0.5f; + } +} + + +static inline void +universal_nonlinear_rgb_linear_converter_sse2 (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *source_space = babl_conversion_get_source_space (conversion); + float * matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + TRC_IN(rgba_in, rgba_out); + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); +} + + +static inline void +universal_linear_rgb_nonlinear_converter_sse2 (const Babl *conversion, + unsigned char *__restrict__ src_char, + unsigned char *__restrict__ dst_char, + long samples, + void *data) +{ + const Babl *destination_space = conversion->conversion.destination->format.space; + float * matrixf = data; + float *rgba_in = (void*)src_char; + float *rgba_out = (void*)dst_char; + + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_in, rgba_out, samples); + + TRC_OUT(rgba_out, rgba_out); +} +#endif + + +static int +add_rgb_adapter (Babl *babl, + void *space) +{ + if (babl != space) + { + +#if defined(USE_SSE2) + if ((babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE) && + (babl_cpu_accel_get_support () & BABL_CPU_ACCEL_X86_SSE2)) + { + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", space), + babl_format_with_space("RGBA float", babl), + "linear", universal_rgba_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", babl), + babl_format_with_space("RGBA float", space), + "linear", universal_rgba_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", space), + babl_format_with_space("R'G'B'A float", babl), + "linear", universal_nonlinear_rgba_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", babl), + babl_format_with_space("R'G'B'A float", space), + "linear", universal_nonlinear_rgba_converter_sse2, + NULL)); + + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", space), + babl_format_with_space("RGBA float", babl), + "linear", universal_nonlinear_rgb_linear_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", babl), + babl_format_with_space("RGBA float", space), + "linear", universal_nonlinear_rgb_linear_converter_sse2, + NULL)); + + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", babl), + babl_format_with_space("R'G'B'A float", space), + "linear", universal_linear_rgb_nonlinear_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", space), + babl_format_with_space("R'G'B'A float", babl), + "linear", universal_linear_rgb_nonlinear_converter_sse2, + NULL)); + + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B' u8", space), + babl_format_with_space("R'G'B' u8", babl), + "linear", universal_nonlinear_rgb_u8_converter_sse2, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B' u8", babl), + babl_format_with_space("R'G'B' u8", space), + "linear", universal_nonlinear_rgb_u8_converter_sse2, + NULL)); + } + else +#endif + { +#if 1 + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", space), + babl_format_with_space("RGBA float", babl), + "linear", universal_rgba_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", babl), + babl_format_with_space("RGBA float", space), + "linear", universal_rgba_converter, + NULL)); + + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", space), + babl_format_with_space("R'G'B'A float", babl), + "linear", universal_nonlinear_rgba_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", babl), + babl_format_with_space("R'G'B'A float", space), + "linear", universal_nonlinear_rgba_converter, + NULL)); +#endif +#if 1 + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", space), + babl_format_with_space("RGBA float", babl), + "linear", universal_nonlinear_rgb_linear_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B'A float", babl), + babl_format_with_space("RGBA float", space), + "linear", universal_nonlinear_rgb_linear_converter, + NULL)); +#endif + +#if 1 + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B' u8", space), + babl_format_with_space("R'G'B' u8", babl), + "linear", universal_nonlinear_rgb_u8_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("R'G'B' u8", babl), + babl_format_with_space("R'G'B' u8", space), + "linear", universal_nonlinear_rgb_u8_converter, + NULL)); + + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", babl), + babl_format_with_space("R'G'B'A float", space), + "linear", universal_linear_rgb_nonlinear_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("RGBA float", space), + babl_format_with_space("R'G'B'A float", babl), + "linear", universal_linear_rgb_nonlinear_converter, + NULL)); +#endif + } + prep_conversion(babl_conversion_new( + babl_format_with_space("RGB float", space), + babl_format_with_space("RGB float", babl), + "linear", universal_rgb_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("RGB float", babl), + babl_format_with_space("RGB float", space), + "linear", universal_rgb_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("Y float", space), + babl_format_with_space("Y float", babl), + "linear", universal_y_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("YaA float", babl), + babl_format_with_space("YaA float", space), + "linear", universal_ya_converter, + NULL)); + prep_conversion(babl_conversion_new( + babl_format_with_space("YA float", babl), + babl_format_with_space("YA float", space), + "linear", universal_ya_converter, + NULL)); + } + return 0; +} + +/* The first time a new Babl space is used - for creation of a fish, is when + * this function is called, it adds conversions hooks that provides its formats + * with conversions internally as well as for conversions to and from other RGB + * spaces. + */ +void +BABL_SIMD_SUFFIX(_babl_space_add_universal_rgb) (const Babl *space); +void +BABL_SIMD_SUFFIX(_babl_space_add_universal_rgb) (const Babl *space) +{ + babl_space_class_for_each (add_rgb_adapter, (void*)space); +} diff --git a/babl/base/babl-trc.c b/babl/base/babl-trc.c new file mode 100644 index 0000000..09beb07 --- /dev/null +++ b/babl/base/babl-trc.c @@ -0,0 +1,610 @@ +/* babl - dynamically extendable universal pixel conversion library. + * Copyright (C) 2017 Øyvind Kolås. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, see + * <https://www.gnu.org/licenses/>. + */ + +#define MAX_TRCS 100 + +/* FIXME: choose parameters more intelligently */ +#define POLY_GAMMA_X0 ( 0.5 / 255.0) +#define POLY_GAMMA_X1 (254.5 / 255.0) +#define POLY_GAMMA_DEGREE 6 +#define POLY_GAMMA_SCALE 2 + +#include "config.h" +#include "babl-internal.h" +#include "babl-base.h" +#include "base/util.h" + +static BablTRC trc_db[MAX_TRCS]; + +static inline float +_babl_trc_linear (const Babl *trc_, + float value) +{ + return value; +} + +static inline float +babl_trc_lut_from_linear (const Babl *trc_, + float x) +{ + BablTRC *trc = (void*)trc_; + int entry; + float ret, diff; + + entry = x * (trc->lut_size-1); + diff = ( (x * (trc->lut_size-1)) - entry); + + if (entry >= trc->lut_size -1) + { + entry = trc->lut_size - 1; + diff = 0.0f; + } + else if (entry < 0) entry = 0; + + if (diff > 0.0f) + { + ret = trc->inv_lut[entry] * (1.0f - diff) + trc->inv_lut[entry+1] * diff; + } + else + { + ret = trc->inv_lut[entry]; + } + return ret; +} + +static inline float +babl_trc_lut_to_linear (const Babl *trc_, + float x) +{ + BablTRC *trc = (void*)trc_; + int entry; + float ret, diff; + + entry = x * (trc->lut_size-1); + diff = ( (x * (trc->lut_size-1)) - entry); + + if (entry >= trc->lut_size) entry = trc->lut_size - 1; + else if (entry < 0) entry = 0; + + if (diff > 0.0f && entry < trc->lut_size - 1) + { + ret = trc->lut[entry] * (1.0f - diff) + trc->lut[entry+1] * diff; + } + else + { + ret = trc->lut[entry]; + } + return ret; +} + +static inline float +_babl_trc_gamma_to_linear (const Babl *trc_, + float value) +{ + BablTRC *trc = (void*)trc_; + if (value >= trc->poly_gamma_to_linear_x0 && + value <= trc->poly_gamma_to_linear_x1) + { + return babl_polynomial_eval (&trc->poly_gamma_to_linear, value); + } + else if (value > 0.0f) + { + return powf (value, trc->gamma); + } + return 0.0f; +} + +static inline float +_babl_trc_gamma_from_linear (const Babl *trc_, + float value) +{ + BablTRC *trc = (void*)trc_; + if (value >= trc->poly_gamma_from_linear_x0 && + value <= trc->poly_gamma_from_linear_x1) + { + return babl_polynomial_eval (&trc->poly_gamma_from_linear, value); + } + else if (value > 0.0f) + { + return powf (value, trc->rgamma); + } + return 0.0f; +} + +static inline void +_babl_trc_gamma_to_linear_buf (const Babl *trc_, + const float *__restrict__ in, + float *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c ++) + out[4 * i + c] = _babl_trc_gamma_to_linear (trc_, in[4 *i + c]); + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + out[out_gap * i + c] = _babl_trc_gamma_to_linear (trc_, in[in_gap *i + c]); + } +} + +static inline void +_babl_trc_gamma_from_linear_buf (const Babl *trc_, + const float *__restrict__ in, + float *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c ++) + out[4 * i + c] = _babl_trc_gamma_from_linear (trc_, in[4 *i + c]); + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + out[out_gap * i + c] = _babl_trc_gamma_from_linear (trc_, in[in_gap *i + c]); + } +} + +static inline float +_babl_trc_formula_srgb_from_linear (const Babl *trc_, + float value) +{ + BablTRC *trc = (void*)trc_; + float x= value; + float a = trc->lut[1]; + float b = trc->lut[2]; + float c = trc->lut[3]; + float d = trc->lut[4]; + float e = trc->lut[5]; + float f = trc->lut[6]; + + if (x - f > c * d) // XXX: verify that this math is the correct inverse + { + float v = _babl_trc_gamma_from_linear ((Babl *) trc, x - f); + v = (v-b)/a; + if (v < 0.0f || v >= 0.0f) + return v; + return 0.0f; + } + if (c > 0.0f) + return (x - e) / c; + return 0.0f; +} + +static inline float +_babl_trc_formula_srgb_to_linear (const Babl *trc_, + float value) +{ + BablTRC *trc = (void*)trc_; + float x= value; + float a = trc->lut[1]; + float b = trc->lut[2]; + float c = trc->lut[3]; + float d = trc->lut[4]; + float e = trc->lut[5]; + float f = trc->lut[6]; + + if (x >= d) // OPT can be reduced to be branchless + { + return _babl_trc_gamma_to_linear ((Babl *) trc, a * x + b) + e; + } + return c * x + f; +} +static inline float +_babl_trc_formula_cie_from_linear (const Babl *trc_, + float value) +{ + BablTRC *trc = (void*)trc_; + float x= value; + float a = trc->lut[1]; + float b = trc->lut[2]; + float c = trc->lut[3]; + + if (x > c) + { + float v = _babl_trc_gamma_from_linear ((Babl *) trc, x - c); + v = (v-b)/a; + if (v < 0.0f || v >= 0.0f) + return v; + } + return 0.0f; +} + +static inline float +_babl_trc_formula_cie_to_linear (const Babl *trc_, + float value) +{ + BablTRC *trc = (void*)trc_; + float x= value; + float a = trc->lut[1]; + float b = trc->lut[2]; + float c = trc->lut[3]; + + if (x >= -b / a) + { + return _babl_trc_gamma_to_linear ((Babl *) trc, a * x + b) + c; + } + return c; +} + + + +static inline float +_babl_trc_srgb_to_linear (const Babl *trc_, + float value) +{ + return babl_gamma_2_2_to_linearf (value); +} + +static inline float +_babl_trc_srgb_from_linear (const Babl *trc_, + float value) +{ + return babl_linear_to_gamma_2_2f (value); +} + +static inline void +_babl_trc_srgb_to_linear_buf (const Babl *trc_, + const float *in, + float *out, + int in_gap, + int out_gap, + int components, + int count) +{ + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c++) + out[4 * i + c] = babl_gamma_2_2_to_linearf (in[4 * i + c]); + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c++) + out[out_gap * i + c] = babl_gamma_2_2_to_linearf (in[in_gap * i + c]); + } +} + +static inline void +_babl_trc_srgb_from_linear_buf (const Babl *trc_, + const float *in, + float *out, + int in_gap, + int out_gap, + int components, + int count) +{ + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c++) + out[4 * i + c] = babl_linear_to_gamma_2_2f (in[4 * i + c]); + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c++) + out[out_gap * i + c] = babl_linear_to_gamma_2_2f (in[in_gap * i + c]); + } +} + +static inline void +_babl_trc_to_linear_buf_generic (const Babl *trc_, + const float *__restrict__ in, + float *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c ++) + out[4 * i + c] = trc->fun_to_linear (trc_, in[4 * i + c]); + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + out[out_gap * i + c] = trc->fun_to_linear (trc_, in[in_gap * i + c]); + } +} + +static inline void +_babl_trc_from_linear_buf_generic (const Babl *trc_, + const float *__restrict__ in, + float *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c ++) + out[4 * i + c] = trc->fun_from_linear (trc_, in[4 * i + c]); + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + out[out_gap * i + c] = trc->fun_from_linear (trc_, in[in_gap * i + c]); + } +} + + + +static inline void _babl_trc_linear_buf (const Babl *trc_, + const float *__restrict__ in, + float *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c ++) + out[i * 4 + c] = in[i * 4 + c]; + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + out[i * out_gap + c] = in[i * in_gap + c]; + } +} + +const Babl * +BABL_SIMD_SUFFIX (babl_trc_lookup_by_name) (const char *name); + +const Babl * +BABL_SIMD_SUFFIX (babl_trc_lookup_by_name) (const char *name) +{ + int i; + for (i = 0; trc_db[i].instance.class_type; i++) + if (!strcmp (trc_db[i].instance.name, name)) + { + return (Babl*)&trc_db[i]; + } + babl_log("failed to find trc '%s'\n", name); + return NULL; +} + +const Babl * +BABL_SIMD_SUFFIX (babl_trc_new) (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut); + +const Babl * +BABL_SIMD_SUFFIX (babl_trc_new) (const char *name, + BablTRCType type, + double gamma, + int n_lut, + float *lut) +{ + int i=0; + static BablTRC trc; + trc.instance.class_type = BABL_TRC; + trc.instance.id = 0; + trc.type = type; + trc.gamma = gamma > 0.0 ? gamma : 0.0; + trc.rgamma = gamma > 0.0001 ? 1.0 / gamma : 0.0; + + if (n_lut ) + { + for (i = 0; trc_db[i].instance.class_type; i++) + { + if ( trc_db[i].lut_size == n_lut && + (memcmp (trc_db[i].lut, lut, sizeof (float) * n_lut)==0) + ) + { + return (void*)&trc_db[i]; + } + } + } + else + for (i = 0; trc_db[i].instance.class_type; i++) + { + int offset = ((char*)&trc_db[i].type) - (char*)(&trc_db[i]); + int size = ((char*)&trc_db[i].gamma + sizeof(double)) - ((char*)&trc_db[i].type); + + if (memcmp ((char*)(&trc_db[i]) + offset, ((char*)&trc) + offset, size)==0) + { + return (void*)&trc_db[i]; + } + } + if (i >= MAX_TRCS-1) + { + babl_log ("too many BablTRCs"); + return NULL; + } + trc_db[i]=trc; + trc_db[i].instance.name = trc_db[i].name; + if (name) + snprintf (trc_db[i].name, sizeof (trc_db[i].name), "%s", name); + else if (n_lut) + snprintf (trc_db[i].name, sizeof (trc_db[i].name), "lut-trc"); + else + snprintf (trc_db[i].name, sizeof (trc_db[i].name), "trc-%i-%f", type, gamma); + + if (n_lut) + { + int j; + trc_db[i].lut_size = n_lut; + trc_db[i].lut = babl_calloc (sizeof (float), n_lut); + memcpy (trc_db[i].lut, lut, sizeof (float) * n_lut); + trc_db[i].inv_lut = babl_calloc (sizeof (float), n_lut); + + for (j = 0; j < n_lut; j++) + { + int k; + double min = 0.0; + double max = 1.0; + for (k = 0; k < 16; k++) + { + double guess = (min + max) / 2; + float reversed_index = babl_trc_lut_to_linear (BABL(&trc_db[i]), guess) * (n_lut-1.0f); + + if (reversed_index < j) + { + min = guess; + } + else if (reversed_index > j) + { + max = guess; + } + } + trc_db[i].inv_lut[j] = (min + max) / 2; + } + } + + trc_db[i].fun_to_linear_buf = _babl_trc_to_linear_buf_generic; + trc_db[i].fun_from_linear_buf = _babl_trc_from_linear_buf_generic; + + switch (trc_db[i].type) + { + case BABL_TRC_LINEAR: + trc_db[i].fun_to_linear = _babl_trc_linear; + trc_db[i].fun_from_linear = _babl_trc_linear; + trc_db[i].fun_from_linear_buf = _babl_trc_linear_buf; + trc_db[i].fun_to_linear_buf = _babl_trc_linear_buf; + break; + case BABL_TRC_FORMULA_GAMMA: + trc_db[i].fun_to_linear = _babl_trc_gamma_to_linear; + trc_db[i].fun_from_linear = _babl_trc_gamma_from_linear; + trc_db[i].fun_to_linear_buf = _babl_trc_gamma_to_linear_buf; + trc_db[i].fun_from_linear_buf = _babl_trc_gamma_from_linear_buf; + + trc_db[i].poly_gamma_to_linear_x0 = POLY_GAMMA_X0; + trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; + babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_to_linear, + trc_db[i].gamma, + trc_db[i].poly_gamma_to_linear_x0, + trc_db[i].poly_gamma_to_linear_x1, + POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); + + trc_db[i].poly_gamma_from_linear_x0 = POLY_GAMMA_X0; + trc_db[i].poly_gamma_from_linear_x1 = POLY_GAMMA_X1; + babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_from_linear, + trc_db[i].rgamma, + trc_db[i].poly_gamma_from_linear_x0, + trc_db[i].poly_gamma_from_linear_x1, + POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); + break; + case BABL_TRC_FORMULA_CIE: + trc_db[i].lut = babl_calloc (sizeof (float), 4); + { + int j; + for (j = 0; j < 4; j++) + trc_db[i].lut[j] = lut[j]; + } + trc_db[i].fun_to_linear = _babl_trc_formula_cie_to_linear; + trc_db[i].fun_from_linear = _babl_trc_formula_cie_from_linear; + + trc_db[i].poly_gamma_to_linear_x0 = lut[4]; + trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; + babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_to_linear, + trc_db[i].gamma, + trc_db[i].poly_gamma_to_linear_x0, + trc_db[i].poly_gamma_to_linear_x1, + POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); + + trc_db[i].poly_gamma_from_linear_x0 = lut[3] * lut[4]; + trc_db[i].poly_gamma_from_linear_x1 = POLY_GAMMA_X1; + babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_from_linear, + trc_db[i].rgamma, + trc_db[i].poly_gamma_from_linear_x0, + trc_db[i].poly_gamma_from_linear_x1, + POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); + break; + + case BABL_TRC_FORMULA_SRGB: + trc_db[i].lut = babl_calloc (sizeof (float), 7); + { + int j; + for (j = 0; j < 7; j++) + trc_db[i].lut[j] = lut[j]; + } + trc_db[i].fun_to_linear = _babl_trc_formula_srgb_to_linear; + trc_db[i].fun_from_linear = _babl_trc_formula_srgb_from_linear; + + trc_db[i].poly_gamma_to_linear_x0 = lut[4]; + trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; + babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_to_linear, + trc_db[i].gamma, + trc_db[i].poly_gamma_to_linear_x0, + trc_db[i].poly_gamma_to_linear_x1, + POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); + + trc_db[i].poly_gamma_from_linear_x0 = lut[3] * lut[4]; + trc_db[i].poly_gamma_from_linear_x1 = POLY_GAMMA_X1; + babl_polynomial_approximate_gamma (&trc_db[i].poly_gamma_from_linear, + trc_db[i].rgamma, + trc_db[i].poly_gamma_from_linear_x0, + trc_db[i].poly_gamma_from_linear_x1, + POLY_GAMMA_DEGREE, POLY_GAMMA_SCALE); + break; + case BABL_TRC_SRGB: + trc_db[i].fun_to_linear = _babl_trc_srgb_to_linear; + trc_db[i].fun_from_linear = _babl_trc_srgb_from_linear; + trc_db[i].fun_from_linear_buf = _babl_trc_srgb_from_linear_buf; + trc_db[i].fun_to_linear_buf = _babl_trc_srgb_to_linear_buf; + break; + case BABL_TRC_LUT: + trc_db[i].fun_to_linear = babl_trc_lut_to_linear; + trc_db[i].fun_from_linear = babl_trc_lut_from_linear; + break; + } + return (Babl*)&trc_db[i]; +} + +void +BABL_SIMD_SUFFIX(babl_trc_class_for_each) (BablEachFunction each_fun, + void *user_data); + +void +BABL_SIMD_SUFFIX(babl_trc_class_for_each) (BablEachFunction each_fun, + void *user_data) +{ + int i=0; + for (i = 0; trc_db[i].instance.class_type; i++) + if (each_fun (BABL (&trc_db[i]), user_data)) + return; +} + diff --git a/babl/base/babl-trc.h b/babl/base/babl-trc.h new file mode 100644 index 0000000..1901fd2 --- /dev/null +++ b/babl/base/babl-trc.h @@ -0,0 +1,110 @@ +/* babl - dynamically extendable universal pixel conversion library. + * Copyright (C) 2017, Øyvind Kolås and others. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General + * Public License along with this library; if not, see + * <https://www.gnu.org/licenses/>. + */ + +#ifndef _BABL_TRC_H +#define _BABL_TRC_H + +#include <math.h> +#include <string.h> +#include "base/util.h" +#include "babl-polynomial.h" + +BABL_CLASS_DECLARE (trc); + +typedef enum {BABL_TRC_LINEAR, + BABL_TRC_FORMULA_GAMMA, + BABL_TRC_SRGB, + BABL_TRC_FORMULA_SRGB, + BABL_TRC_LUT, + BABL_TRC_FORMULA_CIE} +BablTRCType; + +typedef struct +{ + BablInstance instance; + BablTRCType type; + int lut_size; + double gamma; + float rgamma; + float (*fun_to_linear)(const Babl *trc, float val); + float (*fun_from_linear)(const Babl *trc, float val); + + void (*fun_to_linear_buf)(const Babl *trc, + const float *in, + float *out, + int in_gap, + int out_gap, + int components, + int count); + void (*fun_from_linear_buf)(const Babl *trc, + const float *in, + float *out, + int in_gap, + int out_gap, + int components, + int count); + BablPolynomial poly_gamma_to_linear; + float poly_gamma_to_linear_x0; + float poly_gamma_to_linear_x1; + BablPolynomial poly_gamma_from_linear; + float poly_gamma_from_linear_x0; + float poly_gamma_from_linear_x1; + float *lut; + float *inv_lut; + char name[128]; + int valid_u8_lut; + float u8_lut[256]; +} BablTRC; + +static inline void babl_trc_from_linear_buf (const Babl *trc_, + const float *in, float *out, + int in_gap, int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + trc->fun_from_linear_buf (trc_, in, out, in_gap, out_gap, components, count); +} + +static inline void babl_trc_to_linear_buf (const Babl *trc_, + const float *in, float *out, + int in_gap, int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + trc->fun_to_linear_buf (trc_, in, out, in_gap, out_gap, components, count); +} + +static inline float babl_trc_from_linear (const Babl *trc_, float value) +{ + BablTRC *trc = (void*)trc_; + return trc->fun_from_linear (trc_, value); +} + +static inline float babl_trc_to_linear (const Babl *trc_, float value) +{ + BablTRC *trc = (void*)trc_; + return trc->fun_to_linear (trc_, value); +} + +void +babl_trc_class_init_generic (void); + + +#endif diff --git a/babl/base/formats.c b/babl/base/formats.c index bad9d14..03488c4 100644 --- a/babl/base/formats.c +++ b/babl/base/formats.c @@ -25,7 +25,7 @@ #include "babl-base.h" void -babl_formats_init (void) +BABL_SIMD_SUFFIX (babl_formats_init) (void) { const Babl *types[]={ babl_type_from_id (BABL_DOUBLE), @@ -35,7 +35,7 @@ babl_formats_init (void) babl_type_from_id (BABL_U16), babl_type_from_id (BABL_U32) }; - for (int i = 0; i < sizeof (types)/sizeof(types[0]);i++) + for (size_t i = 0; i < sizeof (types) / sizeof(types[0]); i++) { const Babl *type = types[i]; diff --git a/babl/base/meson.build b/babl/base/meson.build index a78fd84..e59609c 100644 --- a/babl/base/meson.build +++ b/babl/base/meson.build @@ -14,10 +14,43 @@ babl_base_sources = [ 'type-u16.c', 'type-u32.c', 'type-u8.c', + 'babl-trc.c', + 'babl-rgb-converter.c', ] babl_base = static_library('babl_base', babl_base_sources, include_directories: [rootInclude, bablInclude], dependencies: [math, lcms], -) + c_args: common_c_flags + [sse2_cflags] +) + +if host_cpu_family == 'x86_64' + + babl_base_x86_64_v2 = static_library('babl_base-x86-64-v2', + babl_base_sources, + include_directories: [rootInclude, bablInclude], + dependencies: [math, lcms], + c_args: common_c_flags + x86_64_v2_flags + ) + + babl_base_x86_64_v3 = static_library('babl_base-x86-64-v3', + babl_base_sources, + include_directories: [rootInclude, bablInclude], + dependencies: [math, lcms], + c_args: common_c_flags + x86_64_v3_flags + ) + +endif + + +if host_cpu_family == 'arm' + + babl_base_arm_neon = static_library('babl_base-arm-neon', + babl_base_sources, + include_directories: [rootInclude, bablInclude], + dependencies: [math, lcms], + c_args: common_c_flags + arm_neon_flags + ) + +endif diff --git a/babl/base/model-cmyk.c b/babl/base/model-cmyk.c index 13fdedf..1fa02be 100644 --- a/babl/base/model-cmyk.c +++ b/babl/base/model-cmyk.c @@ -613,7 +613,7 @@ cmy_to_rgba (const Babl *conversion, #endif void -babl_base_model_cmyk (void) +BABL_SIMD_SUFFIX (babl_base_model_cmyk) (void) { babl_component_new ("cyan", NULL); babl_component_new ("yellow", NULL); diff --git a/babl/base/model-gray.c b/babl/base/model-gray.c index 3862400..7441baa 100644 --- a/babl/base/model-gray.c +++ b/babl/base/model-gray.c @@ -31,7 +31,7 @@ static void formats (void); static void init_single_precision (void); void -babl_base_model_gray (void) +BABL_SIMD_SUFFIX (babl_base_model_gray) (void) { components (); models (); @@ -90,7 +90,6 @@ models (void) "linear", NULL); - babl_model_new ( "id", BABL_GRAY_ALPHA, babl_component_from_id (BABL_GRAY_LINEAR), diff --git a/babl/base/model-rgb.c b/babl/base/model-rgb.c index a3064ef..824665a 100644 --- a/babl/base/model-rgb.c +++ b/babl/base/model-rgb.c @@ -32,7 +32,7 @@ static void formats (void); static void init_single_precision (void); void -babl_base_model_rgb (void) +BABL_SIMD_SUFFIX (babl_base_model_rgb) (void) { components (); models (); diff --git a/babl/base/model-ycbcr.c b/babl/base/model-ycbcr.c index 64db6a2..e061298 100644 --- a/babl/base/model-ycbcr.c +++ b/babl/base/model-ycbcr.c @@ -34,7 +34,7 @@ static void conversions (void); static void formats (void); void -babl_base_model_ycbcr (void) +BABL_SIMD_SUFFIX (babl_base_model_ycbcr) (void) { components (); models (); diff --git a/babl/base/pow-24.h b/babl/base/pow-24.h index ecd1282..98e2374 100644 --- a/babl/base/pow-24.h +++ b/babl/base/pow-24.h @@ -98,7 +98,7 @@ static inline float babl_frexpf(float x, int *e) if (!ee) { if (x) { - x = babl_frexpf(x*18446744073709551616.0, e); + x = babl_frexpf(x*18446744073709551616.0f, e); *e -= 64; } else *e = 0; return x; @@ -130,11 +130,12 @@ static inline float babl_frexpf(float x, int *e) static inline float init_newtonf (float x, float exponent, float c0, float c1, float c2) { +#define fM_LN2 0.69314718055994530942f int iexp = 0; float y = babl_frexpf(x, &iexp); y = 2*y+(iexp-2); - c1 *= M_LN2*exponent; - c2 *= M_LN2*M_LN2*exponent*exponent; + c1 *= fM_LN2*exponent; + c2 *= fM_LN2*fM_LN2*exponent*exponent; return y = c0 + c1*y + c2*y*y; } diff --git a/babl/base/type-float.c b/babl/base/type-float.c index 5b03b3f..9517831 100644 --- a/babl/base/type-float.c +++ b/babl/base/type-float.c @@ -83,7 +83,7 @@ convert_float_float (const Babl *babl, void -babl_base_type_float (void) +BABL_SIMD_SUFFIX (babl_base_type_float) (void) { babl_type_new ( "float", diff --git a/babl/base/type-half.c b/babl/base/type-half.c index 862d662..a146185 100644 --- a/babl/base/type-half.c +++ b/babl/base/type-half.c @@ -395,7 +395,7 @@ convert_half_float (BablConversion *conversion, void -babl_base_type_half (void) +BABL_SIMD_SUFFIX (babl_base_type_half) (void) { babl_type_new ( "half", diff --git a/babl/base/type-u15.c b/babl/base/type-u15.c index ea35453..7224c63 100644 --- a/babl/base/type-u15.c +++ b/babl/base/type-u15.c @@ -198,7 +198,7 @@ convert_u15_float_scaled (BablConversion *conversion, MAKE_CONVERSIONS_float (u15, 0.0, 1.0, 0, (1<<15)) void -babl_base_type_u15 (void) +BABL_SIMD_SUFFIX (babl_base_type_u15) (void) { babl_hmpf_on_name_lookups--; babl_type_new ( diff --git a/babl/base/type-u16.c b/babl/base/type-u16.c index c5a41dc..e7ab936 100644 --- a/babl/base/type-u16.c +++ b/babl/base/type-u16.c @@ -196,7 +196,7 @@ MAKE_CONVERSIONS_float (u16, 0.0, 1.0, 0, UINT16_MAX) void -babl_base_type_u16 (void) +BABL_SIMD_SUFFIX (babl_base_type_u16) (void) { babl_type_new ( "u16", diff --git a/babl/base/type-u32.c b/babl/base/type-u32.c index 48b1506..7d07ff1 100644 --- a/babl/base/type-u32.c +++ b/babl/base/type-u32.c @@ -69,7 +69,7 @@ convert_u32_double_scaled (BablConversion *c, { while (n--) { - int u32val = *(uint32_t *) src; + uint32_t u32val = *(uint32_t *) src; double dval; if (u32val < min) @@ -154,7 +154,7 @@ convert_u32_float_scaled (BablConversion *c, { while (n--) { - int u32val = *(uint32_t *) src; + uint32_t u32val = *(uint32_t *) src; float dval; if (u32val < min) @@ -196,7 +196,7 @@ MAKE_CONVERSIONS_float(u32, 0.0, 1.0, 0, UINT32_MAX) void -babl_base_type_u32 (void) +BABL_SIMD_SUFFIX (babl_base_type_u32) (void) { babl_type_new ( "u32", diff --git a/babl/base/type-u8.c b/babl/base/type-u8.c index d41d5e0..9abbf67 100644 --- a/babl/base/type-u8.c +++ b/babl/base/type-u8.c @@ -202,7 +202,7 @@ MAKE_CONVERSIONS_float (u8_chroma, -0.5, 0.5, 16, 240) void -babl_base_type_u8 (void) +BABL_SIMD_SUFFIX (babl_base_type_u8) (void) { babl_type_new ( "u8", diff --git a/babl/base/util.h b/babl/base/util.h index aba9c61..0d50363 100644 --- a/babl/base/util.h +++ b/babl/base/util.h @@ -50,23 +50,17 @@ static inline double babl_epsilon_for_zero (double value) { - if (value <= BABL_ALPHA_FLOOR && - value >= -BABL_ALPHA_FLOOR) - { - return BABL_ALPHA_FLOOR; - } - return value; + return value * (value > BABL_ALPHA_FLOOR || value < -BABL_ALPHA_FLOOR) + + BABL_ALPHA_FLOOR * (value <= BABL_ALPHA_FLOOR && + value >= -BABL_ALPHA_FLOOR); } static inline float babl_epsilon_for_zero_float (float value) { - if (value <= BABL_ALPHA_FLOOR_F && - value >= -BABL_ALPHA_FLOOR_F) - { - return BABL_ALPHA_FLOOR_F; - } - return value; + return value * (value > BABL_ALPHA_FLOOR_F || value < -BABL_ALPHA_FLOOR_F) + + BABL_ALPHA_FLOOR_F * (value <= BABL_ALPHA_FLOOR_F && + value >= -BABL_ALPHA_FLOOR_F); } |