diff options
-rw-r--r-- | babl/base/babl-rgb-converter.c | 117 | ||||
-rw-r--r-- | babl/base/babl-trc.c | 9 | ||||
-rw-r--r-- | babl/base/babl-trc.h | 98 |
3 files changed, 167 insertions, 57 deletions
diff --git a/babl/base/babl-rgb-converter.c b/babl/base/babl-rgb-converter.c index 0f7e2fb..be0159a 100644 --- a/babl/base/babl-rgb-converter.c +++ b/babl/base/babl-rgb-converter.c @@ -3,8 +3,6 @@ #include "base/util.h" #include "babl-trc.h" #include "babl-base.h" -/////////////////// - static void prep_conversion (const Babl *babl) @@ -60,9 +58,26 @@ prep_conversion (const Babl *babl) } \ }while(0) -#define TRC_OUT(rgba_in, rgba_out) do{\ +#define TRC_IN_u8(rgba_in, rgba_out) do{ \ + if ((source_space->space.trc[0] == source_space->space.trc[1]) && \ + (source_space->space.trc[1] == source_space->space.trc[2])) \ + { \ + const Babl *trc = (void*)source_space->space.trc[0]; \ + _babl_trc_to_linear_buf_u8_generic(trc, rgba_in, rgba_out, 4, 4, 3, samples); \ + } \ + else \ { \ unsigned int c; \ + for (c = 0; c < 3; c ++) \ + { \ + const Babl *trc = (void*)source_space->space.trc[c]; \ + _babl_trc_to_linear_buf_u8_generic (trc, rgba_in + c, rgba_out + c, 4, 4, 1, samples); \ + } \ + } \ +}while(0) + +#define TRC_OUT(rgba_in, rgba_out) do{\ + { \ if ((destination_space->space.trc[0] == destination_space->space.trc[1]) && \ (destination_space->space.trc[1] == destination_space->space.trc[2])) \ { \ @@ -71,6 +86,7 @@ prep_conversion (const Babl *babl) } \ else \ { \ + unsigned int c; \ for (c = 0; c < 3; c ++) \ { \ const Babl *trc = (void*)destination_space->space.trc[c]; \ @@ -81,6 +97,26 @@ prep_conversion (const Babl *babl) } while(0) +#define TRC_OUT_u8(rgba_in, rgba_out) do{\ + { \ + if ((destination_space->space.trc[0] == destination_space->space.trc[1]) && \ + (destination_space->space.trc[1] == destination_space->space.trc[2])) \ + { \ + const Babl *trc = (void*)destination_space->space.trc[0]; \ + _babl_trc_from_linear_buf_u8_generic(trc, rgba_in, rgba_out, 4, 4, 3, samples); \ + } \ + else \ + { \ + unsigned int c; \ + for (c = 0; c < 3; c ++) \ + { \ + const Babl *trc = (void*)destination_space->space.trc[c]; \ + _babl_trc_from_linear_buf_u8_generic(trc, 
rgba_in + c, rgba_out + c, 4, 4, 1, samples); \ + } \ + } \ + }\ +} while(0) + static inline void @@ -146,38 +182,18 @@ universal_nonlinear_rgba_u8_converter (const Babl *conversion, void *data) { const Babl *destination_space = conversion->conversion.destination->format.space; - + const Babl *source_space = babl_conversion_get_source_space (conversion); float * matrixf = data; - float * in_trc_lut_red = matrixf + 9; - float * in_trc_lut_green = in_trc_lut_red + 256; - float * in_trc_lut_blue = in_trc_lut_green + 256; - unsigned int i; uint8_t *rgba_in_u8 = (void*)src_char; uint8_t *rgba_out_u8 = (void*)dst_char; - - float rgb[4*samples]; - - for (i = 0; i < samples; i++) + float rgba[4*samples]; + for (int i = 0; i < samples * 4; i+=4) { - rgb[i*4+0]=in_trc_lut_red[rgba_in_u8[i*4+0]]; - rgb[i*4+1]=in_trc_lut_green[rgba_in_u8[i*4+1]]; - rgb[i*4+2]=in_trc_lut_blue[rgba_in_u8[i*4+2]]; - rgba_out_u8[i*4+3] = rgba_in_u8[i*4+3]; - } - - babl_matrix_mul_vectorff_buf4 (matrixf, rgb, rgb, samples); - - { - const Babl *from_trc_red = (void*)destination_space->space.trc[0]; - const Babl *from_trc_green = (void*)destination_space->space.trc[1]; - const Babl *from_trc_blue = (void*)destination_space->space.trc[2]; - for (i = 0; i < samples * 4; i+=4) - { - rgba_out_u8[i+0] = babl_trc_from_linear (from_trc_red, rgb[i+0]) * 255.5f; - rgba_out_u8[i+1] = babl_trc_from_linear (from_trc_green, rgb[i+1]) * 255.5f; - rgba_out_u8[i+2] = babl_trc_from_linear (from_trc_blue, rgb[i+2]) * 255.5f; - } + rgba_out_u8[i+3] = rgba_in_u8[i+3]; } + TRC_IN_u8(rgba_in_u8, rgba); + babl_matrix_mul_vectorff_buf4 (matrixf, rgba, rgba, samples); + TRC_OUT_u8(rgba, rgba_out_u8); } @@ -255,7 +271,7 @@ universal_nonlinear_rgb_u8_converter (const Babl *conversion, rgba_out[i*4+0]=in_trc_lut_red[rgb_in_u8[i*3+0]]; rgba_out[i*4+1]=in_trc_lut_green[rgb_in_u8[i*3+1]]; rgba_out[i*4+2]=in_trc_lut_blue[rgb_in_u8[i*3+2]]; - rgba_out[i*4+3]=rgb_in_u8[i*3+2] * 255.5f; + rgba_out[i*4+3]=rgb_in_u8[i*3+2] * 255.0f; } 
babl_matrix_mul_vectorff_buf4 (matrixf, rgba_out, rgba_out, samples); @@ -265,7 +281,7 @@ universal_nonlinear_rgb_u8_converter (const Babl *conversion, for (i = 0; i < samples; i++) for (unsigned int c = 0; c < 3; c ++) - rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.5f; + rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.0f; } } @@ -301,7 +317,6 @@ static inline void babl_matrix_mul_vectorff_buf4_sse2 (const float *mat, #undef m - static inline void universal_nonlinear_rgba_converter_sse2 (const Babl *conversion, unsigned char *__restrict__ src_char, @@ -344,36 +359,24 @@ universal_nonlinear_rgba_u8_converter_sse2 (const Babl *conversion, long samples, void *data) { + const Babl *source_space = conversion->conversion.source->format.space; const Babl *destination_space = conversion->conversion.destination->format.space; float * matrixf = data; - float * in_trc_lut_red = matrixf + 9; - float * in_trc_lut_green = in_trc_lut_red + 256; - float * in_trc_lut_blue = in_trc_lut_green + 256; - unsigned int i; uint8_t *rgba_in_u8 = (void*)src_char; uint8_t *rgba_out_u8 = (void*)dst_char; - float rgba_out[4*samples]; + float rgba[4*samples]; - for (i = 0; i < samples * 4; i+= 4) + for (int i = 0; i < samples*4; i+=4) { - rgba_out[i+0]=in_trc_lut_red[rgba_in_u8[i+0]]; - rgba_out[i+1]=in_trc_lut_green[rgba_in_u8[i+1]]; - rgba_out[i+2]=in_trc_lut_blue[rgba_in_u8[i+2]]; rgba_out_u8[i+3] = rgba_in_u8[i+3]; } + TRC_IN_u8(rgba_in_u8, rgba); - babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba_out, rgba_out, samples); - - { - int c; - TRC_OUT(rgba_out, rgba_out); + babl_matrix_mul_vectorff_buf4_sse2 (matrixf, rgba, rgba, samples); - for (i = 0; i < samples * 4; i+= 4) - for (c = 0; c < 3; c ++) - rgba_out_u8[i+c] = rgba_out[i+c] * 255.5f; - } + TRC_OUT_u8(rgba, rgba_out_u8); } static inline void @@ -409,7 +412,7 @@ universal_nonlinear_rgb_u8_converter_sse2 (const Babl *conversion, for (i = 0; i < samples; i++) for (unsigned c = 0; c < 3; c ++) - rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255.5f; + 
rgb_out_u8[i*3+c] = rgba_out[i*4+c] * 255 + 0.5f; } } @@ -530,6 +533,7 @@ add_rgb_adapter (Babl *babl, else #endif { +#if 1 prep_conversion(babl_conversion_new( babl_format_with_space("RGBA float", space), babl_format_with_space("RGBA float", babl), @@ -551,7 +555,8 @@ add_rgb_adapter (Babl *babl, babl_format_with_space("R'G'B'A float", space), "linear", universal_nonlinear_rgba_converter, NULL)); - +#endif +#if 1 prep_conversion(babl_conversion_new( babl_format_with_space("R'G'B'A float", space), babl_format_with_space("RGBA float", babl), @@ -562,6 +567,7 @@ add_rgb_adapter (Babl *babl, babl_format_with_space("RGBA float", space), "linear", universal_nonlinear_rgb_linear_converter, NULL)); +#endif prep_conversion(babl_conversion_new( babl_format_with_space("R'G'B'A u8", space), @@ -573,7 +579,7 @@ add_rgb_adapter (Babl *babl, babl_format_with_space("R'G'B'A u8", space), "linear", universal_nonlinear_rgba_u8_converter, NULL)); - +#if 1 prep_conversion(babl_conversion_new( babl_format_with_space("R'G'B' u8", space), babl_format_with_space("R'G'B' u8", babl), @@ -595,8 +601,8 @@ add_rgb_adapter (Babl *babl, babl_format_with_space("R'G'B'A float", babl), "linear", universal_linear_rgb_nonlinear_converter, NULL)); +#endif } -#if 0 prep_conversion(babl_conversion_new( babl_format_with_space("RGB float", space), babl_format_with_space("RGB float", babl), @@ -607,7 +613,6 @@ add_rgb_adapter (Babl *babl, babl_format_with_space("RGB float", space), "linear", universal_rgb_converter, NULL)); -#endif prep_conversion(babl_conversion_new( babl_format_with_space("Y float", space), babl_format_with_space("Y float", babl), diff --git a/babl/base/babl-trc.c b/babl/base/babl-trc.c index 6cb4900..a2f1d6e 100644 --- a/babl/base/babl-trc.c +++ b/babl/base/babl-trc.c @@ -211,7 +211,7 @@ _babl_trc_formula_srgb_to_linear (const Babl *trc_, float e = trc->lut[5]; float f = trc->lut[6]; - if (x >= d) + if (x >= d) // OPT can be reduced to be branchless { return _babl_trc_gamma_to_linear 
((Babl *) trc, a * x + b) + e; } @@ -364,6 +364,8 @@ _babl_trc_from_linear_buf_generic (const Babl *trc_, } } + + static inline void _babl_trc_linear_buf (const Babl *trc_, const float *__restrict__ in, float *__restrict__ out, @@ -494,6 +496,8 @@ BABL_SIMD_SUFFIX (babl_trc_new) (const char *name, trc_db[i].fun_to_linear_buf = _babl_trc_to_linear_buf_generic; trc_db[i].fun_from_linear_buf = _babl_trc_from_linear_buf_generic; + trc_db[i].fun_from_linear_buf_u8 = _babl_trc_from_linear_buf_u8_generic; + trc_db[i].fun_to_linear_buf_u8 = _babl_trc_to_linear_buf_u8_generic; switch (trc_db[i].type) { @@ -502,12 +506,14 @@ BABL_SIMD_SUFFIX (babl_trc_new) (const char *name, trc_db[i].fun_from_linear = _babl_trc_linear; trc_db[i].fun_from_linear_buf = _babl_trc_linear_buf; trc_db[i].fun_to_linear_buf = _babl_trc_linear_buf; + //trc_db[i].fun_to_linear_buf_u8 = _babl_trc_linear_buf_u8; break; case BABL_TRC_FORMULA_GAMMA: trc_db[i].fun_to_linear = _babl_trc_gamma_to_linear; trc_db[i].fun_from_linear = _babl_trc_gamma_from_linear; trc_db[i].fun_to_linear_buf = _babl_trc_gamma_to_linear_buf; trc_db[i].fun_from_linear_buf = _babl_trc_gamma_from_linear_buf; + //trc_db[i].fun_from_linear_buf_u8 = _babl_trc_gamma_from_linear_buf_u8; trc_db[i].poly_gamma_to_linear_x0 = POLY_GAMMA_X0; trc_db[i].poly_gamma_to_linear_x1 = POLY_GAMMA_X1; @@ -582,6 +588,7 @@ BABL_SIMD_SUFFIX (babl_trc_new) (const char *name, trc_db[i].fun_to_linear = _babl_trc_srgb_to_linear; trc_db[i].fun_from_linear = _babl_trc_srgb_from_linear; trc_db[i].fun_from_linear_buf = _babl_trc_srgb_from_linear_buf; + //trc_db[i].fun_from_linear_buf_u8 = _babl_trc_srgb_from_linear_buf_u8; trc_db[i].fun_to_linear_buf = _babl_trc_srgb_to_linear_buf; break; case BABL_TRC_LUT: diff --git a/babl/base/babl-trc.h b/babl/base/babl-trc.h index 79b6891..4556c96 100644 --- a/babl/base/babl-trc.h +++ b/babl/base/babl-trc.h @@ -51,6 +51,13 @@ typedef struct int out_gap, int components, int count); + void (*fun_to_linear_buf_u8)(const Babl 
*trc, + const uint8_t *in, + float *out, + int in_gap, + int out_gap, + int components, + int count); void (*fun_from_linear_buf)(const Babl *trc, const float *in, float *out, @@ -58,6 +65,13 @@ typedef struct int out_gap, int components, int count); + void (*fun_from_linear_buf_u8)(const Babl *trc, + const float *in, + uint8_t *out, + int in_gap, + int out_gap, + int components, + int count); BablPolynomial poly_gamma_to_linear; float poly_gamma_to_linear_x0; float poly_gamma_to_linear_x1; @@ -67,6 +81,8 @@ typedef struct float *lut; float *inv_lut; char name[128]; + int valid_u8_lut; + float u8_lut[256]; } BablTRC; static inline void babl_trc_from_linear_buf (const Babl *trc_, @@ -79,6 +95,26 @@ static inline void babl_trc_from_linear_buf (const Babl *trc_, trc->fun_from_linear_buf (trc_, in, out, in_gap, out_gap, components, count); } +static inline void babl_trc_from_linear_buf_u8 (const Babl *trc_, + const float *in, uint8_t *out, + int in_gap, int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + trc->fun_from_linear_buf_u8 (trc_, in, out, in_gap, out_gap, components, count); +} + +static inline void babl_trc_to_linear_buf_u8 (const Babl *trc_, + const uint8_t *in, float *out, + int in_gap, int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + trc->fun_to_linear_buf_u8 (trc_, in, out, in_gap, out_gap, components, count); +} + static inline void babl_trc_to_linear_buf (const Babl *trc_, const float *in, float *out, int in_gap, int out_gap, @@ -104,4 +140,66 @@ static inline float babl_trc_to_linear (const Babl *trc_, float value) void babl_trc_class_init_generic (void); + +static inline void +_babl_trc_from_linear_buf_u8_generic (const Babl *trc_, + const float *__restrict__ in, + uint8_t *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 
0; c < 3; c ++) + { + int val = trc->fun_from_linear (trc_, in[4 * i + c]) * 255.0 + 0.5; + out[4 * i + c] = val > 255 ? 255 : val; + } + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + { + int val = trc->fun_from_linear (trc_, in[in_gap * i + c]) * 255.0 + 0.5; + out[out_gap * i + c] = val > 255 ? 255 : val; + } + } +} + +static inline void +_babl_trc_to_linear_buf_u8_generic (const Babl *trc_, + const uint8_t *__restrict__ in, + float *__restrict__ out, + int in_gap, + int out_gap, + int components, + int count) +{ + BablTRC *trc = (void*)trc_; + if (!trc->valid_u8_lut) + { + for (int i = 0; i <= 255; i++) + trc->u8_lut[i] = trc->fun_to_linear (trc_, i/255.0f); + trc->valid_u8_lut=1; + } + if (in_gap == out_gap && in_gap == 4 && components == 3) + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < 3; c ++) + out[4 * i + c] = trc->u8_lut[in[4 * i + c]]; + } + else + { + for (int i = 0; i < count; i ++) + for (int c = 0; c < components; c ++) + out[out_gap * i + c] = trc->u8_lut[in[in_gap * i + c]]; + } +} + + #endif |