Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions data/kernels/basic.cl
Original file line number Diff line number Diff line change
Expand Up @@ -3722,8 +3722,6 @@ interpolation_resample (read_only image2d_t in,
// store final result
if (iy == 0 && x < width && y < height)
{
// Clip negative RGB that may be produced by Lanczos undershooting
// Negative RGB are invalid values no matter the RGB space (light is positive)
write_ipixel(out, (int2)(x, y), buffer[ylid]);
}
}
Expand Down
2 changes: 1 addition & 1 deletion data/kernels/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -280,5 +280,5 @@ static inline float readalpha(read_only image2d_t in, int col, int row)

// Write `pixel` into image `out` at coordinate `pos` using write_imagef().
// NOTE(review): this listing appears to be a rendered diff without +/- markers,
// so the two calls below are presumably the removed line (lower-bounded at 0.0f
// via fmax) followed by its replacement (unmodified pixel) -- confirm against
// the repository which one is live.
static inline void write_ipixel(write_only image2d_t out, const int2 pos, const float4 pixel)
{
// variant that applies fmax(0.0f, pixel) before writing
write_imagef(out, pos, fmax(0.0f, pixel));
// variant that writes the pixel as given
write_imagef(out, pos, pixel);
}
4 changes: 2 additions & 2 deletions data/kernels/extended.cl
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ graduatedndp (read_only image2d_t in,
float len = length_base + y*length_inc_y + x*length_inc_x;
float dens = dtcl_exp2(density * clipf(0.5f + len));
pixel = pixel / (color + ((float4)1.0f - color) * (float4)dens);
write_imagef (out, (int2)(x, y), fmax(0.0f, pixel));
write_ipixel(out, (int2)(x, y), pixel);
}


Expand All @@ -67,7 +67,7 @@ graduatedndm (read_only image2d_t in,
float len = length_base + y*length_inc_y + x*length_inc_x;
float dens = dtcl_exp2(-density * clipf(0.5f - len));
pixel = pixel * (color + ((float4)1.0f - color) * (float4)dens);
write_imagef(out, (int2)(x, y), fmax(0.0f, pixel));
write_ipixel(out, (int2)(x, y), pixel);
}

__kernel void
Expand Down
13 changes: 4 additions & 9 deletions src/common/interpolation.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,6 @@ enum border_mode
// !! Make sure to sync this with the filter array !!
#define MAX_HALF_FILTER_WIDTH 3

// Emit *verbose* debug messages (roughly one message per output pixel) to stderr
#define DEBUG_PRINT_VERBOSE 0

/* --------------------------------------------------------------------------
* Debug helpers
* ------------------------------------------------------------------------*/
Expand Down Expand Up @@ -552,7 +549,6 @@ float dt_interpolation_compute_sample(const dt_interpolation_t *itor,
s += kernelv[i] * h;
in += linestride;
}
s = _interpolated_out(s * oonorm);
}
else if(ix >= 0 && iy >= 0 && ix < width && iy < height)
{
Expand All @@ -574,9 +570,8 @@ float dt_interpolation_compute_sample(const dt_interpolation_t *itor,
}
s += kernelv[i] * h;
}
s = _interpolated_out(s * oonorm);
}
return s; // if called for masks make sure to CLIP to avoid interpolator under/overshoots
return s * oonorm; // if called for masks make sure to CLIP to avoid interpolator under/overshoots
}

/* --------------------------------------------------------------------------
Expand Down Expand Up @@ -645,7 +640,7 @@ void dt_interpolation_compute_pixel4c(const dt_interpolation_t *itor,
}

for_each_channel(c,aligned(out))
out[c] = _interpolated_out(pixel[c] * oonorm);
out[c] = pixel[c] * oonorm;
}
else if(ix >= 0 && iy >= 0 && ix < width && iy < height)
{
Expand Down Expand Up @@ -675,7 +670,7 @@ void dt_interpolation_compute_pixel4c(const dt_interpolation_t *itor,
}

for_each_channel(c,aligned(out))
out[c] = _interpolated_out(pixel[c] * oonorm);
out[c] = pixel[c] * oonorm;
}
else
{
Expand Down Expand Up @@ -1118,7 +1113,7 @@ void dt_interpolation_resample(const dt_interpolation_t *itor,

dt_aligned_pixel_t pixel;
for_each_channel(c, aligned(vs:16))
pixel[c] = _interpolated_out(vs[c]);
pixel[c] = vs[c];
copy_pixel_nontemporal(out + baseidx, pixel);

// Reset vertical resampling context
Expand Down
9 changes: 0 additions & 9 deletions src/common/math.h
Original file line number Diff line number Diff line change
Expand Up @@ -811,15 +811,6 @@ static inline double rad2deg(const double radians)
return radians / M_PI * 180.0;
}

/* Lower-bound an interpolated sample at zero.
   All pixel interpolators route their result through this helper, so the
   output of resampling is currently restricted to be at least zero.
   Reminder: keep in sync with opencl write_ipixel()
*/
static inline float _interpolated_out(const float val)
{
  // only replace the value when zero is strictly greater than it;
  // anything else is returned untouched
  if(0.0f > val) return 0.0f;
  return val;
}


// clang-format off
// modelines: These editor modelines have been set for all relevant files by tools/update_modelines.py
// vim: shiftwidth=2 expandtab tabstop=2 cindent
Expand Down
37 changes: 5 additions & 32 deletions src/iop/graduatednd.c
Original file line number Diff line number Diff line change
Expand Up @@ -743,35 +743,9 @@ int scrolled(
return 0;
}

// Return dens multiplied by CLIP(0.5 + length), divided by 8.
DT_OMP_DECLARE_SIMD(simdlen(4))
static inline float _density_times_length(const float dens, const float length)
{
  const float ramp_pos = 0.5f + length;
  return (dens * CLIP(ramp_pos)) / 8.0f;
}

// Compute the density factor 2^(dens * CLIP(0.5 + length)).
// When __FAST_MATH__ is defined the value is approximated as (e^t)^8, with
// t = M_LN2f * _density_times_length(dens, length) and e^t taken from a Taylor
// polynomial up to t^4; otherwise exp2f() is called directly.
// NOTE(review): this listing appears to be a rendered diff without +/- markers;
// the trailing `return exp2f(...)` after `return density;` is unreachable as
// written and is presumably the replacement body -- confirm against the repository.
DT_OMP_DECLARE_SIMD(simdlen(4))
static inline float _compute_density(const float dens, const float length)
{
#ifdef __FAST_MATH__
// !!! approximation is ok only when highest density is 8
// for input x = dens * CLIP(0.5 + length), calculate 2^x as (e^(ln2*x/8))^8
// use a Taylor approximation to calculate e^(ln2*x/8)
// in worst case - density==8,CLIP(0.5-length) == 1.0 it gives 0.6% of error
const float t = M_LN2f * _density_times_length(dens,length);
// successive Taylor terms: t^2/2, t^3/6, t^4/24
const float d1 = t * t * 0.5f;
const float d2 = d1 * t * 0.333333333f;
const float d3 = d2 * t * 0.25f;
const float d = 1 + t + d1 + d2 + d3; /* taylor series for e^x till x^4 */
// raise d to the 8th power by squaring three times
float density = d * d;
density = density * density;
density = density * density;
#else
// use fair exp2f
// for GCC10 on recent hardware, exp2f is actually faster than the above approximation,
// but it does not vectorize so it is slower overall
const float density = exp2f(dens * CLIP(0.5f + length));
#endif
return density;
return exp2f(dens * CLIP(0.5f + length));
}

void process(dt_iop_module_t *self,
Expand Down Expand Up @@ -813,7 +787,6 @@ void process(dt_iop_module_t *self,
// these into registers when it vectorizes
const dt_aligned_pixel_t color = { data->color[0], data->color[1], data->color[2], data->color[3] };
const dt_aligned_pixel_t color1 = { data->color1[0], data->color1[1], data->color1[2], data->color1[3] };
const dt_aligned_pixel_t zero = { 0.0f, 0.0f, 0.0f, 0.0f };

if(density > 0)
{
Expand Down Expand Up @@ -843,7 +816,7 @@ void process(dt_iop_module_t *self,
dt_aligned_pixel_t res; // the compiler will optimize this into a register
for_each_channel(l, aligned(in : 16))
{
res[l] = MAX(zero[l], (in[4*(x+i)+l] / (color[l] + color1[l] * curr_density[i])));
res[l] = in[4*(x+i)+l] / (color[l] + color1[l] * curr_density[i]);
}
// use streaming writes to eliminate the memory reads from loading cache lines
copy_pixel_nontemporal(out + 4*(x+i), res);
Expand All @@ -857,7 +830,7 @@ void process(dt_iop_module_t *self,
dt_aligned_pixel_t res; // the compiler will optimize this into a register
for_each_channel(l, aligned(in : 16))
{
res[l] = MAX(zero[l], (in[4*x+l] / (color[l] + color1[l] * curr_density)));
res[l] = in[4*x+l] / (color[l] + color1[l] * curr_density);
}
// use streaming writes to eliminate the memory reads from loading cache lines
copy_pixel_nontemporal(out + 4*x, res);
Expand Down Expand Up @@ -894,7 +867,7 @@ void process(dt_iop_module_t *self,
dt_aligned_pixel_t res; // the compiler will optimize this into a register
for_each_channel(l, aligned(in : 16))
{
res[l] = MAX(zero[l], (in[4*(x+i)+l] * (color[l] + color1[l] * curr_density[i])));
res[l] = in[4*(x+i)+l] * (color[l] + color1[l] * curr_density[i]);
}
// use streaming writes to eliminate the memory reads from loading cache lines
copy_pixel_nontemporal(out + 4*(x+i), res);
Expand All @@ -908,7 +881,7 @@ void process(dt_iop_module_t *self,
dt_aligned_pixel_t res; // the compiler will optimize this into a register
for_each_channel(l, aligned(in : 16))
{
res[l] = MAX(zero[l], (in[4*x+l] * (color[l] + color1[l] * curr_density)));
res[l] = in[4*x+l] * (color[l] + color1[l] * curr_density);
}
// use streaming writes to eliminate the memory reads from loading cache lines
copy_pixel_nontemporal(out + 4*x, res);
Expand Down
Loading