#version 430
#extension GL_NV_shader_atomic_float : require

// One invocation per workgroup: main() uses gl_WorkGroupID.xy directly as the
// source pixel coordinate.
// NOTE(review): this presumably expects a dispatch of (width, height, 1)
// workgroups - confirm against the host code.
layout(local_size_x = 1) in;

// Input textures, all fetched with integer texel coordinates at mip level 0.
layout(location = 0) uniform sampler2D texSpeed; // declared but never read in this shader
layout(location = 1) uniform sampler2D texColor; // .rgb is the colour that gets scattered
layout(location = 2) uniform sampler2D texDepth; // .r is remapped from [0, 1] to [-1, 1] and linearised
// Multiplied with sqrt(|normalised depth|) to select an entry of cumsum[].
layout(location = 3) uniform int uNbPSF;
// Integer offset added to every output coordinate before writing.
// NOTE(review): the name suggests the output images are padded by this amount
// on each side - confirm against the host allocation.
layout(location = 4) uniform ivec2 uHalfPadding;

// Accumulation targets, one single-channel float image per component.
// They are only updated through imageAtomicAdd (float atomics come from
// GL_NV_shader_atomic_float). texOutA receives the sum of the sample weights
// (p.z) alone, the other three receive colour * weight.
layout(r32f, binding = 0) restrict uniform image2D texOutR;
layout(r32f, binding = 1) restrict uniform image2D texOutG;
layout(r32f, binding = 2) restrict uniform image2D texOutB;
layout(r32f, binding = 3) restrict uniform image2D texOutA;

// Spreadlet samples stored as a flat float array, three consecutive floats
// per sample: (x offset, y offset, weight). Sample i lives at
// points[3 * i .. 3 * i + 2]. The shader only reads this buffer, hence
// `readonly`.
layout(std430, binding = 0) readonly buffer SpreadletsVBO
{
    float points[]; // z is laplacian(f)
};

// Start offsets (in samples) into points[]: the samples belonging to bin k
// are [cumsum[k], cumsum[k + 1]). The shader only reads this buffer, hence
// `readonly`.
// NOTE(review): the name suggests a running sum of per-PSF sample counts, so
// the buffer presumably holds at least uNbPSF + 1 entries - confirm on the host.
layout(std430, binding = 1) readonly buffer SpreadletsSizesVBO
{
    int cumsum[];
};

// Clip plane distances used to linearise the depth buffer value.
// NOTE(review): hard-coded here; presumably they must match the projection
// that produced texDepth - confirm against the host code.
const float zNear = 0.01, zFar = 50.;

// Scatters the colour of one source pixel into the accumulation images.
//
// For the pixel addressed by gl_WorkGroupID.xy:
//   1. fetch its depth, linearise it and normalise it to [-1, 1];
//   2. pick a bin of spreadlet samples from sqrt(|depth|) * uNbPSF;
//   3. for every sample (dx, dy, w) of that bin, atomically add
//      colour * w to texOutR/G/B and w to texOutA at
//      pixel + (dx, dy) + uHalfPadding.
void main()
{
    ivec2 pixel = ivec2(gl_WorkGroupID.xy);

    // texelFetch outside the texture is undefined, so ignore invocations
    // that fall outside either source texture (over-sized dispatch).
    ivec2 srcSize = min(textureSize(texDepth, 0), textureSize(texColor, 0));
    if (any(greaterThanEqual(pixel, srcSize)))
        return;

    // Depth buffer value [0, 1] -> NDC z [-1, 1] -> linear distance in
    // [zNear, zFar] -> normalised to [-1, 1] between the two planes.
    float nl = texelFetch(texDepth, pixel, 0).r * 2. - 1.,
        depth = 2. * zNear * zFar / (zFar + zNear - nl * (zFar - zNear));
    depth = (depth - zNear) / (zFar - zNear) * 2. - 1.;

    vec3 color = texelFetch(texColor, pixel, 0).rgb;

    // A bin needs both cumsum[index] and cumsum[index + 1], so the highest
    // usable index is length - 2. Without any complete bin there is nothing
    // to scatter.
    int lastBin = cumsum.length() - 2;
    if (lastBin < 0)
        return;

    // For SimpleLens ; beta = 2
    // sqrt(|depth|) lies in [0, 1] and reaches exactly 1 on the far plane
    // (e.g. a cleared depth buffer), which would yield index == uNbPSF and
    // read cumsum[] past its end. Clamp to the last complete bin instead.
    int index = clamp(int(sqrt(abs(depth)) * uNbPSF), 0, lastBin);

    // Read the sample range once instead of re-reading cumsum[] on every
    // loop iteration.
    int first = cumsum[index];
    int last = cumsum[index + 1];
    for(int i = first; i < last; i++)
    {
        int base = i * 3;
        vec3 p = vec3(points[base], points[base + 1], points[base + 2]);

        // p.xy is truncated towards zero to get an integer pixel offset;
        // p.z is the weight of this sample.
        ivec2 pxOut = pixel + ivec2(p.xy) + uHalfPadding;
        imageAtomicAdd(texOutR, pxOut, color.r * p.z);
        imageAtomicAdd(texOutG, pxOut, color.g * p.z);
        imageAtomicAdd(texOutB, pxOut, color.b * p.z);
        imageAtomicAdd(texOutA, pxOut, p.z);
    }
}
