为什么我的计算着色器没有调整所有输入粒子的位置?武尔坎/GLSL/C++

Why is my compute shader not adjusting the positions of all of the input particles? Vulkan/GLSL/C++

提问人:ModernEraCaveman 提问时间:7/11/2023 最后编辑:ModernEraCaveman 更新时间:7/11/2023 访问量:53

问:

我刚刚弄清楚了使用 Vulkan 实现的计算着色器。但是,我很难理解为什么我输入到着色器的粒子中只有一小部分正在更新。 在视频中。int PARTICLE_COUNT = 32000

YouTube上传的问题。请原谅我没有使用 Imgur,它在过去的几个小时里一直不适合我,并且不允许我创建任何可访问的上传。

计算着色器代码如下:

#version 450

struct camera {
    mat4 view;
    mat4 proj;
    vec3 position;
};

layout(binding = 0) uniform UniformBufferObject {
    float dt;
    mat4 model;
    camera cam;  
} ubo;

struct Particle {
    vec4 position;
    vec4 color;
    vec4 velocity; 
};

layout(std140, set = 2, binding = 0) readonly buffer inSSBO {
   Particle particlesIn[ ];
};

layout(std140, set = 2, binding = 1) buffer outSSBO {
   Particle particlesOut[ ];
};

layout (local_size_x = 10, local_size_y = 10, local_size_z = 10) in;

// Organization and Indexing
uvec3 nWG = gl_NumWorkGroups;
uvec3 sWG = gl_WorkGroupSize;
uint i = gl_WorkGroupID.x + (nWG.x * gl_WorkGroupID.y) + (nWG.x * nWG.y * gl_WorkGroupID.z);
uint j = gl_LocalInvocationID.x + (sWG.x * gl_LocalInvocationID.y) + (sWG.x * sWG.y *gl_LocalInvocationID.z);
    
// Globals
const float c = 1.0f;

// Calculates acceleration towards a position
vec3 Gravity(vec3 p1, vec3 p2, float m1, float m2) {
    vec3 rN = normalize(p2 - p1);
    float dist2 = distance(p2, p1);
    dist2 *= dist2;
    return rN * ((m1 * m2) / (dist2));
}

void main() 
{
    particlesOut[i].position.xyz = particlesIn[i].position.xyz;
    
    // Kinematic Motion of the Elements of the System
    vec3 Acceleration;
    if (i != j)
    {// Particle Interaction Calculations
        // Interacting Particle Properties
        float m0 = 1.f;
        float m1 = 1.f;
        vec3 p0 = particlesIn[i].position.xyz;
        vec3 p1 = particlesIn[j].position.xyz;
        
        // Velocity Calculation
        particlesOut[i].velocity.xyz += Gravity(p0, p1, m0, m1) * ubo.dt;

        if (length(particlesOut[i].velocity) > c/2)
        {// Sets the Velocity Maximum to the Speed of Light (divided by two bc ITS TOO FAST)
            normalize(particlesOut[i].velocity);
            particlesOut[i].velocity *= c/2;
        }

        particlesOut[i].position.xyz += particlesOut[i].velocity.xyz * ubo.dt;

        // Flip movement at volume border
        if ((particlesOut[i].position.x <= -1.0) || (particlesOut[i].position.x >= 1.0)) {
            particlesOut[i].velocity.x = -particlesOut[i].velocity.x;
        }
        if ((particlesOut[i].position.y <= -1.0) || (particlesOut[i].position.y >= 1.0)) {
            particlesOut[i].velocity.y = -particlesOut[i].velocity.y;
        }
        if ((particlesOut[i].position.z <= -1.0) || (particlesOut[i].position.z >= 1.0)) {
            particlesOut[i].velocity.z = -particlesOut[i].velocity.z;
        }
    }   
}

计算着色器调度代码如下:

void computeCommand(VkCommandBuffer& commandBuffer, uint32_t setCount, VkDescriptorSet* sets) {
    VkCommandBufferBeginInfo beginInfo
    { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };

    if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS) {
        throw std::runtime_error("failed to begin recording command buffer!");
    }

    vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, mPipeline);

    vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, mLayout, 0, setCount, sets, 0, nullptr);

    vkCmdDispatch(commandBuffer, PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000));

    if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS) {
        throw std::runtime_error("failed to record compute command buffer!");
    }
}

以及最后的潜在罪魁祸首,粒子和数据缓冲区代码:struct

struct Particle {
    glm::vec4 position;
    glm::vec4 color;
    glm::vec4 velocity;

    const static VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
    static VkVertexInputBindingDescription vkCreateBindings() {
        VkVertexInputBindingDescription bindingDescription{};
        bindingDescription.binding = 0;
        bindingDescription.stride = sizeof(Particle);
        bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

        return bindingDescription;
    }

    static std::array<VkVertexInputAttributeDescription, 2> vkCreateAttributes() {
        std::array<VkVertexInputAttributeDescription, 2> attributeDescriptions{};

        attributeDescriptions[0].binding = 0;
        attributeDescriptions[0].location = 0;
        attributeDescriptions[0].format = VK_FORMAT_R32G32B32A32_SFLOAT;
        attributeDescriptions[0].offset = offsetof(Particle, position);

        attributeDescriptions[1].binding = 0;
        attributeDescriptions[1].location = 1;
        attributeDescriptions[1].format = VK_FORMAT_R32G32B32A32_SFLOAT;
        attributeDescriptions[1].offset = offsetof(Particle, color);

        return attributeDescriptions;
    }
    static VkPipelineVertexInputStateCreateInfo vkCreateVertexInput() {
        static auto bindingDescription = vkCreateBindings();
        static auto attributeDescriptions = vkCreateAttributes();

        VkPipelineVertexInputStateCreateInfo vertexInputInfo
        { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
        vertexInputInfo.vertexBindingDescriptionCount = 1;
        vertexInputInfo.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributeDescriptions.size());
        vertexInputInfo.pVertexBindingDescriptions = &bindingDescription;
        vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data();
        return vertexInputInfo;
    }
};

// SSBO struct initializes and stores an std::vector<Particle> particles;

void createDataBuffer(SSBO& ssbo) {
    void* data;
    VkBuffer stagingBuffer;
    VkDeviceMemory stagingBufferMemory;

    Buffer.resize(MAX_FRAMES_IN_FLIGHT);
    Memory.resize(MAX_FRAMES_IN_FLIGHT);

    bufferSize = sizeof(Particle)*PARTICLE_COUNT;

    createBuffer(stagingBuffer, stagingBufferMemory,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    
    vkMapMemory(VkGPU::device, stagingBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, ssbo.particles.data(), (size_t)bufferSize);
    vkUnmapMemory(VkGPU::device, stagingBufferMemory);
    
    for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
        createBuffer(Buffer[i], Memory[i],
            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

        copyBuffer(stagingBuffer, Buffer[i]);
    }

    vkDestroyBuffer(VkGPU::device, stagingBuffer, nullptr);
    vkFreeMemory(VkGPU::device, stagingBufferMemory, nullptr);
}

当 .移动的粒子更少。2000 个粒子中最多有 10 个。再次,请原谅我使用 YouTube 进行上传。PARTICLE_COUNT = 2000

我有一种感觉,问题出在计算着色器中的索引上,但我不完全确定。我的另一个想法是,分发给工作组的粒子数量可能是问题的根源,但减少粒子数量只会使问题更加明显。

编辑:修复了计算着色器中未准确更新对象位置的行,但这是测试 SSBO 属性的残余。修复该行对解决问题没有任何影响。ParticlesOut[]readonly

C++ GLSL Vulkan 计算着色器

评论


答:

0赞 ModernEraCaveman 7/11/2023 #1

有时,努力工作后休息一下是件好事。经过一顿丰盛的饭菜和休息,我能够用新鲜的眼光看待我的代码,并找出我做错了什么。

在调度计算着色器时,我给每个工作组轴的粒子太少而无法处理。我原来的调度命令是:

vkCmdDispatch(commandBuffer, PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000));

而我的计算着色器的布局是:

layout (local_size_x = 10, local_size_y = 10, local_size_z = 10) in;

将 dispatch 命令更新为以下代码行可提供要处理的适当数量的粒子:

vkCmdDispatch(commandBuffer, PARTICLE_COUNT / (100), PARTICLE_COUNT / (100), PARTICLE_COUNT / (100));

我认为问题源于一开始给每个工作组的粒子太少,所以不是每个粒子都被处理。我通过降低调度命令来测试这一点,这导致了可怕的帧率下降,因为每次调用都必须处理 10 倍以上的粒子。groupCountPARTICLE_COUNT / (10)

我不完全清楚 3D 工作组背后的数学原理或原理,但这似乎与其他两个工作组轴的大小有关,其中除数等于其他局部工作组的乘积。即 和 ,所以除数等于 或 。如果有人能更好地解释计算背后的数学,我将不胜感激,因为我不完全理解它,超出了我在这里所能解释的内容。local_size_y = 10local_size_z = 1010*10100groupCount