为什么我的计算着色器没有调整所有输入粒子的位置？武尔坎/GLSL/C++-解网

问：

我刚刚弄清楚了使用 Vulkan 实现的计算着色器。但是，我很难理解为什么我输入到着色器的粒子中只有一小部分正在更新。在视频中。int PARTICLE_COUNT = 32000

YouTube上传的问题。请原谅我没有使用 Imgur，它在过去的几个小时里一直不适合我，并且不允许我创建任何可访问的上传。

计算着色器代码如下：

#version 450

struct camera {
    mat4 view;
    mat4 proj;
    vec3 position;
};

layout(binding = 0) uniform UniformBufferObject {
    float dt;
    mat4 model;
    camera cam;  
} ubo;

struct Particle {
    vec4 position;
    vec4 color;
    vec4 velocity; 
};

layout(std140, set = 2, binding = 0) readonly buffer inSSBO {
   Particle particlesIn[ ];
};

layout(std140, set = 2, binding = 1) buffer outSSBO {
   Particle particlesOut[ ];
};

layout (local_size_x = 10, local_size_y = 10, local_size_z = 10) in;

// Organization and Indexing
uvec3 nWG = gl_NumWorkGroups;
uvec3 sWG = gl_WorkGroupSize;
uint i = gl_WorkGroupID.x + (nWG.x * gl_WorkGroupID.y) + (nWG.x * nWG.y * gl_WorkGroupID.z);
uint j = gl_LocalInvocationID.x + (sWG.x * gl_LocalInvocationID.y) + (sWG.x * sWG.y *gl_LocalInvocationID.z);
    
// Globals
const float c = 1.0f;

// Calculates acceleration towards a position
vec3 Gravity(vec3 p1, vec3 p2, float m1, float m2) {
    vec3 rN = normalize(p2 - p1);
    float dist2 = distance(p2, p1);
    dist2 *= dist2;
    return rN * ((m1 * m2) / (dist2));
}

void main() 
{
    particlesOut[i].position.xyz = particlesIn[i].position.xyz;
    
    // Kinematic Motion of the Elements of the System
    vec3 Acceleration;
    if (i != j)
    {// Particle Interaction Calculations
        // Interacting Particle Properties
        float m0 = 1.f;
        float m1 = 1.f;
        vec3 p0 = particlesIn[i].position.xyz;
        vec3 p1 = particlesIn[j].position.xyz;
        
        // Velocity Calculation
        particlesOut[i].velocity.xyz += Gravity(p0, p1, m0, m1) * ubo.dt;

        if (length(particlesOut[i].velocity) > c/2)
        {// Sets the Velocity Maximum to the Speed of Light (divided by two bc ITS TOO FAST)
            normalize(particlesOut[i].velocity);
            particlesOut[i].velocity *= c/2;
        }

        particlesOut[i].position.xyz += particlesOut[i].velocity.xyz * ubo.dt;

        // Flip movement at volume border
        if ((particlesOut[i].position.x <= -1.0) || (particlesOut[i].position.x >= 1.0)) {
            particlesOut[i].velocity.x = -particlesOut[i].velocity.x;
        }
        if ((particlesOut[i].position.y <= -1.0) || (particlesOut[i].position.y >= 1.0)) {
            particlesOut[i].velocity.y = -particlesOut[i].velocity.y;
        }
        if ((particlesOut[i].position.z <= -1.0) || (particlesOut[i].position.z >= 1.0)) {
            particlesOut[i].velocity.z = -particlesOut[i].velocity.z;
        }
    }   
}

计算着色器调度代码如下：

void computeCommand(VkCommandBuffer& commandBuffer, uint32_t setCount, VkDescriptorSet* sets) {
    VkCommandBufferBeginInfo beginInfo
    { VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };

    if (vkBeginCommandBuffer(commandBuffer, &beginInfo) != VK_SUCCESS) {
        throw std::runtime_error("failed to begin recording command buffer!");
    }

    vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, mPipeline);

    vkCmdBindDescriptorSets(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, mLayout, 0, setCount, sets, 0, nullptr);

    vkCmdDispatch(commandBuffer, PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000));

    if (vkEndCommandBuffer(commandBuffer) != VK_SUCCESS) {
        throw std::runtime_error("failed to record compute command buffer!");
    }
}

以及最后的潜在罪魁祸首，粒子和数据缓冲区代码：struct

struct Particle {
    glm::vec4 position;
    glm::vec4 color;
    glm::vec4 velocity;

    const static VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
    static VkVertexInputBindingDescription vkCreateBindings() {
        VkVertexInputBindingDescription bindingDescription{};
        bindingDescription.binding = 0;
        bindingDescription.stride = sizeof(Particle);
        bindingDescription.inputRate = VK_VERTEX_INPUT_RATE_VERTEX;

        return bindingDescription;
    }

    static std::array<VkVertexInputAttributeDescription, 2> vkCreateAttributes() {
        std::array<VkVertexInputAttributeDescription, 2> attributeDescriptions{};

        attributeDescriptions[0].binding = 0;
        attributeDescriptions[0].location = 0;
        attributeDescriptions[0].format = VK_FORMAT_R32G32B32A32_SFLOAT;
        attributeDescriptions[0].offset = offsetof(Particle, position);

        attributeDescriptions[1].binding = 0;
        attributeDescriptions[1].location = 1;
        attributeDescriptions[1].format = VK_FORMAT_R32G32B32A32_SFLOAT;
        attributeDescriptions[1].offset = offsetof(Particle, color);

        return attributeDescriptions;
    }
    static VkPipelineVertexInputStateCreateInfo vkCreateVertexInput() {
        static auto bindingDescription = vkCreateBindings();
        static auto attributeDescriptions = vkCreateAttributes();

        VkPipelineVertexInputStateCreateInfo vertexInputInfo
        { VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO };
        vertexInputInfo.vertexBindingDescriptionCount = 1;
        vertexInputInfo.vertexAttributeDescriptionCount = static_cast<uint32_t>(attributeDescriptions.size());
        vertexInputInfo.pVertexBindingDescriptions = &bindingDescription;
        vertexInputInfo.pVertexAttributeDescriptions = attributeDescriptions.data();
        return vertexInputInfo;
    }
};

// SSBO struct initializes and stores an std::vector<Particle> particles;

void createDataBuffer(SSBO& ssbo) {
    void* data;
    VkBuffer stagingBuffer;
    VkDeviceMemory stagingBufferMemory;

    Buffer.resize(MAX_FRAMES_IN_FLIGHT);
    Memory.resize(MAX_FRAMES_IN_FLIGHT);

    bufferSize = sizeof(Particle)*PARTICLE_COUNT;

    createBuffer(stagingBuffer, stagingBufferMemory,
        VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
    
    vkMapMemory(VkGPU::device, stagingBufferMemory, 0, bufferSize, 0, &data);
    memcpy(data, ssbo.particles.data(), (size_t)bufferSize);
    vkUnmapMemory(VkGPU::device, stagingBufferMemory);
    
    for (size_t i = 0; i < MAX_FRAMES_IN_FLIGHT; i++) {
        createBuffer(Buffer[i], Memory[i],
            VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
            VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

        copyBuffer(stagingBuffer, Buffer[i]);
    }

    vkDestroyBuffer(VkGPU::device, stagingBuffer, nullptr);
    vkFreeMemory(VkGPU::device, stagingBufferMemory, nullptr);
}

当 .移动的粒子更少。2000 个粒子中最多有 10 个。再次，请原谅我使用 YouTube 进行上传。PARTICLE_COUNT = 2000

我有一种感觉，问题出在计算着色器中的索引上，但我不完全确定。我的另一个想法是，分发给工作组的粒子数量可能是问题的根源，但减少粒子数量只会使问题更加明显。

编辑：修复了计算着色器中未准确更新对象位置的行，但这是测试 SSBO 属性的残余。修复该行对解决问题没有任何影响。ParticlesOut[]readonly

C++ GLSL Vulkan 计算着色器

答：

0赞 ModernEraCaveman 7/11/2023 #1

有时，努力工作后休息一下是件好事。经过一顿丰盛的饭菜和休息，我能够用新鲜的眼光看待我的代码，并找出我做错了什么。

在调度计算着色器时，我给每个工作组轴的粒子太少而无法处理。我原来的调度命令是：

vkCmdDispatch(commandBuffer, PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000), PARTICLE_COUNT / (1000));

而我的计算着色器的布局是：

layout (local_size_x = 10, local_size_y = 10, local_size_z = 10) in;

将 dispatch 命令更新为以下代码行可提供要处理的适当数量的粒子：

vkCmdDispatch(commandBuffer, PARTICLE_COUNT / (100), PARTICLE_COUNT / (100), PARTICLE_COUNT / (100));

我认为问题源于一开始给每个工作组的粒子太少，所以不是每个粒子都被处理。我通过降低调度命令来测试这一点，这导致了可怕的帧率下降，因为每次调用都必须处理 10 倍以上的粒子。groupCountPARTICLE_COUNT / (10)

我不完全清楚 3D 工作组背后的数学原理或原理，但这似乎与其他两个工作组轴的大小有关，其中除数等于其他局部工作组的乘积。即和，所以除数等于或。如果有人能更好地解释计算背后的数学，我将不胜感激，因为我不完全理解它，超出了我在这里所能解释的内容。local_size_y = 10local_size_z = 1010*10100groupCount

上一个：是否可以将函数作为参数重载运算符？

下一个：如何在其他标头中同时使用头文件和 cpp 文件？

为什么我的计算着色器没有调整所有输入粒子的位置？武尔坎/GLSL/C++

Why is my compute shader not adjusting the positions of all of the input particles? Vulkan/GLSL/C++

评论