IAudioClient 播放短声音时声音被截断

IAudioClient cutting short sounds

提问人:Pavel Dubsky 提问时间:5/12/2023 更新时间:5/12/2023 访问量:25

问:

我尝试使用 IAudioClient 播放 WAV 文件,但遇到了非常奇怪的行为。如果我尝试播放非常短的声音(大约 0.2 秒 - 0.3 秒),它们会在结尾或开头被截断。在 Windows Media Player 中也可以观察到完全相同的行为。但是,如果其他应用程序正在播放声音(Google Chrome 选项卡、Spotify、在 VS Code 编辑器中打开的音频文件),那么我的程序(以及 Windows Media Player)就可以正确播放声音。

下面是一个完整的示例代码,演示了该问题(基于 https://learn.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream)。有人能就我的做法给出猜测,或者指出确切的问题吗?源代码很长,但它是完整的;AudioSource 类中的所有内容都应该正常工作,它只是文件读取和重采样逻辑。我想问题出在 Run 函数算法的某个地方。

#include <Audioclient.h>
#include <mfapi.h>
#include <mferror.h>
#include <mftransform.h>
#include <mmdeviceapi.h>
#include <wmcodecdsp.h>
#include <wrl/client.h>

#include <climits>
#include <cstdio>
#include <cstring>
#include <vector>

#pragma comment(lib, "Mfplat.lib")
#pragma comment(lib, "Mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid.lib")

using namespace std;

// Returns from the enclosing function with `hres` when it is a failure
// HRESULT. The argument is evaluated exactly once, and the expansion is a
// single braced statement, so existing call sites that omit the trailing
// semicolon keep compiling and the macro cannot capture a following `else`.
#define EXIT_ON_ERROR(hres) \
    { const HRESULT exitOnErrorResult_ = (hres); if (FAILED(exitOnErrorResult_)) { return exitOnErrorResult_; } }

// REFERENCE_TIME conversion factors: with 10'000 ticks per millisecond, one
// tick is 100 nanoseconds.
constexpr REFERENCE_TIME ReftimesPerMillisecond = 10'000;
constexpr REFERENCE_TIME ReftimesPerSecond = 1'000 * ReftimesPerMillisecond;

struct WaveHeader
{
    CHAR riff[4];
    UINT32 fileSize;
    CHAR wave[4];
    CHAR fmt[4];
    UINT32 fmtLength;
    UINT16 fmtType;
    UINT16 channels;
    UINT32 sampleRate;
    UINT32 blockAlign;
    UINT16 byteRate;
    UINT16 bitsPerSample;
};

// Eight-byte chunk header read from the file while scanning for the chunk
// tagged "data". The reader fills it directly with fread, so the member
// order and widths must match the file.
struct WaveDataHeader
{
    CHAR data[4];    // Four-character chunk tag; the reader compares it against "data".
    UINT32 dataSize; // Chunk size field; the reader uses it as a byte count to skip or to read.
};

struct AudioSource
{
    HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags, const WAVEFORMATEX& waveFormatOut)
    {
        HRESULT hr = S_OK;

        const UINT32 frameSize = waveFormatOut.wBitsPerSample / CHAR_BIT * waveFormatOut.nChannels;
        const UINT32 framesOut = dataOut.size() / frameSize;
        const UINT32 extraFrames = bufferFrameCount - framesOut;
        const UINT32 maxBytesToCopy = bufferFrameCount * frameSize;

        UINT32 bytesToCopy = dataOut.size();
        if (bytesToCopy > maxBytesToCopy)
        {
            bytesToCopy = maxBytesToCopy;
        }

        if (bytesToCopy == 0)
        {
            *flags = AUDCLNT_BUFFERFLAGS_SILENT;
        }

        memcpy(pData, dataOut.data(), bytesToCopy);
        memset(pData + bytesToCopy, 0, maxBytesToCopy - bytesToCopy);

        dataOut.erase(dataOut.begin(), dataOut.begin() + bytesToCopy);

        return hr;
    }

    HRESULT Read(const char* fileName, const WAVEFORMATEX& waveFormatOut)
    {
        HRESULT hr = S_OK;

        FILE* file = fopen(fileName, "rb");
        if (file)
        {
            fseek(file, 0, SEEK_END);
            const INT32 fileSize = ftell(file);
            rewind(file);

            WaveHeader waveFormatIn;
            fread(&waveFormatIn, sizeof(waveFormatIn), 1, file);

            WaveDataHeader dataHeader;
            do
            {
                fread(&dataHeader, sizeof(dataHeader), 1, file);

                if (dataHeader.data[0] == 'd' && dataHeader.data[1] == 'a' && dataHeader.data[2] == 't' && dataHeader.data[3] == 'a')
                {
                    break;
                }

                fseek(file, dataHeader.dataSize, SEEK_CUR);
            } while (ftell(file) != fileSize);

            BYTE* data = new BYTE[dataHeader.dataSize];
            fread(data, sizeof(data[0]), dataHeader.dataSize, file);

            dataIn.assign(data, data + dataHeader.dataSize);

            delete[] data;

            fclose(file);

            hr = Resample(waveFormatIn, waveFormatOut);
        }
        else
        {
            hr = S_FALSE;
        }

        return S_OK;
    }

private:
    HRESULT Resample(const WaveHeader& waveFormatIn, const WAVEFORMATEX& waveFormatOut)
    {
        HRESULT hr = S_OK;

        const INT32 bytesPerSampleIn = waveFormatIn.bitsPerSample / CHAR_BIT;
        const INT32 bytesPerSampleOut = waveFormatOut.wBitsPerSample / CHAR_BIT;
        const INT32 bytesPerFrameIn = bytesPerSampleIn * waveFormatIn.channels;
        const INT32 bytesPerFrameOut = bytesPerSampleOut * waveFormatOut.nChannels;

        const INT32 framesPerBytesIn = dataIn.size() / bytesPerFrameIn;
        const INT64 durationPerFramesIn = framesPerBytesIn * ReftimesPerSecond / waveFormatIn.sampleRate;
        const INT64 framesPerDurationOut = (durationPerFramesIn * waveFormatOut.nSamplesPerSec) / ReftimesPerSecond;
        const INT64 bytesPerFramesOut = bytesPerFrameOut * framesPerDurationOut;

        IMFTransform* resampler = nullptr;
        hr = CoCreateInstance(CLSID_CResamplerMediaObject, nullptr, CLSCTX_INPROC_SERVER, IID_IMFTransform, (void**)&resampler);
        EXIT_ON_ERROR(hr)

        IMFMediaType* mediaTypeIn = nullptr;
        hr = MFCreateMediaType(&mediaTypeIn);
        EXIT_ON_ERROR(hr)

        mediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
        mediaTypeIn->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
        mediaTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, waveFormatIn.channels);
        mediaTypeIn->SetUINT32(MF_MT_AUDIO_CHANNEL_MASK, SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT);
        mediaTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, waveFormatIn.sampleRate);
        mediaTypeIn->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, bytesPerFrameIn);
        mediaTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, waveFormatIn.sampleRate * bytesPerFrameIn);
        mediaTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, waveFormatIn.bitsPerSample);
        mediaTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);

        IMFMediaType* mediaTypeOut = nullptr;
        hr = MFCreateMediaType(&mediaTypeOut);
        EXIT_ON_ERROR(hr)

        mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
        mediaTypeOut->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
        mediaTypeOut->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, waveFormatOut.nChannels);
        mediaTypeOut->SetUINT32(MF_MT_AUDIO_CHANNEL_MASK, SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT);
        mediaTypeOut->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, waveFormatOut.nSamplesPerSec);
        mediaTypeOut->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, bytesPerFrameOut);
        mediaTypeOut->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, waveFormatOut.nSamplesPerSec * bytesPerFrameOut);
        mediaTypeOut->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, waveFormatOut.wBitsPerSample);
        mediaTypeOut->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);

        DWORD inputStreamID = 0;
        hr = resampler->SetInputType(inputStreamID, mediaTypeIn, 0);
        EXIT_ON_ERROR(hr)

        hr = resampler->SetOutputType(0, mediaTypeOut, 0);
        EXIT_ON_ERROR(hr)

        MFT_OUTPUT_STREAM_INFO streamInfo;
        hr = resampler->GetOutputStreamInfo(0, &streamInfo);
        EXIT_ON_ERROR(hr)

        const bool resamplerNeedsSampleBuffer = (streamInfo.dwFlags & (MFT_OUTPUT_STREAM_PROVIDES_SAMPLES | MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES)) == 0;

        IMFSample* sampleIn = nullptr;
        hr = MFCreateSample(&sampleIn);
        EXIT_ON_ERROR(hr)

        IMFMediaBuffer* mediaBufferIn = nullptr;
        hr = MFCreateMemoryBuffer(dataIn.size(), &mediaBufferIn);
        EXIT_ON_ERROR(hr)

        BYTE* bufferIn = nullptr;
        DWORD maximumLength = 0;
        DWORD currentLength = 0;
        hr = mediaBufferIn->Lock(&bufferIn, &maximumLength, &currentLength);
        EXIT_ON_ERROR(hr)

        memcpy(bufferIn, dataIn.data(), dataIn.size());

        hr = mediaBufferIn->Unlock();
        EXIT_ON_ERROR(hr)

        hr = mediaBufferIn->SetCurrentLength(dataIn.size());
        EXIT_ON_ERROR(hr)

        hr = sampleIn->AddBuffer(mediaBufferIn);
        EXIT_ON_ERROR(hr)

        hr = resampler->ProcessInput(inputStreamID, sampleIn, 0);
        EXIT_ON_ERROR(hr)

        IMFSample* sampleOut = nullptr;
        if (resamplerNeedsSampleBuffer)
        {
            hr = MFCreateSample(&sampleOut);
            EXIT_ON_ERROR(hr)

            IMFMediaBuffer* mediaBufferOut = nullptr;
            hr = MFCreateMemoryBuffer(bytesPerFramesOut, &mediaBufferOut);
            EXIT_ON_ERROR(hr)

            hr = sampleOut->AddBuffer(mediaBufferOut);
            EXIT_ON_ERROR(hr)
        }

        MFT_OUTPUT_DATA_BUFFER outputDataBuffer;
        outputDataBuffer.dwStreamID = 0;
        outputDataBuffer.pEvents = nullptr;
        outputDataBuffer.dwStatus = 0;
        outputDataBuffer.pSample = resamplerNeedsSampleBuffer ? sampleOut : nullptr;

        DWORD status = 0;
        hr = resampler->ProcessOutput(0, 1, &outputDataBuffer, &status);
        EXIT_ON_ERROR(hr)

        IMFMediaBuffer* outputBuffer = nullptr;
        hr = outputDataBuffer.pSample->ConvertToContiguousBuffer(&outputBuffer);
        EXIT_ON_ERROR(hr)

        BYTE* bufferOut = nullptr;
        DWORD currentLengthOut = 0;
        hr = outputBuffer->Lock(&bufferOut, nullptr, &currentLengthOut);
        EXIT_ON_ERROR(hr)

        dataOut.assign(bufferOut, bufferOut + currentLengthOut);

        return hr;
    }

    vector<BYTE> dataIn;
    vector<BYTE> dataOut;
};

/// Plays a WAV file on the default render endpoint in shared mode.
///
/// @param fileName  Path of the WAV file to play.
/// @return S_OK when playback ran to completion, otherwise the first failure
///         HRESULT encountered. Every COM object, the mix format and the COM
///         apartment are released on all exit paths.
HRESULT Run(const char* fileName = "../short1.wav")
{
    using Microsoft::WRL::ComPtr;

    HRESULT hr = CoInitialize(nullptr);
    EXIT_ON_ERROR(hr)

    // Declared before every COM pointer so that it is destroyed after them:
    // all interfaces are released before COM is uninitialized.
    struct ComSession
    {
        ~ComSession() { CoUninitialize(); }
    } comSession;

    ComPtr<IMMDeviceEnumerator> deviceEnumerator;
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, IID_PPV_ARGS(deviceEnumerator.GetAddressOf()));
    EXIT_ON_ERROR(hr)

    ComPtr<IMMDevice> device;
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, device.GetAddressOf());
    EXIT_ON_ERROR(hr)

    ComPtr<IAudioClient> audioClient;
    hr = device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, reinterpret_cast<void**>(audioClient.GetAddressOf()));
    EXIT_ON_ERROR(hr)

    // GetMixFormat allocates with the COM task allocator; free it on exit.
    struct MixFormat
    {
        WAVEFORMATEX* format = nullptr;
        ~MixFormat() { CoTaskMemFree(format); }
    } mix;

    hr = audioClient->GetMixFormat(&mix.format);
    EXIT_ON_ERROR(hr)

    const REFERENCE_TIME requestedDuration = ReftimesPerSecond;
    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, requestedDuration, 0, mix.format, nullptr);
    EXIT_ON_ERROR(hr)

    AudioSource audioSource;
    hr = audioSource.Read(fileName, *mix.format);
    EXIT_ON_ERROR(hr)

    // Get the actual size of the allocated buffer.
    UINT32 bufferFrameCount = 0;
    hr = audioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr)

    ComPtr<IAudioRenderClient> renderClient;
    hr = audioClient->GetService(IID_PPV_ARGS(renderClient.GetAddressOf()));
    EXIT_ON_ERROR(hr)

    // Grab the entire buffer for the initial fill operation.
    BYTE* data = nullptr;
    hr = renderClient->GetBuffer(bufferFrameCount, &data);
    EXIT_ON_ERROR(hr)

    // Load the initial data into the shared buffer.
    DWORD flags = 0;
    hr = audioSource.LoadData(bufferFrameCount, data, &flags, *mix.format);
    EXIT_ON_ERROR(hr)

    hr = renderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr)

    // Start playing.
    hr = audioClient->Start();
    EXIT_ON_ERROR(hr)

    // Calculate the actual duration of the allocated buffer.
    const REFERENCE_TIME actualDuration = ReftimesPerSecond * bufferFrameCount / mix.format->nSamplesPerSec;
    const DWORD bufferDurationInMilliseconds = static_cast<DWORD>(actualDuration / ReftimesPerMillisecond);
    const DWORD halfBufferDurationInMilliseconds = bufferDurationInMilliseconds / 2;

    // Each pass refills whatever part of the shared buffer has been consumed.
    while (flags != AUDCLNT_BUFFERFLAGS_SILENT)
    {
        // Sleep for half the buffer duration.
        Sleep(halfBufferDurationInMilliseconds);

        // See how much buffer space is available.
        UINT32 paddingFrameCount = 0;
        hr = audioClient->GetCurrentPadding(&paddingFrameCount);
        EXIT_ON_ERROR(hr)

        const UINT32 availableFrameCount = bufferFrameCount - paddingFrameCount;

        // Grab all the available space in the shared buffer.
        hr = renderClient->GetBuffer(availableFrameCount, &data);
        EXIT_ON_ERROR(hr)

        // Get the next chunk of data from the audio source.
        hr = audioSource.LoadData(availableFrameCount, data, &flags, *mix.format);
        EXIT_ON_ERROR(hr)

        hr = renderClient->ReleaseBuffer(availableFrameCount, flags);
        EXIT_ON_ERROR(hr)
    }

    // Wait until every queued frame has been consumed before stopping. A fixed
    // sleep can stop the stream while frames are still pending if they were
    // consumed later than wall-clock time predicts. The wait is bounded so
    // that a stalled device cannot hang the caller.
    const DWORD pollIntervalInMilliseconds = 10;
    const DWORD maxWaitInMilliseconds = 2 * bufferDurationInMilliseconds + pollIntervalInMilliseconds;
    for (DWORD waited = 0; waited < maxWaitInMilliseconds; waited += pollIntervalInMilliseconds)
    {
        UINT32 pendingFrameCount = 0;
        hr = audioClient->GetCurrentPadding(&pendingFrameCount);
        EXIT_ON_ERROR(hr)

        if (pendingFrameCount == 0)
        {
            break;
        }

        Sleep(pollIntervalInMilliseconds);
    }

    // Stop playing.
    hr = audioClient->Stop();
    EXIT_ON_ERROR(hr)

    return hr;
}

int main()
{
    Run();
}
C++ Windows 音频 音频流

评论


答: 暂无答案