提问人:Pavel Dubsky 提问时间:5/12/2023 更新时间:5/12/2023 访问量:25
IAudioClient 播放短声音时声音被截断
IAudioClient cutting short sounds
问:
我尝试使用 IAudioClient 播放 WAV 文件,但遇到了非常奇怪的行为。如果我尝试播放非常短的声音(大约 0.2 秒 - 0.3 秒),它们会在结尾或开头处被截断。在 Windows Media Player 中也可以观察到完全相同的行为。但是,如果有其他应用程序正在播放声音(Google Chrome 标签页、Spotify、在 VS Code 编辑器中打开的音频文件),那么我的程序就能正确播放声音(Windows Media Player 也是如此)。
下面是一个完整的示例代码,演示了该问题(基于 https://learn.microsoft.com/en-us/windows/win32/coreaudio/rendering-a-stream)。有人能指出我的方法中可能存在的问题或确切的问题所在吗?源代码很长,但它是完整的,AudioSource 类中的所有内容都应该能正常工作,它只是文件读取和重采样逻辑。我猜问题出在 Run 函数的算法中的某个地方。
#include <Audioclient.h>
#include <mfapi.h>
#include <mferror.h>
#include <mftransform.h>
#include <mmdeviceapi.h>
#include <wmcodecdsp.h>
#include <wrl/client.h>

#include <climits>
#include <cstdio>
#include <cstring>
#include <vector>
// Libraries linked in through the source file itself (MSVC-specific pragma).
#pragma comment(lib, "Mfplat.lib")
#pragma comment(lib, "Mfuuid.lib")
#pragma comment(lib, "wmcodecdspuuid.lib")
// NOTE(review): pulls all of namespace std into the global namespace; the
// code below relies on it for the unqualified `vector`.
using namespace std;
// Returns `hres` from the enclosing function when it is a failure HRESULT.
// Caveats: the argument is evaluated twice, and the expansion is a bare `if`
// statement, so it must not be used as the body of an unbraced if/else.
// Call sites in this file invoke it without a trailing semicolon.
#define EXIT_ON_ERROR(hres) \
if (FAILED(hres)) { return hres; }
// REFERENCE_TIME tick counts per second and per millisecond
// (10,000,000 ticks per second, i.e. one tick is 100 nanoseconds).
constexpr REFERENCE_TIME ReftimesPerSecond = 10'000'000;
constexpr REFERENCE_TIME ReftimesPerMillisecond = 10'000;
struct WaveHeader
{
CHAR riff[4];
UINT32 fileSize;
CHAR wave[4];
CHAR fmt[4];
UINT32 fmtLength;
UINT16 fmtType;
UINT16 channels;
UINT32 sampleRate;
UINT32 blockAlign;
UINT16 byteRate;
UINT16 bitsPerSample;
};
// Eight-byte header that is read in front of each chunk while scanning the
// file for the audio payload: a four-character tag followed by a size.
struct WaveDataHeader
{
// Chunk tag; the reader compares it against 'd','a','t','a'.
CHAR data[4];
// Number of payload bytes that follow this header.
UINT32 dataSize;
};
struct AudioSource
{
HRESULT LoadData(UINT32 bufferFrameCount, BYTE* pData, DWORD* flags, const WAVEFORMATEX& waveFormatOut)
{
HRESULT hr = S_OK;
const UINT32 frameSize = waveFormatOut.wBitsPerSample / CHAR_BIT * waveFormatOut.nChannels;
const UINT32 framesOut = dataOut.size() / frameSize;
const UINT32 extraFrames = bufferFrameCount - framesOut;
const UINT32 maxBytesToCopy = bufferFrameCount * frameSize;
UINT32 bytesToCopy = dataOut.size();
if (bytesToCopy > maxBytesToCopy)
{
bytesToCopy = maxBytesToCopy;
}
if (bytesToCopy == 0)
{
*flags = AUDCLNT_BUFFERFLAGS_SILENT;
}
memcpy(pData, dataOut.data(), bytesToCopy);
memset(pData + bytesToCopy, 0, maxBytesToCopy - bytesToCopy);
dataOut.erase(dataOut.begin(), dataOut.begin() + bytesToCopy);
return hr;
}
HRESULT Read(const char* fileName, const WAVEFORMATEX& waveFormatOut)
{
HRESULT hr = S_OK;
FILE* file = fopen(fileName, "rb");
if (file)
{
fseek(file, 0, SEEK_END);
const INT32 fileSize = ftell(file);
rewind(file);
WaveHeader waveFormatIn;
fread(&waveFormatIn, sizeof(waveFormatIn), 1, file);
WaveDataHeader dataHeader;
do
{
fread(&dataHeader, sizeof(dataHeader), 1, file);
if (dataHeader.data[0] == 'd' && dataHeader.data[1] == 'a' && dataHeader.data[2] == 't' && dataHeader.data[3] == 'a')
{
break;
}
fseek(file, dataHeader.dataSize, SEEK_CUR);
} while (ftell(file) != fileSize);
BYTE* data = new BYTE[dataHeader.dataSize];
fread(data, sizeof(data[0]), dataHeader.dataSize, file);
dataIn.assign(data, data + dataHeader.dataSize);
delete[] data;
fclose(file);
hr = Resample(waveFormatIn, waveFormatOut);
}
else
{
hr = S_FALSE;
}
return S_OK;
}
private:
HRESULT Resample(const WaveHeader& waveFormatIn, const WAVEFORMATEX& waveFormatOut)
{
HRESULT hr = S_OK;
const INT32 bytesPerSampleIn = waveFormatIn.bitsPerSample / CHAR_BIT;
const INT32 bytesPerSampleOut = waveFormatOut.wBitsPerSample / CHAR_BIT;
const INT32 bytesPerFrameIn = bytesPerSampleIn * waveFormatIn.channels;
const INT32 bytesPerFrameOut = bytesPerSampleOut * waveFormatOut.nChannels;
const INT32 framesPerBytesIn = dataIn.size() / bytesPerFrameIn;
const INT64 durationPerFramesIn = framesPerBytesIn * ReftimesPerSecond / waveFormatIn.sampleRate;
const INT64 framesPerDurationOut = (durationPerFramesIn * waveFormatOut.nSamplesPerSec) / ReftimesPerSecond;
const INT64 bytesPerFramesOut = bytesPerFrameOut * framesPerDurationOut;
IMFTransform* resampler = nullptr;
hr = CoCreateInstance(CLSID_CResamplerMediaObject, nullptr, CLSCTX_INPROC_SERVER, IID_IMFTransform, (void**)&resampler);
EXIT_ON_ERROR(hr)
IMFMediaType* mediaTypeIn = nullptr;
hr = MFCreateMediaType(&mediaTypeIn);
EXIT_ON_ERROR(hr)
mediaTypeIn->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
mediaTypeIn->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
mediaTypeIn->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, waveFormatIn.channels);
mediaTypeIn->SetUINT32(MF_MT_AUDIO_CHANNEL_MASK, SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT);
mediaTypeIn->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, waveFormatIn.sampleRate);
mediaTypeIn->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, bytesPerFrameIn);
mediaTypeIn->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, waveFormatIn.sampleRate * bytesPerFrameIn);
mediaTypeIn->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, waveFormatIn.bitsPerSample);
mediaTypeIn->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
IMFMediaType* mediaTypeOut = nullptr;
hr = MFCreateMediaType(&mediaTypeOut);
EXIT_ON_ERROR(hr)
mediaTypeOut->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
mediaTypeOut->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
mediaTypeOut->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, waveFormatOut.nChannels);
mediaTypeOut->SetUINT32(MF_MT_AUDIO_CHANNEL_MASK, SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT);
mediaTypeOut->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, waveFormatOut.nSamplesPerSec);
mediaTypeOut->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, bytesPerFrameOut);
mediaTypeOut->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, waveFormatOut.nSamplesPerSec * bytesPerFrameOut);
mediaTypeOut->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, waveFormatOut.wBitsPerSample);
mediaTypeOut->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, TRUE);
DWORD inputStreamID = 0;
hr = resampler->SetInputType(inputStreamID, mediaTypeIn, 0);
EXIT_ON_ERROR(hr)
hr = resampler->SetOutputType(0, mediaTypeOut, 0);
EXIT_ON_ERROR(hr)
MFT_OUTPUT_STREAM_INFO streamInfo;
hr = resampler->GetOutputStreamInfo(0, &streamInfo);
EXIT_ON_ERROR(hr)
const bool resamplerNeedsSampleBuffer = (streamInfo.dwFlags & (MFT_OUTPUT_STREAM_PROVIDES_SAMPLES | MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES)) == 0;
IMFSample* sampleIn = nullptr;
hr = MFCreateSample(&sampleIn);
EXIT_ON_ERROR(hr)
IMFMediaBuffer* mediaBufferIn = nullptr;
hr = MFCreateMemoryBuffer(dataIn.size(), &mediaBufferIn);
EXIT_ON_ERROR(hr)
BYTE* bufferIn = nullptr;
DWORD maximumLength = 0;
DWORD currentLength = 0;
hr = mediaBufferIn->Lock(&bufferIn, &maximumLength, ¤tLength);
EXIT_ON_ERROR(hr)
memcpy(bufferIn, dataIn.data(), dataIn.size());
hr = mediaBufferIn->Unlock();
EXIT_ON_ERROR(hr)
hr = mediaBufferIn->SetCurrentLength(dataIn.size());
EXIT_ON_ERROR(hr)
hr = sampleIn->AddBuffer(mediaBufferIn);
EXIT_ON_ERROR(hr)
hr = resampler->ProcessInput(inputStreamID, sampleIn, 0);
EXIT_ON_ERROR(hr)
IMFSample* sampleOut = nullptr;
if (resamplerNeedsSampleBuffer)
{
hr = MFCreateSample(&sampleOut);
EXIT_ON_ERROR(hr)
IMFMediaBuffer* mediaBufferOut = nullptr;
hr = MFCreateMemoryBuffer(bytesPerFramesOut, &mediaBufferOut);
EXIT_ON_ERROR(hr)
hr = sampleOut->AddBuffer(mediaBufferOut);
EXIT_ON_ERROR(hr)
}
MFT_OUTPUT_DATA_BUFFER outputDataBuffer;
outputDataBuffer.dwStreamID = 0;
outputDataBuffer.pEvents = nullptr;
outputDataBuffer.dwStatus = 0;
outputDataBuffer.pSample = resamplerNeedsSampleBuffer ? sampleOut : nullptr;
DWORD status = 0;
hr = resampler->ProcessOutput(0, 1, &outputDataBuffer, &status);
EXIT_ON_ERROR(hr)
IMFMediaBuffer* outputBuffer = nullptr;
hr = outputDataBuffer.pSample->ConvertToContiguousBuffer(&outputBuffer);
EXIT_ON_ERROR(hr)
BYTE* bufferOut = nullptr;
DWORD currentLengthOut = 0;
hr = outputBuffer->Lock(&bufferOut, nullptr, ¤tLengthOut);
EXIT_ON_ERROR(hr)
dataOut.assign(bufferOut, bufferOut + currentLengthOut);
return hr;
}
vector<BYTE> dataIn;
vector<BYTE> dataOut;
};
/// Plays "../short1.wav" once on the default render endpoint in shared mode.
///
/// The file is converted to the endpoint's mix format, the endpoint buffer is
/// pre-filled, and the buffer is then topped up every half buffer duration
/// until the source reports it is exhausted.
///
/// @return S_OK on success, otherwise the first failing HRESULT. Every COM
///         object, the mix format and the COM apartment are released on all
///         exit paths.
HRESULT Run()
{
    using Microsoft::WRL::ComPtr;

    HRESULT hr = CoInitialize(nullptr);
    EXIT_ON_ERROR(hr)
    // Balances the successful CoInitialize. Declared before every COM pointer
    // so it is destroyed last, after all interfaces have been released.
    struct ComScope
    {
        ~ComScope() { CoUninitialize(); }
    } comScope;

    ComPtr<IMMDeviceEnumerator> deviceEnumerator;
    hr = CoCreateInstance(__uuidof(MMDeviceEnumerator), nullptr, CLSCTX_ALL, IID_PPV_ARGS(deviceEnumerator.GetAddressOf()));
    EXIT_ON_ERROR(hr)

    ComPtr<IMMDevice> device;
    hr = deviceEnumerator->GetDefaultAudioEndpoint(eRender, eConsole, device.GetAddressOf());
    EXIT_ON_ERROR(hr)

    ComPtr<IAudioClient> audioClient;
    hr = device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, nullptr, reinterpret_cast<void**>(audioClient.GetAddressOf()));
    EXIT_ON_ERROR(hr)

    // GetMixFormat allocates the format with the COM task allocator; the
    // caller has to free it with CoTaskMemFree.
    struct MixFormat
    {
        WAVEFORMATEX* ptr = nullptr;
        ~MixFormat() { CoTaskMemFree(ptr); }
    } mixFormat;
    hr = audioClient->GetMixFormat(&mixFormat.ptr);
    EXIT_ON_ERROR(hr)
    const WAVEFORMATEX& waveFormat = *mixFormat.ptr;

    const REFERENCE_TIME requestedDuration = ReftimesPerSecond;
    hr = audioClient->Initialize(AUDCLNT_SHAREMODE_SHARED, 0, requestedDuration, 0, mixFormat.ptr, nullptr);
    EXIT_ON_ERROR(hr)

    AudioSource audioSource;
    hr = audioSource.Read("../short1.wav", waveFormat);
    EXIT_ON_ERROR(hr)

    // Get the actual size of the allocated buffer.
    UINT32 bufferFrameCount = 0;
    hr = audioClient->GetBufferSize(&bufferFrameCount);
    EXIT_ON_ERROR(hr)

    ComPtr<IAudioRenderClient> renderClient;
    hr = audioClient->GetService(__uuidof(IAudioRenderClient), reinterpret_cast<void**>(renderClient.GetAddressOf()));
    EXIT_ON_ERROR(hr)

    // Grab the entire buffer for the initial fill operation.
    BYTE* data = nullptr;
    hr = renderClient->GetBuffer(bufferFrameCount, &data);
    EXIT_ON_ERROR(hr)

    // Load the initial data into the shared buffer.
    DWORD flags = 0;
    hr = audioSource.LoadData(bufferFrameCount, data, &flags, waveFormat);
    EXIT_ON_ERROR(hr)
    hr = renderClient->ReleaseBuffer(bufferFrameCount, flags);
    EXIT_ON_ERROR(hr)

    // Start playing.
    hr = audioClient->Start();
    EXIT_ON_ERROR(hr)

    // Calculate the actual duration of the allocated buffer.
    const REFERENCE_TIME actualDuration = ReftimesPerSecond * bufferFrameCount / waveFormat.nSamplesPerSec;
    const DWORD halfBufferMilliseconds = static_cast<DWORD>(actualDuration / ReftimesPerMillisecond / 2);

    // Each pass refills whatever part of the shared buffer has been played.
    while (flags != AUDCLNT_BUFFERFLAGS_SILENT)
    {
        // Sleep for half the buffer duration.
        Sleep(halfBufferMilliseconds);

        // See how much buffer space is available.
        UINT32 paddingFrameCount = 0;
        hr = audioClient->GetCurrentPadding(&paddingFrameCount);
        EXIT_ON_ERROR(hr)
        const UINT32 availableFrameCount = bufferFrameCount - paddingFrameCount;

        // Grab all the available space in the shared buffer.
        hr = renderClient->GetBuffer(availableFrameCount, &data);
        EXIT_ON_ERROR(hr)

        // Fill it with the next piece of audio from the source.
        hr = audioSource.LoadData(availableFrameCount, data, &flags, waveFormat);
        EXIT_ON_ERROR(hr)
        hr = renderClient->ReleaseBuffer(availableFrameCount, flags);
        EXIT_ON_ERROR(hr)
    }

    // Wait for last data in buffer to play before stopping.
    Sleep(halfBufferMilliseconds);

    // Stop playing.
    hr = audioClient->Stop();
    EXIT_ON_ERROR(hr)
    return hr;
}
int main()
{
Run();
}
答: 暂无答案
评论