#include "sami_core.h" // help function std::vector<uint8_t> loadModelAsBinary(const std::string& path) { std::ifstream file(path, std::ios::binary | std::ios::ate); std::streamsize size = file.tellg(); file.seekg(0, std::ios::beg); std::vector<uint8_t> buffer(size); if(file.read((char*)buffer.data(), size)) { return buffer; } return {}; } // step 0, load model const std::string model_path = "/path/to/time_align.model"; std::vector<uint_8> model_buf = loadModelAsBinary(model_path); assert(model_buf.size() > 0); // step 1, create handle SAMICoreHandle executor; SAMICoreExecutorContextCreateParameter engineCreateParameter; engineCreateParameter.sampleRate = sample_rate; engineCreateParameter.maxBlockSize = sample_rate / 10 // max block size; engineCreateParameter.modelBuffer = (const char*)modelBuffer.data(); engineCreateParameter.modelLen = modelBuffer.size(); int ret = SAMICoreCreateHandleByIdentify(&executor, identify::SAMICoreIdentify_EngineExecutor_TimeAlign, &engineCreateParameter); if(ret == SAMI_ENGINE_CREATE_ERROR) { std::cerr << "Cannot create Engine handle\n"; return -1; } // step 2, create input and output audio block SAMICoreAudioBuffer mic_audio_buffer; mic_audio_buffer.isInterleave = isInterleave; mic_audio_buffer.numberChannels = num_channels_mic; mic_audio_buffer.numberSamples = max_block_size; mic_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_mic]; SAMICoreAudioBuffer ref_audio_buffer; ref_audio_buffer.isInterleave = isInterleave; ref_audio_buffer.numberChannels = num_channels_ref; ref_audio_buffer.numberSamples = max_block_size; ref_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_ref]; SAMICoreTimeAlignParameter timeAlignInput; timeAlignInput.mic = &mic_audio_buffer; timeAlignInput.ref = &ref_audio_buffer; SAMICoreBlock samiCoreBlock; memset(&samiCoreBlock, 0, sizeof(SAMICoreBlock)); samiCoreBlock.dataType = SAMICoreDataType_TimeAlign; samiCoreBlock.numberAudioData = 1; samiCoreBlock.audioData = &timeAlignInput; // step 3, process block by block for(;hasAudioSamples(); { copySamplesToInputBuffer(in_audio_buffer); //拷贝数据或者修改数据指针in_audio_buffer的指向 int ret = SAMICoreProcessAsync(executor, &samiCoreBlock); assert(ret == SAMI_OK); // do something after process doSomethingAfterProcess(out_block); //业务从out_block拷贝处理后的数据 } // step 4, get output SAMICoreProperty property; ret = SAMICoreGetPropertyById((SAMICoreHandle)executor, SAMICoreEngineExecutorOutPut, &property); if (ret == SAMI_ENGINE_GETOUTPUT_NO_OUTPUT) { std::cout << "Can't detect." << std::endl; } else { float delay_ms = *reinterpret_cast<float*>(property.data); std::cout << "FINAL delay " << delay_ms << " ms" << std::endl; } // step 5, remember release resource SAMICoreDestroyProperty(&property); SAMICoreDestroyHandle(executor); delete[] mic_audio_buffer.data; delete[] ref_audio_buffer.data;
That is, read the entire model file into memory. How you implement this is up to you; the loadModelAsBinary function in the example is for reference only.
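The corresponding step 0 snippet from the complete example above (the model path is a placeholder):

```cpp
const std::string model_path = "/path/to/time_align.model";
std::vector<uint8_t> model_buf = loadModelAsBinary(model_path);
assert(model_buf.size() > 0);
```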
Pass in the model buffer address, model size, sample rate, and maxBlockSize, then create the handle via SAMICoreCreateHandleByIdentify.
```cpp
SAMICoreHandle executor;
SAMICoreExecutorContextCreateParameter engineCreateParameter;
engineCreateParameter.sampleRate = sample_rate;
engineCreateParameter.maxBlockSize = max_block_size;
engineCreateParameter.modelBuffer = (const char*)model_buf.data();
engineCreateParameter.modelLen = model_buf.size();

int ret = SAMICoreCreateHandleByIdentify(&executor, identify::SAMICoreIdentify_EngineExecutor_TimeAlign, &engineCreateParameter);
if(ret == SAMI_ENGINE_CREATE_ERROR) {
    std::cerr << "Cannot create Engine handle\n";
    return -1;
}
```
Audio formats supported by the algorithm
The sample rate is tied to the model; currently only 16 kHz and 44.1 kHz models are provided. Both interleaved and non-interleaved (planar) audio are supported, as 32-bit floating-point samples, with either mono or stereo channels.
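For instance, a set of input parameters consistent with these constraints might look as follows (the values are purely illustrative; the variable names follow the example code):

```cpp
// Illustrative values only -- pick the model that matches your sample rate.
int  sample_rate      = 44100;             // or 16000, depending on the model in use
bool isInterleave     = false;             // planar (non-interleaved) 32-bit float input
int  num_channels_mic = 1;                 // mono microphone signal
int  num_channels_ref = 2;                 // stereo reference signal
int  max_block_size   = sample_rate / 10;  // e.g. 100 ms blocks, as in the full example
```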
Several situations can cause handle creation to fail:

- The model data is invalid, e.g. the model data is corrupted or its size is wrong.
- The block size is invalid.
- The sample rate does not match the model.
SAMICoreAudioBuffer holds the audio data; it supports Planar-Float and Interleaved-Float data. For more on audio data formats, see the Glossary section. SAMICoreBlock holds the data to be processed.
```cpp
SAMICoreAudioBuffer mic_audio_buffer;
mic_audio_buffer.isInterleave = isInterleave;
mic_audio_buffer.numberChannels = num_channels_mic;
mic_audio_buffer.numberSamples = max_block_size;
mic_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_mic];

SAMICoreAudioBuffer ref_audio_buffer;
ref_audio_buffer.isInterleave = isInterleave;
ref_audio_buffer.numberChannels = num_channels_ref;
ref_audio_buffer.numberSamples = max_block_size;
ref_audio_buffer.data = new float*[isInterleave ? 1 : num_channels_ref];

SAMICoreTimeAlignParameter timeAlignInput;
timeAlignInput.mic = &mic_audio_buffer;
timeAlignInput.ref = &ref_audio_buffer;

SAMICoreBlock samiCoreBlock;
memset(&samiCoreBlock, 0, sizeof(SAMICoreBlock));
samiCoreBlock.dataType = SAMICoreDataType_TimeAlign;
samiCoreBlock.numberAudioData = 1;
samiCoreBlock.audioData = &timeAlignInput;
```
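Note that `new float*[...]` above only allocates the array of channel pointers; the per-channel sample storage still has to be provided (or the pointers re-pointed at buffers the application already owns before each process call). A minimal sketch for the planar (non-interleaved) case, assuming the application allocates the sample memory itself:

```cpp
// Assumption: planar layout, one float array of max_block_size samples per channel.
// These buffers are owned by the application and must be freed by it later.
for(int ch = 0; ch < num_channels_mic; ++ch) {
    mic_audio_buffer.data[ch] = new float[max_block_size];
}
for(int ch = 0; ch < num_channels_ref; ++ch) {
    ref_audio_buffer.data[ch] = new float[max_block_size];
}
```

If the buffers are allocated this way, remember to `delete[]` each channel array before the final `delete[]` of the pointer arrays in step 5.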
Copy the audio data to be processed into in_audio_buffer; after it has been processed by SAMICoreProcess, the result is copied into the output. This is the approach used in the example.
```cpp
while(hasAudioSamples()) {
    copySamplesToInputBuffer(in_audio_buffer);  // copy the data, or re-point in_audio_buffer at the caller's data
    int ret = SAMICoreProcessAsync(executor, &samiCoreBlock);
    assert(ret == SAMI_OK);

    // do something after process
    doSomethingAfterProcess(out_block);  // the application copies the processed data out of out_block
}
```
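Here copySamplesToInputBuffer and doSomethingAfterProcess stand in for application code. A sketch of what the copy step could look like for the time-align case, assuming planar float input owned by the caller (the helper name, mic_src, ref_src, and num_samples are hypothetical and not part of the SDK):

```cpp
// Hypothetical helper: fill the mic/ref buffers for one block before calling process.
// mic_src and ref_src are the application's planar float channel pointers (assumed);
// num_samples is the number of samples in this block (<= max_block_size).
void copySamplesToTimeAlignBuffers(float** mic_src, float** ref_src, int num_samples) {
    for(int ch = 0; ch < num_channels_mic; ++ch) {
        // either copy the caller's data into the buffers allocated earlier ...
        std::memcpy(mic_audio_buffer.data[ch], mic_src[ch], num_samples * sizeof(float));
    }
    for(int ch = 0; ch < num_channels_ref; ++ch) {
        // ... or simply re-point the channel pointer at the caller's data
        ref_audio_buffer.data[ch] = ref_src[ch];
    }
    mic_audio_buffer.numberSamples = num_samples;
    ref_audio_buffer.numberSamples = num_samples;
}
```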
Several situations can cause processing to fail (a defensive return-code check is sketched after this list):

- An invalid handle, e.g. handle creation failed but the invalid handle is still passed to process.
- SAMICoreBlock or SAMICoreTimeAlignParameter is set up incorrectly.
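In production code you may prefer an explicit check over assert; a minimal sketch, using the SAMI_OK success code from the example above:

```cpp
int ret = SAMICoreProcessAsync(executor, &samiCoreBlock);
if(ret != SAMI_OK) {
    std::cerr << "SAMICoreProcessAsync failed with code " << ret << "\n";
    // handle the error: verify the handle and the SAMICoreBlock / SAMICoreTimeAlignParameter setup
}
```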
Note that when the total amount of input data is small, the result may be somewhat less accurate.
The algorithm supports streaming processing, so any block_size can be used; however, when the total amount of data is small, the error may be larger, or it may become more likely that the delay cannot be computed at all. When the return value is SAMI_ENGINE_GETOUTPUT_NO_OUTPUT, no matching segment was found and the delay could not be computed.
```cpp
SAMICoreProperty property;
ret = SAMICoreGetPropertyById((SAMICoreHandle)executor, SAMICoreEngineExecutorOutPut, &property);
if(ret == SAMI_ENGINE_GETOUTPUT_NO_OUTPUT) {
    std::cout << "Can't detect." << std::endl;
} else {
    float delay_ms = *reinterpret_cast<float*>(property.data);
    std::cout << "FINAL delay " << delay_ms << " ms" << std::endl;
}

SAMICoreDestroyProperty(&property);
```
Release the handle:
```cpp
ret = SAMICoreDestroyHandle(executor);
```
In addition, remember to release the memory backing the audio data buffers (if any). For example:
```cpp
SAMICoreDestroyHandle(executor);
delete[] mic_audio_buffer.data;
delete[] ref_audio_buffer.data;
```