Calling nnom rnn-denoise noise reduction from Java via JNI
Introduction: https://github.com/majianjia/nnom/blob/master/examples/rnn-denoise/README_CN.md
The repository ships a WAV-based example by default:
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include "nnom.h"
#include "denoise_weights.h"
#include "mfcc.h"
#include "wav.h"
// the bandpass filter coefficients
#include "equalizer_coeff.h"
#define NUM_FEATURES NUM_FILTER
#define _MAX(x, y) (((x) > (y)) ? (x) : (y))
#define _MIN(x, y) (((x) < (y)) ? (x) : (y))
#define NUM_CHANNELS 1
#define SAMPLE_RATE 16000
#define AUDIO_FRAME_LEN 512
// audio buffer for input
float audio_buffer[AUDIO_FRAME_LEN] = {0};
int16_t audio_buffer_16bit[AUDIO_FRAME_LEN] = {0};
// buffer for output
int16_t audio_buffer_filtered[AUDIO_FRAME_LEN/2] = { 0 };
// mfcc features and their derivatives
float mfcc_feature[NUM_FEATURES] = { 0 };
float mfcc_feature_prev[NUM_FEATURES] = { 0 };
float mfcc_feature_diff[NUM_FEATURES] = { 0 };
float mfcc_feature_diff_prev[NUM_FEATURES] = { 0 };
float mfcc_feature_diff1[NUM_FEATURES] = { 0 };
// features for NN
float nn_features[64] = {0};
int8_t nn_features_q7[64] = {0};
// NN results, which are the gains for each frequency band
float band_gains[NUM_FILTER] = {0};
float band_gains_prev[NUM_FILTER] = {0};
// 0 dB gain coefficients
float coeff_b[NUM_FILTER][NUM_COEFF_PAIR] = FILTER_COEFF_B;
float coeff_a[NUM_FILTER][NUM_COEFF_PAIR] = FILTER_COEFF_A;
// dynamic gain coefficients
float b_[NUM_FILTER][NUM_COEFF_PAIR] = {0};
// update the history
void y_h_update(float *y_h, uint32_t len)
{
for (uint32_t i = len-1; i >0 ;i--)
y_h[i] = y_h[i-1];
}
// equalizer built from multiple n-th order IIR band-pass filters.
// y[i] = b[0] * x[i] + b[1] * x[i - 1] + b[2] * x[i - 2] - a[1] * y[i - 1] - a[2] * y[i - 2]...
void equalizer(float* x, float* y, uint32_t signal_len, float *b, float *a, uint32_t num_band, uint32_t num_order)
{
// the y history for each band
static float y_h[NUM_FILTER][NUM_COEFF_PAIR] = { 0 };
static float x_h[NUM_COEFF_PAIR * 2] = { 0 };
uint32_t num_coeff = num_order * 2 + 1;
// i < num_coeff: the first few points involve the historical x
// combine the state and the new data to get a continuous x input.
memcpy(x_h + num_coeff, x, num_coeff * sizeof(float));
for (uint32_t i = 0; i < num_coeff; i++)
{
y[i] = 0;
for (uint32_t n = 0; n < num_band; n++)
{
y_h_update(y_h[n], num_coeff);
y_h[n][0] = b[n * num_coeff] * x_h[i+ num_coeff];
for (uint32_t c = 1; c < num_coeff; c++)
y_h[n][0] += b[n * num_coeff + c] * x_h[num_coeff + i - c] - a[n * num_coeff + c] * y_h[n][c];
y[i] += y_h[n][0];
}
}
// store the x for the state of next round
memcpy(x_h, &x[signal_len - num_coeff], num_coeff * sizeof(float));
// i >= num_coeff: the rest of the data does not involve the x history
for (uint32_t i = num_coeff; i < signal_len; i++)
{
y[i] = 0;
for (uint32_t n = 0; n < num_band; n++)
{
y_h_update(y_h[n], num_coeff);
y_h[n][0] = b[n * num_coeff] * x[i];
for (uint32_t c = 1; c < num_coeff; c++)
y_h[n][0] += b[n * num_coeff + c] * x[i - c] - a[n * num_coeff + c] * y_h[n][c];
y[i] += y_h[n][0];
}
}
}
// set dynamic gains: multiply the gains by the b coefficients
void set_gains(float *b_in, float *b_out, float* gains, uint32_t num_band, uint32_t num_order)
{
uint32_t num_coeff = num_order * 2 + 1;
for (uint32_t i = 0; i < num_band; i++)
for (uint32_t c = 0; c < num_coeff; c++)
b_out[num_coeff *i + c] = b_in[num_coeff * i + c] * gains[i]; // only need to set b.
}
void quantize_data(float*din, int8_t *dout, uint32_t size, uint32_t int_bit)
{
float limit = (1 << int_bit);
for(uint32_t i=0; i<size; i++)
dout[i] = (int8_t)(_MAX(_MIN(din[i], limit), -limit) / limit * 127);
}
void log_values(float* value, uint32_t size, FILE* f)
{
char line[16];
for (uint32_t i = 0; i < size; i++) {
snprintf(line, 16, "%f,", value[i]);
fwrite(line, strlen(line), 1, f);
}
fwrite("\n", 2, 1, f);
}
int main(int argc, char* argv[])
{
wav_header_t wav_header;
size_t size;
char* input_file = "sample.wav";
char* output_file = "filtered_sample.wav";
FILE* src_file;
FILE* des_file;
char* log_file = "log.csv";
FILE* flog = fopen(log_file, "wb");
// if the user has specified input and output files.
if (argc > 1)
input_file = argv[1];
if (argc > 2)
output_file = argv[2];
src_file = fopen(input_file, "rb");
des_file = fopen(output_file, "wb");
if (src_file == NULL)
{
printf("Cannot open wav files, default input:'%s'\n", input_file);
printf("Or use command to specify input file: xxx.exe [input.wav] [output.wav]\n");
return -1;
}
if (des_file == NULL)
{
fclose(src_file);
return -1;
}
// read wav file header, copy it to the output file.
fread(&wav_header, sizeof(wav_header), 1, src_file);
fwrite(&wav_header, sizeof(wav_header), 1, des_file);
// let's jump to the "data" chunk of the WAV file.
if (strncmp(wav_header.datachunk_id, "data", 4)){
wav_chunk_t chunk = { .size= wav_header.datachunk_size};
// find the 'data' chunk
do {
char* buf = malloc(chunk.size);
fread(buf, chunk.size, 1, src_file);
fwrite(buf, chunk.size, 1, des_file);
free(buf);
fread(&chunk, sizeof(wav_chunk_t), 1, src_file);
fwrite(&chunk, sizeof(wav_chunk_t), 1, des_file);
} while (strncmp(chunk.id, "data", 4));
}
// NNoM model
nnom_model_t *model = nnom_model_create();
// 26 features, 0 offset, 26 bands, 512fft, 0 preempha, attached_energy_to_band0
mfcc_t * mfcc = mfcc_create(NUM_FEATURES, 0, NUM_FEATURES, 512, 0, true);
printf("\nProcessing file: %s\n", input_file);
while(1) {
// 50% overlapping buffer: move the later half to the first half, then fill the later half with new data
memcpy(audio_buffer_16bit, &audio_buffer_16bit[AUDIO_FRAME_LEN/2], AUDIO_FRAME_LEN/2*sizeof(int16_t));
// now read the new data
size = fread(&audio_buffer_16bit[AUDIO_FRAME_LEN / 2], AUDIO_FRAME_LEN / 2 * sizeof(int16_t), 1, src_file);
if(size == 0)
break;
// get mfcc
mfcc_compute(mfcc, audio_buffer_16bit, mfcc_feature);
//log_values(mfcc_feature, NUM_FEATURES, flog);
// get the first and second derivative of mfcc
for(uint32_t i=0; i< NUM_FEATURES; i++)
{
mfcc_feature_diff[i] = mfcc_feature[i] - mfcc_feature_prev[i];
mfcc_feature_diff1[i] = mfcc_feature_diff[i] - mfcc_feature_diff_prev[i];
}
memcpy(mfcc_feature_prev, mfcc_feature, NUM_FEATURES * sizeof(float));
memcpy(mfcc_feature_diff_prev, mfcc_feature_diff, NUM_FEATURES * sizeof(float));
// combine MFCC with derivatives
memcpy(nn_features, mfcc_feature, NUM_FEATURES*sizeof(float));
memcpy(&nn_features[NUM_FEATURES], mfcc_feature_diff, 10*sizeof(float));
memcpy(&nn_features[NUM_FEATURES+10], mfcc_feature_diff1, 10*sizeof(float));
//log_values(nn_features, NUM_FEATURES+20, flog);
// quantise them using the same scale as training data (in keras), by 2^n.
quantize_data(nn_features, nn_features_q7, NUM_FEATURES+20, 3);
// run the model with the new input
memcpy(nnom_input_data, nn_features_q7, sizeof(nnom_input_data));
model_run(model);
// read the result, convert it back to float (q0.7 to float)
for(int i=0; i< NUM_FEATURES; i++)
band_gains[i] = (float)(nnom_output_data[i]) / 127.f;
log_values(band_gains, NUM_FILTER, flog);
// one more step: limit the change of the gains to smooth the speech, per the RNNoise paper
for(int i=0; i< NUM_FEATURES; i++)
band_gains[i] = _MAX(band_gains_prev[i]*0.8f, band_gains[i]);
memcpy(band_gains_prev, band_gains, NUM_FEATURES *sizeof(float));
// apply the dynamic gains to each frequency band.
set_gains((float*)coeff_b, (float*)b_, band_gains, NUM_FILTER, NUM_ORDER);
// convert 16bit to float for equalizer
for (int i = 0; i < AUDIO_FRAME_LEN/2; i++)
audio_buffer[i] = audio_buffer_16bit[i + AUDIO_FRAME_LEN / 2] / 32768.f;
// finally, we apply the equalizer to this audio frame to denoise
equalizer(audio_buffer, &audio_buffer[AUDIO_FRAME_LEN / 2], AUDIO_FRAME_LEN/2, (float*)b_,(float*)coeff_a, NUM_FILTER, NUM_ORDER);
// convert the filtered signal back to int16
for (int i = 0; i < AUDIO_FRAME_LEN / 2; i++)
audio_buffer_filtered[i] = audio_buffer[i + AUDIO_FRAME_LEN / 2] * 32768.f *0.6f;
// write the filtered frame to WAV file.
fwrite(audio_buffer_filtered, 256*sizeof(int16_t), 1, des_file);
}
// print some model info
model_io_format(model);
model_stat(model);
model_delete(model);
fclose(flog);
fclose(src_file);
fclose(des_file);
printf("\nNoisy signal '%s' has been de-noised by NNoM.\nThe output is saved to '%s'.\n", input_file, output_file);
return 0;
}
Strip out the WAV header handling and the same code can consume raw PCM directly.
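As a rough illustration (not part of the original example), the body of the while loop above can be factored into a per-frame function that takes AUDIO_FRAME_LEN/2 = 256 new 16-bit samples per call and reuses the example's global buffers plus the model/mfcc handles created in main():
void denoise_frame(nnom_model_t *model, mfcc_t *mfcc, const int16_t *pcm_in, int16_t *pcm_out)
{
    // 50% overlap: shift the previous half forward, then append the new samples
    memcpy(audio_buffer_16bit, &audio_buffer_16bit[AUDIO_FRAME_LEN/2], AUDIO_FRAME_LEN/2 * sizeof(int16_t));
    memcpy(&audio_buffer_16bit[AUDIO_FRAME_LEN/2], pcm_in, AUDIO_FRAME_LEN/2 * sizeof(int16_t));
    // MFCC and its first/second differences
    mfcc_compute(mfcc, audio_buffer_16bit, mfcc_feature);
    for (uint32_t i = 0; i < NUM_FEATURES; i++)
    {
        mfcc_feature_diff[i] = mfcc_feature[i] - mfcc_feature_prev[i];
        mfcc_feature_diff1[i] = mfcc_feature_diff[i] - mfcc_feature_diff_prev[i];
    }
    memcpy(mfcc_feature_prev, mfcc_feature, NUM_FEATURES * sizeof(float));
    memcpy(mfcc_feature_diff_prev, mfcc_feature_diff, NUM_FEATURES * sizeof(float));
    // pack the features, quantise, run the model and read back the smoothed band gains
    memcpy(nn_features, mfcc_feature, NUM_FEATURES * sizeof(float));
    memcpy(&nn_features[NUM_FEATURES], mfcc_feature_diff, 10 * sizeof(float));
    memcpy(&nn_features[NUM_FEATURES + 10], mfcc_feature_diff1, 10 * sizeof(float));
    quantize_data(nn_features, nn_features_q7, NUM_FEATURES + 20, 3);
    memcpy(nnom_input_data, nn_features_q7, sizeof(nnom_input_data));
    model_run(model);
    for (int i = 0; i < NUM_FEATURES; i++)
        band_gains[i] = _MAX(band_gains_prev[i] * 0.8f, (float)nnom_output_data[i] / 127.f);
    memcpy(band_gains_prev, band_gains, NUM_FEATURES * sizeof(float));
    // apply the gains through the equalizer and convert back to int16
    set_gains((float*)coeff_b, (float*)b_, band_gains, NUM_FILTER, NUM_ORDER);
    for (int i = 0; i < AUDIO_FRAME_LEN/2; i++)
        audio_buffer[i] = audio_buffer_16bit[i + AUDIO_FRAME_LEN/2] / 32768.f;
    equalizer(audio_buffer, &audio_buffer[AUDIO_FRAME_LEN/2], AUDIO_FRAME_LEN/2, (float*)b_, (float*)coeff_a, NUM_FILTER, NUM_ORDER);
    for (int i = 0; i < AUDIO_FRAME_LEN/2; i++)
        pcm_out[i] = (int16_t)(audio_buffer[i + AUDIO_FRAME_LEN/2] * 32768.f * 0.6f);
}
Note that the Java wrapper shown later feeds 320-sample frames, so the actual JNI source presumably buffers internally or adapts the frame/hop size; that detail is not shown in the article.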
Create a CMake file to build the DLL:
cmake_minimum_required(VERSION 3.10)
project(RnnDenoise)
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED True)
include_directories(nnom-master/port/)
include_directories(nnom-master/inc/)
include_directories(nnom-master/examples/rnn-denoise/)
include_directories(D:/java/jdk1.8x64/include/)
include_directories(D:/java/jdk1.8x64/include/win32/)
set(EXPLICIT_SOURCES
RnnDenoise.c
nnom-master/examples/rnn-denoise/mfcc.c
)
file(GLOB_RECURSE SRC_FILES "nnom-master/src/*/*.c")
set(SOURCES ${EXPLICIT_SOURCES} ${SRC_FILES})
add_library(RnnDenoise SHARED ${SOURCES})
Example of the Java library wrapper:
package com.lilin.demoasr.nnom;
public class RnnDenoise implements AutoCloseable{
private long rnndenoise;
public long getRnndenoise() {
return rnndenoise;
}
public void setRnndenoise(long rnndenoise) {
this.rnndenoise = rnndenoise;
}
private static final Object globalLock = new Object();
/**
 * https://github.com/majianjia/nnom/blob/master/examples/rnn-denoise/
 *
 * @throws Exception
 */
public RnnDenoise() throws Exception {
synchronized (globalLock) {
RnnLoad.load("RnnDenoise");
}
this.rnndenoise = createRnnDenoise0();
}
private static native long createRnnDenoise0();
private native short[] denoise0(long rnndenoise, short[] input);
/**
 * Denoises one frame of fixed size, 320 samples per call; the frame size can be enlarged by modifying the C side.
 * @param input
 * @return
 */
public short[] denoise(short[] input) {
// synchronized (this) {
return this.denoise0(this.rnndenoise ,input);
// }
}
private native long destroyRnnDenoise0();
public void close() {
synchronized (this) {
this.destroyRnnDenoise0();
this.rnndenoise = 0L;
}
}
public boolean isClosed() {
synchronized (this) {
return this.rnndenoise == 0L;
}
}
}
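The CMake file above compiles an RnnDenoise.c that the article never shows. Below is a minimal, untested sketch of what such a JNI bridge could look like for the native declarations in the class above; rnn_denoise_create(), rnn_denoise_process() and rnn_denoise_destroy() are hypothetical helpers assumed to wrap the MFCC -> model_run -> equalizer pipeline (e.g. the denoise_frame() sketch earlier plus the per-instance state discussed at the end of this article). Because destroyRnnDenoise0() takes no handle, the sketch remembers the last created context in a static pointer, consistent with the single-instance limitation described later.
// RnnDenoise.c -- sketch of the JNI bridge built by the CMake file above.
// The rnn_denoise_* helpers are hypothetical wrappers around the per-frame pipeline.
#include <jni.h>
#include <stdint.h>
#include <stdlib.h>

typedef struct rnn_denoise rnn_denoise_t;                 /* opaque per-instance state */
rnn_denoise_t *rnn_denoise_create(void);                  /* allocate model + mfcc + buffers */
void rnn_denoise_process(rnn_denoise_t *ctx, const int16_t *in, int16_t *out, int len);
void rnn_denoise_destroy(rnn_denoise_t *ctx);

static rnn_denoise_t *g_last_ctx = NULL;  /* destroyRnnDenoise0() has no handle argument,
                                             so the bridge remembers the last instance */

JNIEXPORT jlong JNICALL
Java_com_lilin_demoasr_nnom_RnnDenoise_createRnnDenoise0(JNIEnv *env, jclass clazz)
{
    g_last_ctx = rnn_denoise_create();
    return (jlong)(intptr_t)g_last_ctx;
}

JNIEXPORT jshortArray JNICALL
Java_com_lilin_demoasr_nnom_RnnDenoise_denoise0(JNIEnv *env, jobject obj, jlong handle, jshortArray input)
{
    rnn_denoise_t *ctx = (rnn_denoise_t *)(intptr_t)handle;
    jsize len = (*env)->GetArrayLength(env, input);       /* 320 samples in the wrapper above */

    jshort *in = (*env)->GetShortArrayElements(env, input, NULL);
    int16_t *out = malloc(len * sizeof(int16_t));
    rnn_denoise_process(ctx, (const int16_t *)in, out, (int)len);
    (*env)->ReleaseShortArrayElements(env, input, in, JNI_ABORT);

    jshortArray result = (*env)->NewShortArray(env, len);
    (*env)->SetShortArrayRegion(env, result, 0, len, (const jshort *)out);
    free(out);
    return result;
}

JNIEXPORT jlong JNICALL
Java_com_lilin_demoasr_nnom_RnnDenoise_destroyRnnDenoise0(JNIEnv *env, jobject obj)
{
    rnn_denoise_destroy(g_last_ctx);
    g_last_ctx = NULL;
    return 0;
}
The exported symbol names must follow the Java_<package>_<class>_<method> pattern, so changing the Java package or class name requires regenerating them (javac -h can produce a matching header).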
test:
public static void main (String[] args) {
String[] sList = new String[]{"G:\\work\\ai\\ZipEnhancer\\r1.pcm", "C:\\Users\\lilin\\Desktop\\16k.pcm"};
// String[] sList = new String[]{"C:\\Users\\lilin\\Desktop\\16k.pcm"};
List< Thread> lts= new ArrayList<>();
for (int i = 0; i < sList.length; i++) {
String file =sList[i];
int finalI = i;
lts.add(new Thread(new Runnable() {
@Override
public void run() {
try {
RnnDenoise rnnDenoise = new RnnDenoise();
System.out.println(rnnDenoise.getRnndenoise());
FileInputStream f = new FileInputStream(file);
FileOutputStream f1 = new FileOutputStream("C:\\Users\\lilin\\Desktop\\" + finalI + ".pcm");
int n=0;
byte[] z = new byte[640];
while ((n = f.read(z)) != -1) {
if (n < z.length) break; // skip a trailing partial frame
short[] sa = new short[320];
ByteBuffer.wrap(z).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().get(sa);
short[] denoisedAudio = rnnDenoise.denoise(sa);
byte[] z1 = new byte[640];
ByteBuffer.wrap(z1).order(ByteOrder.LITTLE_ENDIAN).asShortBuffer().put(denoisedAudio);
f1.write(z1);
}
System.out.println(finalI+"end.");
rnnDenoise.close();
f.close();
f1.close();
}catch (Exception e){e.printStackTrace();}
}
}));
}
for (Thread y: lts ) {
y.start();
}
for (Thread y: lts ) {
try{ y.join();}catch (Exception e){e.printStackTrace();}
}
System.out.println("end...");
}
}
The denoise_weights.h generated by nnom is effectively a singleton: it cannot create multiple model instances at the same time, so the Java wrapper cannot be used from multiple threads as-is. You can change this yourself; the work mainly involves the static variables and the nnom_tensor_t objects, which need to be created with malloc instead.
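One way to lift that limitation (a sketch of the idea, not the article's actual change) is to collect the state that the example keeps in globals and function statics into a heap-allocated context, so each JNI handle owns its own copy; the generated model code has to be reworked in the same spirit, because nnom_model_create() as generated also builds its layers and nnom_tensor_t objects on static buffers. This reuses the headers and macros already included by the example:
typedef struct rnn_denoise
{
    nnom_model_t *model;                       // per-instance model (requires the generated
                                               // weights file to drop its static buffers)
    mfcc_t *mfcc;
    int16_t audio_buffer_16bit[AUDIO_FRAME_LEN];
    float audio_buffer[AUDIO_FRAME_LEN];
    float mfcc_feature[NUM_FEATURES];
    float mfcc_feature_prev[NUM_FEATURES];
    float mfcc_feature_diff_prev[NUM_FEATURES];
    float band_gains_prev[NUM_FILTER];
    float b_[NUM_FILTER][NUM_COEFF_PAIR];      // per-instance dynamic coefficients
    float y_h[NUM_FILTER][NUM_COEFF_PAIR];     // equalizer histories, formerly function statics;
    float x_h[NUM_COEFF_PAIR * 2];             // equalizer() would take them as parameters
} rnn_denoise_t;

rnn_denoise_t *rnn_denoise_create(void)
{
    rnn_denoise_t *ctx = calloc(1, sizeof(rnn_denoise_t));
    if (ctx == NULL)
        return NULL;
    ctx->model = nnom_model_create();          // must be made re-entrant as described above
    ctx->mfcc = mfcc_create(NUM_FEATURES, 0, NUM_FEATURES, 512, 0, true);
    return ctx;
}

void rnn_denoise_destroy(rnn_denoise_t *ctx)
{
    if (ctx == NULL)
        return;
    model_delete(ctx->model);                  // as in the WAV example; the mfcc handle should
    free(ctx);                                 // also be released via its matching mfcc.h call
}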
In testing it runs quite fast: recordings tens of minutes long are denoised quickly. It can also be hooked up to FreeSWITCH for multi-channel real-time denoising ahead of speech recognition.
If building the module or the whole workflow feels like too much trouble, a packaged version is available at
https://item.taobao.com/item.htm?id=653611115230
and there is a video tutorial on yuexiajiayan's Bilibili channel.