-
Notifications
You must be signed in to change notification settings - Fork 0
Feature extraction corrections #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
8849c87
951c8f8
0cac0cd
0513dcb
2857f02
f7b678f
9d4a686
05787ba
d57e872
405c1f2
b9b1c21
c7f9c70
89ebc4c
a39f114
836777f
dc3ec8a
6bcc917
16f1e7e
4af2d0e
f2d026e
af33fc6
289c7d4
644ed2a
b5dadd8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| onnx/out | ||
| onnx/.vs/ | ||
| onnx/libs/ | ||
| onnx/test/.vs/ | ||
| onnx/test/out/ | ||
| onnx/test/Audiofile/ | ||
| src/BeatNet/__pycache__/ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -70,9 +70,8 @@ BeatNet::BeatNet( | |
| env(nullptr), session(nullptr), session_options(nullptr), | ||
| memory_info(nullptr), allocator(nullptr), run_options(nullptr), | ||
| input_name(nullptr), output_name(nullptr), | ||
| signal_processor(FRAME_LENGTH, HOP_SIZE), | ||
| fft_processor(FRAME_LENGTH, FFT_SIZE, FRAME_SIZE_POW2), | ||
| filterbank_processor(BANKS_PER_OCTAVE, FFT_SIZE, SR_BEATNET, 30.0f, 11025.0f, true, true), | ||
| fft_processor(FRAME_LENGTH, FFT_SIZE, FRAME_LENGTH), | ||
| filterbank_processor(BANKS_PER_OCTAVE, FFT_SIZE, SR_BEATNET, 30.0f, 17000.0f, true, true), | ||
| SR(0),bufferSize(0) | ||
| { | ||
|
|
||
|
|
@@ -136,18 +135,38 @@ void BeatNet::setup(double sampleRate, int samplesPerBlock) { | |
|
|
||
| bool BeatNet::preprocess(const std::vector<float>& raw_input, std::vector<float>& preprocessed_input) { | ||
|
|
||
| std::vector<float> resampled = resampler.resample(raw_input); | ||
| std::vector<float> frame; | ||
| bool valid_frame = signal_processor.process(resampled,frame); | ||
| if (!valid_frame) { | ||
| // std::cout<<"invalid frame and will be invalid for the first ~"<<FRAME_LENGTH/resampled.size()-1<<" frames"<<std::endl; | ||
| return false; | ||
| } | ||
|
|
||
| spectrum = fft_processor.compute_fft(frame); | ||
| filters = filterbank_processor.apply(spectrum); | ||
| log_fb = log_compress(filters); | ||
| diff = spectral_diff(log_fb, prev_log_fb); | ||
| std::vector<float> resampledSignal = resampler.resample(raw_input); | ||
|
|
||
| // slice original signal to Frames | ||
| const int nFrames = 4; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Prefer defining hyperparameters outside functions. Especially since this is a fixed value, you can either define it using |
||
| FramedSignal framedSignal{ resampledSignal , nFrames, FRAME_LENGTH, HOP_SIZE }; | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
At first, because the You should declare the object on the header, initialize it on the |
||
|
|
||
| // spectral difference | ||
| // last frame | ||
| auto frame_3 = framedSignal[3]; | ||
| auto spectrum_3 = fft_processor.compute_fft(frame_3); | ||
| auto filters_3 = filterbank_processor.apply(spectrum_3); | ||
| auto log_compress_3 = log_compress(filters_3); | ||
| log_fb = std::move(log_compress_3); | ||
|
|
||
| // frame before | ||
| auto frame_2 = framedSignal[2]; | ||
| auto spectrum_2 = fft_processor.compute_fft(frame_2); | ||
| auto filters_2 = filterbank_processor.apply(spectrum_2); | ||
| auto log_compress_2 = log_compress(filters_2); | ||
| prev_log_fb = std::move(log_compress_2); | ||
|
|
||
| // diff = log_fb3 - log_fb2 | ||
| diff.assign(log_fb.size(), 0.0f); | ||
| std::transform(log_fb.begin(), log_fb.end(), prev_log_fb.begin(), | ||
| diff.begin(), std::minus()); | ||
|
|
||
| // replace negative values with zero | ||
| std::replace_if(diff.begin(), diff.end(), | ||
| [](float x) {return x < 0.0f; }, | ||
| 0.0f); | ||
|
|
||
| // stack log spectrum and spectral difference | ||
| hstack(log_fb, diff, preprocessed_input); | ||
| return true; | ||
| } | ||
|
|
@@ -191,7 +210,7 @@ void BeatNet::inference(std::vector<float>& output) { | |
| output[i] = output_data[i]; | ||
| } | ||
|
|
||
| printOutputShape(output_tensor); | ||
| // printOutputShape(output_tensor); | ||
|
|
||
| ReleaseValue(input_tensor); | ||
| ReleaseValue(output_tensor); | ||
|
|
@@ -214,4 +233,4 @@ void BeatNet::printOutputShape(OrtValue* output_tensor) { | |
| std::cout << "]" << std::endl; | ||
|
|
||
| ReleaseTensorTypeAndShapeInfo(shape_info); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||
|---|---|---|---|---|
| @@ -0,0 +1,56 @@ | ||||
| #include "framedSignal.h" | ||||
| #include <algorithm> | ||||
| #include <stdexcept> | ||||
| #include "iostream" | ||||
|
|
||||
| FramedSignal::FramedSignal(const std::vector<float>& inputSignal, int nFrames, int frameSize, int hopSize) | ||||
| : original_signal(inputSignal), | ||||
| nFrames(nFrames), | ||||
| frameSize(frameSize), | ||||
| hopSize(hopSize) | ||||
| { | ||||
| int nMax = ((nFrames -1) * hopSize) + frameSize; | ||||
| padded_signal.assign(nMax, 0.0f); | ||||
|
|
||||
| { | ||||
| auto s0 = original_signal.begin(); | ||||
| auto sEnd = original_signal.end(); | ||||
| auto destination = padded_signal.begin() + frameSize / 2; | ||||
|
|
||||
| int i = frameSize / 2; | ||||
|
|
||||
| std::copy_if(s0, sEnd, destination, | ||||
| [&i, nMax](float x) | ||||
| { | ||||
| return i++ < nMax; | ||||
| }); | ||||
| } | ||||
|
|
||||
| for (int iFrame = 0, index = 0; iFrame < nFrames; iFrame++, index += hopSize) | ||||
| { | ||||
| auto i0 = padded_signal.begin() + index; | ||||
|
|
||||
| std::vector<float> signal(i0, i0 + frameSize); | ||||
| frames.push_back(signal); | ||||
| } | ||||
| } | ||||
|
Comment on lines
+11
to
+36
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Implementing the framing logic on the constructor results in the This logic should be transferred to a Line 10 in 1985e3d
On each call an input buffer comes in and an output frame comes out, and the function returns true only if a valid frame was produced (the first few buffers won't be long enough to fill a complete frame). |
||||
|
|
||||
| FramedSignal::~FramedSignal() | ||||
| { | ||||
|
|
||||
| } | ||||
|
|
||||
| std::vector<float> FramedSignal::operator[](int i) | ||||
| { | ||||
| return frames.at(i); | ||||
| } | ||||
|
|
||||
| std::vector<float> FramedSignal::getOriginalSignal() | ||||
| { | ||||
| return original_signal; | ||||
| } | ||||
|
|
||||
| int FramedSignal::get_nFrames() | ||||
| { | ||||
| return nFrames; | ||||
| } | ||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,26 @@ | ||
| #ifndef FRAMEDSIGNAL_H | ||
| #define FRAMEDSIGNAL_H | ||
|
|
||
| #include <vector> | ||
|
|
||
// Holds a signal together with the fixed-size frames sliced from it.
// All framing work is done in the constructor; the accessors only read
// the stored results.
| class FramedSignal { | ||
| public: | ||
|
|
||
// Stores a copy of inputSignal and builds nFrames frames of frameSize
// samples each, with consecutive frames starting hopSize samples apart.
| FramedSignal(const std::vector<float>& inputSignal, int nFrames, int frameSize, int hopSize); | ||
| ~FramedSignal(); | ||
|
|
||
// Returns frame i by value (a copy).
| std::vector<float> operator[](int i); | ||
// Returns the stored input signal by value (a copy).
| std::vector<float> getOriginalSignal(); | ||
// Returns the number of frames requested at construction.
| int get_nFrames(); | ||
|
Comment on lines
+13
to
+14
Owner
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These functions are not used. Should they be removed? |
||
|
|
||
| private: | ||
// Copy of the signal passed to the constructor.
| std::vector<float> original_signal; | ||
// Framing geometry as passed to the constructor.
| int nFrames; | ||
| int frameSize; | ||
| int hopSize; | ||
|
|
||
// Zero-padded working buffer that the frames are sliced from.
| std::vector<float> padded_signal; | ||
// One vector per frame, in frame order.
| std::vector<std::vector<float>> frames; | ||
| }; | ||
|
|
||
| #endif | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You should probably keep this check, so that the first calls of the function (made while the first frame is still being assembled from the initial buffers) return false and abort the model inference. Simply put, in that case there is not yet a valid input signal to pass to the model.