-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathspectrogram.cu
More file actions
101 lines (80 loc) · 2.86 KB
/
spectrogram.cu
File metadata and controls
101 lines (80 loc) · 2.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
// cuda_spectrogram.cu
#include <cuda_runtime.h>

#include <climits>
#include <cmath>
#include <complex>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <vector>
#define BLOCK_SIZE 256
#define WINDOW_SIZE 512
#define HOP_SIZE 256 // 50% overlap
// Symmetric Hann window coefficient: 0.5 * (1 - cos(2*pi*n / (N - 1))).
//
// n: sample index inside the window, expected in [0, N - 1].
// N: window length in samples.
//
// Returns the window weight for sample n. A window of length N <= 1 has no
// well-defined taper (the formula would divide by zero), so the weight is 1.
//
// Callable from both host and device code so that a CPU reference can share
// the exact same window definition as the kernels.
__host__ __device__ float hann(int n, int N) {
    if (N <= 1)
        return 1.0f;
    // Single-precision 2*pi: using the double-precision M_PI here would promote
    // the whole expression to double, which is needlessly slow on the GPU.
    const float kTwoPi = 6.28318530717958647692f;
    return 0.5f * (1.0f - cosf(kTwoPi * n / (N - 1)));
}
// Computes the power spectrum (|X[k]|^2) of every Hann-windowed frame of `input`
// with a direct O(frameSize^2) DFT per frame.
//
// Launch layout: one block per frame (blockIdx.x selects the frame). Threads of
// the block stride over the frameSize samples/bins, so any 1-D block size works;
// launching with blockDim.x == frameSize gives one thread per bin.
// Shared memory: WINDOW_SIZE floats, statically allocated.
//
// input:     device pointer to the samples; must hold at least
//            (numFrames - 1) * hopSize + frameSize floats.
// output:    device pointer receiving numFrames * frameSize floats, laid out
//            frame-major (output[frame * frameSize + k]).
// numFrames: number of frames to process.
// frameSize: samples per frame and number of DFT bins; must be in
//            [1, WINDOW_SIZE]. Blocks exit without writing anything otherwise.
// hopSize:   distance in samples between the starts of consecutive frames.
__global__ void stftKernel(const float* input, float* output, int numFrames, int frameSize, int hopSize) {
    const int frame = blockIdx.x;
    // All three conditions are uniform across the block, so leaving before the
    // barrier below cannot split the block at __syncthreads().
    if (frame >= numFrames || frameSize <= 0 || frameSize > WINDOW_SIZE)
        return;

    __shared__ float windowed[WINDOW_SIZE];

    // Stage the windowed frame in shared memory. Every sample of every frame is
    // in range by the precondition on `input`, so no frame is zero-padded.
    const size_t start = static_cast<size_t>(frame) * hopSize;
    for (int i = threadIdx.x; i < frameSize; i += blockDim.x)
        windowed[i] = input[start + i] * hann(i, frameSize);
    __syncthreads();

    // Angle increment per unit of phase, in single precision (M_PI is a double
    // and would drag the whole inner loop into double arithmetic).
    const float step = 6.28318530717958647692f / frameSize;
    const size_t outBase = static_cast<size_t>(frame) * frameSize;

    for (int k = threadIdx.x; k < frameSize; k += blockDim.x) {
        float real = 0.0f, imag = 0.0f;
        // phase == (k * n) mod frameSize, tracked incrementally so the argument
        // handed to sincosf stays in [0, 2*pi) and keeps full float accuracy.
        int phase = 0;
        for (int n = 0; n < frameSize; n++) {
            float s, c;
            sincosf(step * phase, &s, &c);
            real += windowed[n] * c;
            imag -= windowed[n] * s;
            phase += k;
            if (phase >= frameSize)
                phase -= frameSize;
        }
        output[outBase + k] = real * real + imag * imag; // power spectrum
    }
}
// Loads a whole file of raw `float` samples (stored in the host's in-memory
// representation) into `data`.
//
// filename: path of the file to read.
// data:     replaced with floor(fileBytes / sizeof(float)) samples; trailing
//           bytes that do not make up a whole float are ignored.
//
// On any failure (file cannot be opened, its size cannot be determined, or the
// samples cannot be read completely) a message is printed to stderr and the
// process terminates with exit code 1.
void readRawAudio(const char* filename, std::vector<float>& data) {
    std::ifstream f(filename, std::ios::binary);
    if (!f) {
        std::cerr << "Error: Cannot open input audio\n";
        exit(1);
    }
    f.seekg(0, std::ios::end);
    // tellg() signals failure with -1; converting that straight to size_t would
    // turn it into an enormous sample count for resize().
    const std::streamoff byteCount = f.tellg();
    if (byteCount < 0) {
        std::cerr << "Error: Cannot determine input audio size\n";
        exit(1);
    }
    const size_t size = static_cast<size_t>(byteCount) / sizeof(float);
    f.seekg(0, std::ios::beg);
    data.resize(size);
    if (size > 0) {
        f.read(reinterpret_cast<char*>(data.data()),
               static_cast<std::streamsize>(size * sizeof(float)));
        // A short read would leave zero-filled samples that look like valid audio.
        if (!f) {
            std::cerr << "Error: Cannot read input audio\n";
            exit(1);
        }
    }
}
// Writes `spec` to `filename` as raw `float` values (host in-memory
// representation), replacing any existing file.
//
// filename: path of the file to create.
// spec:     values to store, written back-to-back in order.
//
// If the file cannot be opened or the data cannot be written completely, a
// message is printed to stderr and the process terminates with exit code 1,
// mirroring how readRawAudio reports input failures.
void writeSpectrogram(const char* filename, const std::vector<float>& spec) {
    std::ofstream f(filename, std::ios::binary);
    if (!f) {
        std::cerr << "Error: Cannot open output file\n";
        exit(1);
    }
    f.write(reinterpret_cast<const char*>(spec.data()),
            static_cast<std::streamsize>(spec.size() * sizeof(float)));
    // close() flushes; checking the stream afterwards catches both write and
    // flush failures (e.g. a full disk).
    f.close();
    if (!f) {
        std::cerr << "Error: Cannot write output file\n";
        exit(1);
    }
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char* what) {
    if (status == cudaSuccess)
        return true;
    std::cerr << "CUDA error (" << what << "): " << cudaGetErrorString(status) << "\n";
    return false;
}

// Entry point: reads raw float audio from argv[1], computes its power
// spectrogram on the GPU (WINDOW_SIZE-sample frames, HOP_SIZE hop) and writes
// the result as raw floats to argv[2].
//
// Returns 0 on success and 1 on a usage error, an input too short to form one
// frame, an input too long for the kernel's int indexing, or any CUDA failure.
int main(int argc, char** argv) {
    if (argc != 3) {
        std::cout << "Usage: ./spectrogram input.raw output.raw\n";
        return 1;
    }
    std::vector<float> audio;
    readRawAudio(argv[1], audio);

    const int frameSize = WINDOW_SIZE;
    const int hopSize = HOP_SIZE;

    // audio.size() is unsigned: subtracting frameSize from a shorter input
    // would wrap around to a huge frame count.
    if (audio.size() < static_cast<size_t>(frameSize)) {
        std::cerr << "Error: Input audio is shorter than one window\n";
        return 1;
    }
    // Number of complete frames; the "+ 1" counts the frame starting at sample 0.
    const size_t frameCount = (audio.size() - frameSize) / hopSize + 1;
    // The kernel receives the frame count as an int and the grid is sized by it.
    if (frameCount > static_cast<size_t>(INT_MAX / frameSize)) {
        std::cerr << "Error: Input audio is too long\n";
        return 1;
    }
    const int numFrames = static_cast<int>(frameCount);

    // Sizes are computed in size_t so the products cannot overflow int.
    const size_t inputBytes = audio.size() * sizeof(float);
    const size_t outputCount = frameCount * frameSize;
    const size_t outputBytes = outputCount * sizeof(float);

    float* d_input = nullptr;
    float* d_output = nullptr;
    std::vector<float> spectrogram;

    bool ok = cudaOk(cudaMalloc(&d_input, inputBytes), "allocate input")
           && cudaOk(cudaMalloc(&d_output, outputBytes), "allocate output")
           && cudaOk(cudaMemcpy(d_input, audio.data(), inputBytes, cudaMemcpyHostToDevice),
                     "copy input to device");
    if (ok) {
        stftKernel<<<numFrames, frameSize>>>(d_input, d_output, numFrames, frameSize, hopSize);
        // Launch-configuration errors show up in cudaGetLastError(); faults
        // during execution show up at the synchronizing call.
        ok = cudaOk(cudaGetLastError(), "launch stftKernel")
          && cudaOk(cudaDeviceSynchronize(), "run stftKernel");
    }
    if (ok) {
        spectrogram.resize(outputCount);
        ok = cudaOk(cudaMemcpy(spectrogram.data(), d_output, outputBytes, cudaMemcpyDeviceToHost),
                    "copy output to host");
    }

    // Release device memory on every path; cudaFree accepts a null pointer.
    cudaFree(d_input);
    cudaFree(d_output);
    if (!ok)
        return 1;

    writeSpectrogram(argv[2], spectrogram);
    std::cout << "Spectrogram written to " << argv[2] << "\n";
    return 0;
}