From ca58c530463a9f659a3b0ee0a53774d3872dc1c6 Mon Sep 17 00:00:00 2001
From: BB zhang <zhtclz@foxmail.com>
Date: Mon, 1 Oct 2018 11:45:27 +0800
Subject: [PATCH 1/2] export powerspec API

---
 python_speech_features/base.py | 37 ++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)

diff --git a/python_speech_features/base.py b/python_speech_features/base.py
index 4161899..460849d 100644
--- a/python_speech_features/base.py
+++ b/python_speech_features/base.py
@@ -32,6 +32,28 @@ def mfcc(signal,samplerate=16000,winlen=0.025,winstep=0.01,numcep=13,
     if appendEnergy: feat[:,0] = numpy.log(energy) # replace first cepstral coefficient with log of frame energy
     return feat
 
+def powerspec(signal,samplerate=16000,winlen=0.025,winstep=0.01,
+          nfft=512, lowfreq=0,highfreq=None, preemph=0.97,
+          winfunc=lambda x:numpy.ones((x,))):
+    """Compute power spectrogram features from an audio signal.
+
+    :param signal: the audio signal from which to compute features. Should be an N*1 array
+    :param samplerate: the samplerate of the signal we are working with.
+    :param winlen: the length of the analysis window in seconds. Default is 0.025s (25 milliseconds)
+    :param winstep: the step between successive windows in seconds. Default is 0.01s (10 milliseconds)
+    :param nfft: the FFT size. Default is 512.
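+    :param lowfreq: lowest band edge of mel filters. In Hz, default is 0. Not used by the power spectrum computation itself; accepted so that fbank can pass its arguments straight through.
+    :param highfreq: highest band edge of mel filters. In Hz, default is samplerate/2. Not used by the power spectrum computation itself; accepted so that fbank can pass its arguments straight through.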
+    :param preemph: apply preemphasis filter with preemph as coefficient. 0 is no filter. Default is 0.97.
+    :param winfunc: the analysis window to apply to each frame. By default no window is applied. You can use numpy window functions here e.g. winfunc=numpy.hamming
+    :returns: a numpy array of size (NUMFRAMES by nfft/2+1) containing the power spectrogram. Each row holds the power spectrum of 1 frame.
+    """
+    highfreq= highfreq or samplerate/2
+    signal = sigproc.preemphasis(signal, preemph)
+    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)
+    pspec = sigproc.powspec(frames,nfft)
+    return pspec
+ 
 def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
           nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,
           winfunc=lambda x:numpy.ones((x,))):
@@ -50,10 +72,7 @@ def fbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
     :returns: 2 values. The first is a numpy array of size (NUMFRAMES by nfilt) containing features. Each row holds 1 feature vector. The
         second return value is the energy in each frame (total energy, unwindowed)
     """
-    highfreq= highfreq or samplerate/2
-    signal = sigproc.preemphasis(signal,preemph)
-    frames = sigproc.framesig(signal, winlen*samplerate, winstep*samplerate, winfunc)
-    pspec = sigproc.powspec(frames,nfft)
+    pspec = powerspec(signal, samplerate, winlen, winstep, nfft, lowfreq, highfreq, preemph, winfunc) 
     energy = numpy.sum(pspec,1) # this stores the total energy in each frame
     energy = numpy.where(energy == 0,numpy.finfo(float).eps,energy) # if energy is zero, we get problems with log
 
@@ -83,6 +102,16 @@ def logfbank(signal,samplerate=16000,winlen=0.025,winstep=0.01,
     feat,energy = fbank(signal,samplerate,winlen,winstep,nfilt,nfft,lowfreq,highfreq,preemph,winfunc)
     return numpy.log(feat)
 
+def logfbank_from_powspec(pspec, samplerate=16000, nfilt=26, nfft=512, lowfreq=0,highfreq=None):
+    energy = numpy.sum(pspec,1) # this stores the total energy in each frame
+    energy = numpy.where(energy == 0, numpy.finfo(float).eps, energy) # if energy is zero, we get problems with log
+
+    fb = get_filterbanks(nfilt,nfft,samplerate,lowfreq,highfreq)
+    feat = numpy.dot(pspec,fb.T) # compute the filterbank energies
+    feat = numpy.where(feat == 0, numpy.finfo(float).eps, feat) # if feat is zero, we get problems with log
+
+    return numpy.log(feat)
+
 def ssc(signal,samplerate=16000,winlen=0.025,winstep=0.01,
         nfilt=26,nfft=512,lowfreq=0,highfreq=None,preemph=0.97,
         winfunc=lambda x:numpy.ones((x,))):

From 22ae31eae1a0516d2596ce17b91de738e612720a Mon Sep 17 00:00:00 2001
From: gaoyonghu <gaoyonghu@didichuxing.com>
Date: Mon, 1 Oct 2018 16:42:36 +0800
Subject: [PATCH 2/2] add librosa examples

---
 example.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/example.py b/example.py
index 4441acf..5bf8107 100644
--- a/example.py
+++ b/example.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+import librosa
 from python_speech_features import mfcc
 from python_speech_features import delta
 from python_speech_features import logfbank
@@ -11,3 +12,18 @@
 fbank_feat = logfbank(sig,rate)
 
 print(fbank_feat[1:3,:])
+
+
+# stride = 10 ms
+# sample_rate = 8 kHz
+# hop_length = stride * sample_rate = 0.010 s * 8000 Hz = 80 samples
+assert librosa.samples_to_frames(1200000, hop_length=80) == 15000
+assert librosa.samples_to_frames(1280000, hop_length=80) == 16000
+assert librosa.frames_to_samples(16000, hop_length=80) == 1280000
+
+assert librosa.time_to_frames(10, sr=8000, hop_length=80) == 1000
+assert librosa.time_to_frames(300.29, sr=8000, hop_length=80) == 30029
+assert librosa.frames_to_time(30029, hop_length=80, sr=8000) ==300.29 
+
+assert librosa.samples_to_times(80000, sr=8000) == 10.0
+assert librosa.time_to_samples(10.0, sr=8000) == 80000