Commit 26ffe7b3 authored by Keunwoo Choi

add easy feature extraction

parent 9548f4b3
@@ -5,7 +5,36 @@ Repo for paper ["Transfer learning for music classification and regression tasks
![diagram](https://github.com/keunwoochoi/transfer_learning_music/blob/master/diagram.png "diagram")
![results](https://github.com/keunwoochoi/transfer_learning_music/blob/master/results.png "results")
# Mode 1/2. To use the same feature extractor
## Prerequisites (same as Mode 2, except the datasets)
- [Keras 1.2.2 (OLD ONE!)](https://github.com/fchollet/keras/tree/1.2.2/keras) (*NOT THE MOST RECENT VERSION*)
- [Kapre OLD VERSION for OLD KERAS](https://github.com/keunwoochoi/kapre/tree/a3bde3e38f62fc5458231198ea2528b752fbb373), installed by:
```
$ git clone https://github.com/keunwoochoi/kapre.git
$ cd kapre
$ git checkout a3bde3e
$ python setup.py install
```
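To double-check that the pinned versions are actually in place, here is a quick sanity check in Python (a small sketch, not a script in this repo):
```
import keras
import kapre  # just to confirm the old Kapre commit is importable

# The feature extractor expects Keras 1.x (ideally 1.2.2).
print(keras.__version__)  # expected: 1.2.2
```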
## Usage
```
$ python easy_feature_extraction.py audio_paths.txt some/path/features.npy
```
where `audio_paths.txt` lists audio file paths, one per line, and `some/path/features.npy` is the path where the resulting feature array will be saved.
E.g., `audio_paths.txt`:
```
blah/a.mp3
blahblah/234.wav
some/other.c.mp3
```
Then load the `.npy` file, e.g. as shown below. The saved features form an array of shape `(num_songs, 160)`.
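For instance, loading the result might look like this (a minimal sketch; the path is whatever you passed as the second argument):
```
import numpy as np

features = np.load('some/path/features.npy')
print(features.shape)  # (num_songs, 160): the five per-layer feature vectors concatenated per song
```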
# Mode 2/2. To reproduce the paper
## Prerequisites
* Download datasets:
@@ -36,6 +65,7 @@ $ python setup.py install
* `2_main_knn_svm_transfer`: Do SVM (a rough sklearn sketch follows this list)
* `3. knn and svm (with AveragePooling) results plots`: Plot results
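As mentioned above, here is a rough sketch of how the extracted features could be fed to an SVM with scikit-learn; `labels.npy` is a hypothetical per-song label file, and this is not the repo's exact kNN/SVM pipeline:
```
import numpy as np
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

features = np.load('some/path/features.npy')  # (num_songs, 160)
labels = np.load('some/path/labels.npy')      # hypothetical: one label per song

clf = make_pipeline(StandardScaler(), SVC(kernel='rbf', C=1.0))
clf.fit(features, labels)
print('Training accuracy: {:.3f}'.format(clf.score(features, labels)))
```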
# Common
## Links
- [Train/valid/test split of MSD](https://github.com/keunwoochoi/MSD_split_for_tagging/blob/master/README.md)
- [Paper: arXiv 1703.09179, Transfer Learning for Music Classification and Regression tasks](https://arxiv.org/abs/1703.09179)
easy_feature_extraction.py (new file)
import sys
from argparse import Namespace
import librosa
from keras import backend as K
from models_transfer import build_convnet_model
import numpy as np
import keras
import kapre
SR = 12000 # [Hz]
LEN_SRC = 29. # [second]
ref_n_src = 12000 * 29
if keras.__version__[0] != '1':
    raise RuntimeError('Keras version should be 1.x, maybe 1.2.2')
def load_model(mid_idx):
    """Load the pre-trained convnet, keeping outputs up to conv block `mid_idx` (0-4)."""
    assert 0 <= mid_idx <= 4
    args = Namespace(test=False, data_percent=100, model_name='', tf_type='melgram',
                     normalize='no', decibel=True, fmin=0.0, fmax=6000,
                     n_mels=96, trainable_fb=False, trainable_kernel=False,
                     conv_until=mid_idx)
    model = build_convnet_model(args, last_layer=False)
    model.load_weights('weights_transfer/weights_layer{}_{}.hdf5'.format(mid_idx, K._backend),
                       by_name=True)
    return model
def load_audio(audio_path):
    """Load audio at 12 kHz mono and zero-pad/trim it to exactly 29 seconds."""
    src, sr = librosa.load(audio_path, sr=SR, duration=LEN_SRC)
    len_src = len(src)
    if len_src < ref_n_src:  # too short: zero-pad to the reference length
        new_src = np.zeros(ref_n_src)
        new_src[:len_src] = src
        return new_src[np.newaxis, np.newaxis, :]
    else:  # long enough: trim to the reference length
        return src[np.newaxis, np.newaxis, :ref_n_src]
def main(txt_path, out_path):
    models = [load_model(mid_idx) for mid_idx in range(5)]  # one model per conv layer, five in total
    all_features = []
    with open(txt_path) as f_path:
        for line in f_path:
            path = line.rstrip('\n')
            print('Loading/extracting {}...'.format(path))
            src = load_audio(path)
            features = [models[i].predict(src)[0] for i in range(5)]
            all_features.append(np.concatenate(features, axis=0))  # concatenate the five per-layer features

    all_features = np.array(all_features, dtype=np.float32)
    print('Saving all features at {}..'.format(out_path))
    np.save(out_path, all_features)
    print('Done. Saved a numpy array with shape (%d, %d).' % all_features.shape)
def warning():
    print('-' * 65)
    print(' * Python 2.7-ish')
    print(' * Keras 1.2.2,')
    print(' * Kapre old one (git checkout a3bde3e)')
    print(" * Read README.md. Come on, it's short..")
    print('')
    print(' Usage: ')
    print('$ python easy_feature_extraction.py audio_paths.txt features.npy')
    print('')
    print(' , where audio_paths.txt lists audio paths line-by-line')
    print('   and features.npy is the path to store the resulting feature array.')
    print('-' * 65)
if __name__ == '__main__':
    warning()
    txt_path = sys.argv[1]
    out_path = sys.argv[2]
    main(txt_path, out_path)
models_transfer.py
@@ -25,7 +25,6 @@ SR = 12000
def build_convnet_model(args, last_layer=True, sr=None):
    '''Build the convnet Keras model used as the feature extractor.'''
    start = time.time()
    # ------------------------------------------------------------------#
    tf = args.tf_type
    normalize = args.normalize
@@ -34,19 +33,9 @@ def build_convnet_model(args, last_layer=True, sr=None):
    decibel = args.decibel

    model = raw_vgg(args, tf=tf, normalize=normalize, decibel=decibel,
                    last_layer=last_layer, sr=sr)
    for layer in model.layers:
        if layer.name in ['ConvBNEluDr']:
            layer.summary()
            layer.count_params()
    model.summary()
    model.count_params()
    print(' ---->>--- ready to compile keras model ---')
    model.compile(optimizer=keras.optimizers.Adam(lr=5e-3),
                  loss='binary_crossentropy')
    print("--- keras model was built, took %d seconds ---" % (time.time() - start))
    # pdb.set_trace()
    return model