From f0713775974831ff502d92c961826124991ca9b1 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 10:21:04 +0100 Subject: [PATCH 01/77] changes for python3.6 --- run_expr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/run_expr.py b/run_expr.py index f2ba50e..c66e35d 100644 --- a/run_expr.py +++ b/run_expr.py @@ -37,7 +37,7 @@ assert keras.backend.backend() == u'tensorflow', \ "Requires Tensorflow (>=1.2.1)." -assert hasGPU(), "Requires GPU." +#assert hasGPU(), "Requires GPU." parser = argparse.ArgumentParser(description='Run EigenPro tests.') parser.add_argument('--kernel', type=str, default='Gaussian', @@ -128,7 +128,7 @@ # Assemble SGD trainer. rff_weights = np.float32( # for Gaussian kernel np.sqrt(2. / (2 * 5 ** 2)) # s = 5 - * np.random.randn(D, d/2)) + * np.random.randn(D, d>>1)) input_shape = (D,) x = Input(shape=input_shape, dtype='float32', name='feat') rf_f = RFF(rff_weights, input_shape=input_shape) @@ -159,7 +159,7 @@ model=model, x_train = x_train, x_test=x_test) # Start training. -for name, trainer in trainers.iteritems(): +for name, trainer in trainers.items(): print("") initial_epoch=0 np.random.seed(1) # Keras uses numpy random number generator From 6580510807ce754991ac4e6076a8c6bd95b10e72 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 10:31:17 +0100 Subject: [PATCH 02/77] bla --- run_expr.py | 1 + 1 file changed, 1 insertion(+) diff --git a/run_expr.py b/run_expr.py index c66e35d..1c882d8 100644 --- a/run_expr.py +++ b/run_expr.py @@ -166,6 +166,7 @@ train_ts = 0 # training time in seconds for epoch in [1, 2, 5, 10, 20, 40]: start = time.time() + print("Running ",epoch," Iterations on ",name) trainer.model.fit( trainer.x_train, y_train, batch_size=bs, epochs=epoch, verbose=0, From 06248b0ba5023dbe4574aaa2e02588b5c7d5c8bf Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 11:08:33 +0100 Subject: [PATCH 03/77] cifar10 --- run_expr.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/run_expr.py b/run_expr.py index 1c882d8..ebd128a 100644 --- a/run_expr.py +++ b/run_expr.py @@ -16,6 +16,7 @@ from distutils.version import StrictVersion from keras.layers import Dense, Input from keras.models import Model +from keras.datasets import cifar10 from keras import backend as K import kernels @@ -53,7 +54,12 @@ num_classes = 10 # number of classes -(x_train, y_train), (x_test, y_test) = mnist.load() +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +print('x_train shape:', x_train.shape) +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') +x_train = x_train.reshape((x_train.shape[0],-1)) +x_test = x_test.reshape((x_test.shape[0],-1)) n, D = x_train.shape # (n_sample, n_feature) d = np.int32(n / 2) * 2 # number of random features From 55a0f4658b03adc5c4923df52c37d8baa7750b71 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 11:24:19 +0100 Subject: [PATCH 04/77] cifar10 --- run_expr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/run_expr.py b/run_expr.py index ebd128a..c0b3a0a 100644 --- a/run_expr.py +++ b/run_expr.py @@ -12,6 +12,7 @@ import numpy as np import time import warnings +from math import sqrt from distutils.version import StrictVersion from keras.layers import Dense, Input @@ -68,7 +69,7 @@ y_test = keras.utils.to_categorical(y_test, num_classes) if args_dict['kernel'] == 'Gaussian': - s = 5 # kernel bandwidth + s = sqrt(D) # kernel bandwidth kernel = lambda x,y: kernels.Gaussian(x, y, s) elif args_dict['kernel'] == 'Laplace': From ac3132ad328ad817b252445e92ec993dae0f3a36 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 14:28:41 +0100 Subject: [PATCH 05/77] kernel experiment --- kernels.py | 20 ++++++++++++++++++++ run_expr.py | 14 ++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index ef6803f..8a3e587 100644 --- a/kernels.py +++ b/kernels.py @@ -13,6 +13,7 @@ def D2(X, Y): pointwise distances (n_sample, n_center). """ XX = K.sum(K.square(X), axis = 1, keepdims=True) + print(XX) if X is Y: YY = XX else: @@ -76,3 +77,22 @@ def Cauchy(X, Y, s): s2 = np.float32(s**2) G = 1 / K.exp( 1 + K.clip(d2, 0, None) / s2) return G + +import tensorflow as tf + +def ReLu(X,Y,s): + s = np.float32(s) + XX = K.sum(K.square(X), axis = 1, keepdims=True) + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + xx = K.sqrt(XX/s+np.float32(1.0)) + yy = K.sqrt(YY/s+np.float32(1.0)) + XY = K.dot(X, K.transpose(Y)) + cos = (XY/s + np.float32(1.0)) / (xx*yy) + cos = K.clip(cos,-1,1) + delta = tf.acos(cos) + pi = np.float32(3.14159265359) + d = np.float32(1024*3)#K.int_shape(X)[1]) + return (xx*yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) + diff --git a/run_expr.py b/run_expr.py index c0b3a0a..a20def4 100644 --- a/run_expr.py +++ b/run_expr.py @@ -56,12 +56,20 @@ num_classes = 10 # number of classes (x_train, y_train), (x_test, y_test) = cifar10.load_data() -print('x_train shape:', x_train.shape) + print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') +print(type(x_train)) + x_train = x_train.reshape((x_train.shape[0],-1)) x_test = x_test.reshape((x_test.shape[0],-1)) +print('x_train shape:', x_train.shape) +print(x_train[0],x_train.dtype) n, D = x_train.shape # (n_sample, n_feature) +x_train = np.divide(x_train,255.0) +x_test = np.divide(x_test,255.0) + + d = np.int32(n / 2) * 2 # number of random features # convert class vectors to binary class matrices @@ -79,7 +87,9 @@ elif args_dict['kernel'] == 'Cauchy': s = np.sqrt(40, dtype=np.float32) kernel = lambda x,y: kernels.Cauchy(x, y, s) - +elif args_dict['kernel'] == 'ReLu': + s = D^2 + kernel = lambda x,y: kernels.ReLu(x, y, s) else: raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" From 0d08d373a2d9df62adab2913ffd79f8c757b7c0c Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 14:33:55 +0100 Subject: [PATCH 06/77] kernel experiment --- run_expr.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/run_expr.py b/run_expr.py index a20def4..1f96770 100644 --- a/run_expr.py +++ b/run_expr.py @@ -105,24 +105,24 @@ eta = np.float32(1.5 / s0) # 1.5 / s0 eta = eta * num_classes # correction due to mse loss -# Assemble Pegasos trainer. -input_shape = (D+1,) # n_feature, (sample) index -ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') -x, index = utils.separate_index(ix) # features, sample_id -kfeat = KernelEmbedding(kernel, x_train, - input_shape=(D,))(x) -y = Dense(num_classes, input_shape=(n,), - activation='linear', - kernel_initializer='zeros', - use_bias=False)(kfeat) - -model = Model(ix, y) -model.compile(loss='mse', - optimizer=PSGD(pred_t=y, index_t=index, eta=eta), - metrics=['accuracy']) -trainers['Pegasos'] = Trainer(model=model, - x_train = utils.add_index(x_train), - x_test=utils.add_index(x_test)) +# # Assemble Pegasos trainer. +# input_shape = (D+1,) # n_feature, (sample) index +# ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') +# x, index = utils.separate_index(ix) # features, sample_id +# kfeat = KernelEmbedding(kernel, x_train, +# input_shape=(D,))(x) +# y = Dense(num_classes, input_shape=(n,), +# activation='linear', +# kernel_initializer='zeros', +# use_bias=False)(kfeat) + +# model = Model(ix, y) +# model.compile(loss='mse', +# optimizer=PSGD(pred_t=y, index_t=index, eta=eta), +# metrics=['accuracy']) +# trainers['Pegasos'] = Trainer(model=model, +# x_train = utils.add_index(x_train), +# x_test=utils.add_index(x_test)) # Assemble kernel EigenPro trainer. embed = Model(ix, kfeat) From 40b2efe68913b68321c31f96d7ee0b1cc04f7dbb Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 14:37:26 +0100 Subject: [PATCH 07/77] kernel experiment --- run_expr.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/run_expr.py b/run_expr.py index 1f96770..1adf1e5 100644 --- a/run_expr.py +++ b/run_expr.py @@ -88,7 +88,8 @@ s = np.sqrt(40, dtype=np.float32) kernel = lambda x,y: kernels.Cauchy(x, y, s) elif args_dict['kernel'] == 'ReLu': - s = D^2 + print("AAAH",np.linalg.norm(x_train,2,axis=0).shape) + s = np.max(np.linalg.norm(x_train,2,axis=0))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) else: raise Exception("Unknown kernel function - %s. \ @@ -107,10 +108,11 @@ # # Assemble Pegasos trainer. # input_shape = (D+1,) # n_feature, (sample) index -# ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') -# x, index = utils.separate_index(ix) # features, sample_id -# kfeat = KernelEmbedding(kernel, x_train, -# input_shape=(D,))(x) +ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') +x, index = utils.separate_index(ix) # features, sample_id + kfeat = KernelEmbedding(kernel, x_train, + input_shape=(D,))(x) + # y = Dense(num_classes, input_shape=(n,), # activation='linear', # kernel_initializer='zeros', From 6b4228a47cc0fe85a0fbee363d3b62682223710b Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 14:41:06 +0100 Subject: [PATCH 08/77] kernel experiment --- run_expr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/run_expr.py b/run_expr.py index 1adf1e5..653827e 100644 --- a/run_expr.py +++ b/run_expr.py @@ -88,8 +88,8 @@ s = np.sqrt(40, dtype=np.float32) kernel = lambda x,y: kernels.Cauchy(x, y, s) elif args_dict['kernel'] == 'ReLu': - print("AAAH",np.linalg.norm(x_train,2,axis=0).shape) - s = np.max(np.linalg.norm(x_train,2,axis=0))**2 + print("AAAH",np.linalg.norm(x_train,2,axis=1).shape) + s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) else: raise Exception("Unknown kernel function - %s. \ @@ -110,7 +110,7 @@ # input_shape = (D+1,) # n_feature, (sample) index ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') x, index = utils.separate_index(ix) # features, sample_id - kfeat = KernelEmbedding(kernel, x_train, +kfeat = KernelEmbedding(kernel, x_train, input_shape=(D,))(x) # y = Dense(num_classes, input_shape=(n,), From eaa3ab9f31202a1fe16b9575300944d56640a620 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 14:43:28 +0100 Subject: [PATCH 09/77] kernel experiment --- run_expr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_expr.py b/run_expr.py index 653827e..3dd6c5f 100644 --- a/run_expr.py +++ b/run_expr.py @@ -107,7 +107,7 @@ eta = eta * num_classes # correction due to mse loss # # Assemble Pegasos trainer. -# input_shape = (D+1,) # n_feature, (sample) index +input_shape = (D+1,) # n_feature, (sample) index ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') x, index = utils.separate_index(ix) # features, sample_id kfeat = KernelEmbedding(kernel, x_train, From 1395036ce807151d20c7d3f1bf46c1bf68216c45 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 15:30:00 +0100 Subject: [PATCH 10/77] kernel experiment --- run_expr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/run_expr.py b/run_expr.py index 3dd6c5f..a0af090 100644 --- a/run_expr.py +++ b/run_expr.py @@ -51,7 +51,7 @@ # Set the hyper-parameters. bs = 256 # size of the mini-batch M = 4800 # (EigenPro) subsample size -k = 160 # (EigenPro) top-k eigensystem +k = 256 # (EigenPro) top-k eigensystem num_classes = 10 # number of classes @@ -188,7 +188,7 @@ print("Running ",epoch," Iterations on ",name) trainer.model.fit( trainer.x_train, y_train, - batch_size=bs, epochs=epoch, verbose=0, + batch_size=bs, epochs=epoch, verbose=2, validation_data=(trainer.x_test, y_test), initial_epoch=initial_epoch) train_ts += time.time() - start From bb3de95156caf830c57a13059f219c4c75fa8d09 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 15:51:12 +0100 Subject: [PATCH 11/77] kernel experiment --- run_expr.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/run_expr.py b/run_expr.py index a0af090..89e306c 100644 --- a/run_expr.py +++ b/run_expr.py @@ -91,6 +91,9 @@ print("AAAH",np.linalg.norm(x_train,2,axis=1).shape) s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) +elif args_dict['kernel'] == 'DeepKernel': + s = 4 + kernel = lambda x,y: kernels.DeepKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" From 8a778f57b19509d507f475b67a67fab488f06620 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 15:51:39 +0100 Subject: [PATCH 12/77] kernel experiment --- kernels.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernels.py b/kernels.py index 8a3e587..61aa4db 100644 --- a/kernels.py +++ b/kernels.py @@ -96,3 +96,9 @@ def ReLu(X,Y,s): d = np.float32(1024*3)#K.int_shape(X)[1]) return (xx*yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) + +def DeepKernel(X,Y,s): + XY = K.dot(X, K.transpose(Y)) + for i in range(s): + XY = 1/(2-XY) + return XY From c23f97bf30dee8ebdc10e38ace883a0662bde855 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 15:59:28 +0100 Subject: [PATCH 13/77] new kernels --- .gitignore | 1 + kernels.py | 4 +++- run_expr.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bee8a64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +__pycache__ diff --git a/kernels.py b/kernels.py index 61aa4db..16f7777 100644 --- a/kernels.py +++ b/kernels.py @@ -81,6 +81,7 @@ def Cauchy(X, Y, s): import tensorflow as tf def ReLu(X,Y,s): + #Francis Bach - Breaking the Curse of Dimensionality with Convex Neural Networks s = np.float32(s) XX = K.sum(K.square(X), axis = 1, keepdims=True) YY = K.sum(K.square(Y), axis = 1, keepdims=True) @@ -98,7 +99,8 @@ def ReLu(X,Y,s): def DeepKernel(X,Y,s): + # Zhang, Lee and Jordan - l1 regularized Neural Networks are Improperly Learniable in Polynomial Time XY = K.dot(X, K.transpose(Y)) for i in range(s): - XY = 1/(2-XY) + XY = np.float(1)/(np.float32(2)-XY) return XY diff --git a/run_expr.py b/run_expr.py index 89e306c..3b9ea8f 100644 --- a/run_expr.py +++ b/run_expr.py @@ -92,7 +92,7 @@ s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) elif args_dict['kernel'] == 'DeepKernel': - s = 4 + s = 2 kernel = lambda x,y: kernels.DeepKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ From ed73564ce07350e629a0d81430bb11dddd6156e4 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 16:03:26 +0100 Subject: [PATCH 14/77] new kernels --- run_expr.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/run_expr.py b/run_expr.py index 3b9ea8f..c836001 100644 --- a/run_expr.py +++ b/run_expr.py @@ -92,7 +92,7 @@ s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) elif args_dict['kernel'] == 'DeepKernel': - s = 2 + s = 1 kernel = lambda x,y: kernels.DeepKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ @@ -103,9 +103,16 @@ Trainer = collections.namedtuple('Trainer', ['model', 'x_train', 'x_test']) +import os.path +import pickle +fname = args_dict['kernel']+"-"+str(M)+"-"+str(k)+".pickle" +if os.path.isfile(fname): + kf, scale, s0 = pickle.load(open(fname,"rb")) +else: # Calculate step size and (Primal) EigenPro preconditioner. -kf, scale, s0 = utils.asm_eigenpro_f( + kf, scale, s0 = utils.asm_eigenpro_f( x_train, kernel, M, k, 1, in_rkhs=True) + pickle.dump((kf, scale, s0),open(fname,"wb")) eta = np.float32(1.5 / s0) # 1.5 / s0 eta = eta * num_classes # correction due to mse loss From 0c9f02469413777dc10d3997c51e42925b0919aa Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 16:06:18 +0100 Subject: [PATCH 15/77] new kernels --- run_expr.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/run_expr.py b/run_expr.py index c836001..00efccb 100644 --- a/run_expr.py +++ b/run_expr.py @@ -92,7 +92,7 @@ s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) elif args_dict['kernel'] == 'DeepKernel': - s = 1 + s = 2 kernel = lambda x,y: kernels.DeepKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ @@ -103,16 +103,11 @@ Trainer = collections.namedtuple('Trainer', ['model', 'x_train', 'x_test']) -import os.path -import pickle -fname = args_dict['kernel']+"-"+str(M)+"-"+str(k)+".pickle" -if os.path.isfile(fname): - kf, scale, s0 = pickle.load(open(fname,"rb")) -else: + + # Calculate step size and (Primal) EigenPro preconditioner. - kf, scale, s0 = utils.asm_eigenpro_f( +kf, scale, s0 = utils.asm_eigenpro_f( x_train, kernel, M, k, 1, in_rkhs=True) - pickle.dump((kf, scale, s0),open(fname,"wb")) eta = np.float32(1.5 / s0) # 1.5 / s0 eta = eta * num_classes # correction due to mse loss From b0abfee6a2259a6a734d7c36b4c60290a1bac0bb Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 16:09:59 +0100 Subject: [PATCH 16/77] new kernels --- kernels.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/kernels.py b/kernels.py index 16f7777..e37ad8d 100644 --- a/kernels.py +++ b/kernels.py @@ -101,6 +101,12 @@ def ReLu(X,Y,s): def DeepKernel(X,Y,s): # Zhang, Lee and Jordan - l1 regularized Neural Networks are Improperly Learniable in Polynomial Time XY = K.dot(X, K.transpose(Y)) + XX = K.sum(K.square(X), axis = 1, keepdims=True) + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + XY = XY/(XX*YY) + K.clip(XY,-1,1) for i in range(s): XY = np.float(1)/(np.float32(2)-XY) return XY From c039d7fa624b5823ee8f2b7310605a23ed4bbd88 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 16:16:30 +0100 Subject: [PATCH 17/77] new kernels --- run_expr.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/run_expr.py b/run_expr.py index 00efccb..91e50db 100644 --- a/run_expr.py +++ b/run_expr.py @@ -118,18 +118,18 @@ kfeat = KernelEmbedding(kernel, x_train, input_shape=(D,))(x) -# y = Dense(num_classes, input_shape=(n,), -# activation='linear', -# kernel_initializer='zeros', -# use_bias=False)(kfeat) - -# model = Model(ix, y) -# model.compile(loss='mse', -# optimizer=PSGD(pred_t=y, index_t=index, eta=eta), -# metrics=['accuracy']) -# trainers['Pegasos'] = Trainer(model=model, -# x_train = utils.add_index(x_train), -# x_test=utils.add_index(x_test)) +y = Dense(num_classes, input_shape=(n,), + activation='linear', + kernel_initializer='zeros', + use_bias=False)(kfeat) + +model = Model(ix, y) +model.compile(loss='mse', + optimizer=PSGD(pred_t=y, index_t=index, eta=eta), + metrics=['accuracy']) +trainers['Pegasos'] = Trainer(model=model, + x_train = utils.add_index(x_train), + x_test=utils.add_index(x_test)) # Assemble kernel EigenPro trainer. embed = Model(ix, kfeat) From e1107b365804f1d0e02b3867c75cc6f2494f59f5 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 17:09:14 +0100 Subject: [PATCH 18/77] new kernels --- kernels.py | 22 +++++++++++++++++++++- run_expr.py | 27 +++++++++++++++------------ 2 files changed, 36 insertions(+), 13 deletions(-) diff --git a/kernels.py b/kernels.py index e37ad8d..a9653b8 100644 --- a/kernels.py +++ b/kernels.py @@ -94,7 +94,8 @@ def ReLu(X,Y,s): cos = K.clip(cos,-1,1) delta = tf.acos(cos) pi = np.float32(3.14159265359) - d = np.float32(1024*3)#K.int_shape(X)[1]) + d = K.int_shape(X)[1] + print(d) return (xx*yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) @@ -110,3 +111,22 @@ def DeepKernel(X,Y,s): for i in range(s): XY = np.float(1)/(np.float32(2)-XY) return XY + + +def CompositionalFeedForwardKernel(X,Y,layers): + # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity + XX = K.sum(K.square(X), axis = 1, keepdims=True) + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + XY = K.dot(X, K.transpose(Y)) + XY = XY/(XX*YY) + XY = K.clip(XY,-1,1) + + pi = np.float32(3.14159265359) + d = K.int_shape(X)[1] + XY = XY/d + for i in range(layers): + XY = (K.sqrt(1-K.square(XY))+(pi-tf.acos(XY))*XY)/pi + XY = K.clip(XY,-1,1) + return XY \ No newline at end of file diff --git a/run_expr.py b/run_expr.py index 91e50db..952de75 100644 --- a/run_expr.py +++ b/run_expr.py @@ -94,6 +94,9 @@ elif args_dict['kernel'] == 'DeepKernel': s = 2 kernel = lambda x,y: kernels.DeepKernel(x, y, s) +elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': + s = 2 + kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" @@ -118,18 +121,18 @@ kfeat = KernelEmbedding(kernel, x_train, input_shape=(D,))(x) -y = Dense(num_classes, input_shape=(n,), - activation='linear', - kernel_initializer='zeros', - use_bias=False)(kfeat) +# y = Dense(num_classes, input_shape=(n,), +# activation='linear', +# kernel_initializer='zeros', +# use_bias=False)(kfeat) -model = Model(ix, y) -model.compile(loss='mse', - optimizer=PSGD(pred_t=y, index_t=index, eta=eta), - metrics=['accuracy']) -trainers['Pegasos'] = Trainer(model=model, - x_train = utils.add_index(x_train), - x_test=utils.add_index(x_test)) +# model = Model(ix, y) +# model.compile(loss='mse', +# optimizer=PSGD(pred_t=y, index_t=index, eta=eta), +# metrics=['accuracy']) +# trainers['Pegasos'] = Trainer(model=model, +# x_train = utils.add_index(x_train), +# x_test=utils.add_index(x_test)) # Assemble kernel EigenPro trainer. embed = Model(ix, kfeat) @@ -188,7 +191,7 @@ initial_epoch=0 np.random.seed(1) # Keras uses numpy random number generator train_ts = 0 # training time in seconds - for epoch in [1, 2, 5, 10, 20, 40]: + for epoch in [100]: start = time.time() print("Running ",epoch," Iterations on ",name) trainer.model.fit( From 81d307418e7d6461abd8092f25413846ca9fc052 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 17:12:57 +0100 Subject: [PATCH 19/77] new kernels --- run_expr.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/run_expr.py b/run_expr.py index 952de75..80d1e91 100644 --- a/run_expr.py +++ b/run_expr.py @@ -27,7 +27,8 @@ from backend_extra import hasGPU from layers import KernelEmbedding, RFF from optimizers import PSGD, SGD - +import tf +tf.logging.set_verbosity(tf.logging.ERROR) assert StrictVersion(keras.__version__) >= StrictVersion('2.0.8'), \ "Requires Keras (>=2.0.8)." @@ -95,7 +96,7 @@ s = 2 kernel = lambda x,y: kernels.DeepKernel(x, y, s) elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': - s = 2 + s = 1 kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ From 5d1316ff11d47899fe76010c8cabafc830dbd380 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 17:15:21 +0100 Subject: [PATCH 20/77] new kernels --- run_expr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_expr.py b/run_expr.py index 80d1e91..ed0742e 100644 --- a/run_expr.py +++ b/run_expr.py @@ -27,7 +27,7 @@ from backend_extra import hasGPU from layers import KernelEmbedding, RFF from optimizers import PSGD, SGD -import tf +import tensorflow as tf tf.logging.set_verbosity(tf.logging.ERROR) assert StrictVersion(keras.__version__) >= StrictVersion('2.0.8'), \ "Requires Keras (>=2.0.8)." From 342c5a331e26959cc026d3067e02be3c60623bde Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 17:18:54 +0100 Subject: [PATCH 21/77] new kernels --- kernels.py | 3 ++- run_expr.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index a9653b8..dffa0a4 100644 --- a/kernels.py +++ b/kernels.py @@ -107,9 +107,10 @@ def DeepKernel(X,Y,s): XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = XY/(XX*YY) - K.clip(XY,-1,1) + XY = K.clip(XY,-1,1) for i in range(s): XY = np.float(1)/(np.float32(2)-XY) + XY = K.clip(XY,0,1) return XY diff --git a/run_expr.py b/run_expr.py index ed0742e..0fb4829 100644 --- a/run_expr.py +++ b/run_expr.py @@ -52,7 +52,7 @@ # Set the hyper-parameters. bs = 256 # size of the mini-batch M = 4800 # (EigenPro) subsample size -k = 256 # (EigenPro) top-k eigensystem +k = 160 # (EigenPro) top-k eigensystem num_classes = 10 # number of classes From fdea7e8d9af53a87e178722b0f410ec17f5fc054 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:15:36 +0100 Subject: [PATCH 22/77] new kernels --- kernels.py | 16 ++++++++-------- run_expr.py | 2 +- utils.py | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/kernels.py b/kernels.py index dffa0a4..120f2fc 100644 --- a/kernels.py +++ b/kernels.py @@ -95,7 +95,6 @@ def ReLu(X,Y,s): delta = tf.acos(cos) pi = np.float32(3.14159265359) d = K.int_shape(X)[1] - print(d) return (xx*yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) @@ -114,20 +113,21 @@ def DeepKernel(X,Y,s): return XY -def CompositionalFeedForwardKernel(X,Y,layers): +def CompositionalFeedForwardKernel(X,Y,s): # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity - XX = K.sum(K.square(X), axis = 1, keepdims=True) - YY = K.sum(K.square(Y), axis = 1, keepdims=True) - XX = K.reshape(XX, (K.shape(X)[0], 1)) - YY = K.reshape(YY, (1,K.shape(Y)[0])) + s = np.float32(s) + # XX = K.sum(K.square(X), axis = 1, keepdims=True) + # YY = K.sum(K.square(Y), axis = 1, keepdims=True) + # XX = K.reshape(XX, (K.shape(X)[0], 1)) + # YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = K.dot(X, K.transpose(Y)) - XY = XY/(XX*YY) + XY = XY/s XY = K.clip(XY,-1,1) pi = np.float32(3.14159265359) d = K.int_shape(X)[1] XY = XY/d - for i in range(layers): + for i in range(1): XY = (K.sqrt(1-K.square(XY))+(pi-tf.acos(XY))*XY)/pi XY = K.clip(XY,-1,1) return XY \ No newline at end of file diff --git a/run_expr.py b/run_expr.py index 0fb4829..b931cb2 100644 --- a/run_expr.py +++ b/run_expr.py @@ -96,7 +96,7 @@ s = 2 kernel = lambda x,y: kernels.DeepKernel(x, y, s) elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': - s = 1 + s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) else: raise Exception("Unknown kernel function - %s. \ diff --git a/utils.py b/utils.py index d94b0dc..a9bef37 100644 --- a/utils.py +++ b/utils.py @@ -107,8 +107,8 @@ def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): fmap = lambda _x: model.predict(_x, batch_size=1024) _s, _V, _sk = rsvd(feat, fmap, M, k) # phi is a feature map _s, _sk, _V = _s[:k], _s[-1], _V[:, :k] - print("SVD time: %.2f, Eigenvalue ratio: %.2f" % - (time.time() - start, _s[0] / _sk)) + print("SVD time: %.2f, Eigenvalue ratio: %.2f %.2f %.2f" % + (time.time() - start, _s[0] / _sk,_s[0] ,_sk)) s = K.constant(_s) V = K.constant(_V) From f9687d547eee77c40b50c858654fa90f0b9331b5 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:35:42 +0100 Subject: [PATCH 23/77] new kernels --- kernels.py | 30 ++++++++++++++++++++++++------ utils.py | 2 +- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/kernels.py b/kernels.py index 120f2fc..da2c890 100644 --- a/kernels.py +++ b/kernels.py @@ -116,12 +116,12 @@ def DeepKernel(X,Y,s): def CompositionalFeedForwardKernel(X,Y,s): # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity s = np.float32(s) - # XX = K.sum(K.square(X), axis = 1, keepdims=True) - # YY = K.sum(K.square(Y), axis = 1, keepdims=True) - # XX = K.reshape(XX, (K.shape(X)[0], 1)) - # YY = K.reshape(YY, (1,K.shape(Y)[0])) + XX = K.sum(K.square(X), axis = 1, keepdims=True) + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = K.dot(X, K.transpose(Y)) - XY = XY/s + XY = tf.divide(XY,tf.multiply(XX,YY)) XY = K.clip(XY,-1,1) pi = np.float32(3.14159265359) @@ -130,4 +130,22 @@ def CompositionalFeedForwardKernel(X,Y,s): for i in range(1): XY = (K.sqrt(1-K.square(XY))+(pi-tf.acos(XY))*XY)/pi XY = K.clip(XY,-1,1) - return XY \ No newline at end of file + return XY + + + # xs = [] + # for i in range(output_row): + # for j in range(output_col): + # slice_row = slice(i * stride_row, + # i * stride_row + kernel_size[0]) + # slice_col = slice(j * stride_col, + # j * stride_col + kernel_size[1]) + # if data_format == 'channels_first': + # xs.append(reshape(inputs[:, :, slice_row, slice_col], + # (1, -1, feature_dim))) + # else: + # xs.append(reshape(inputs[:, slice_row, slice_col, :], + # (1, -1, feature_dim))) + + # x_aggregate = concatenate(xs, axis=0) + # output = batch_dot(x_aggregate, kernel) diff --git a/utils.py b/utils.py index a9bef37..af9170f 100644 --- a/utils.py +++ b/utils.py @@ -107,7 +107,7 @@ def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): fmap = lambda _x: model.predict(_x, batch_size=1024) _s, _V, _sk = rsvd(feat, fmap, M, k) # phi is a feature map _s, _sk, _V = _s[:k], _s[-1], _V[:, :k] - print("SVD time: %.2f, Eigenvalue ratio: %.2f %.2f %.2f" % + print("SVD time: %.2f, Eigenvalue ratio: %.2f %.2f %.8f" % (time.time() - start, _s[0] / _sk,_s[0] ,_sk)) s = K.constant(_s) From c7dac7ae15892399703bb5467a6a6e0cd6aa3bb1 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:43:26 +0100 Subject: [PATCH 24/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index da2c890..d182d02 100644 --- a/kernels.py +++ b/kernels.py @@ -121,7 +121,7 @@ def CompositionalFeedForwardKernel(X,Y,s): XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = K.dot(X, K.transpose(Y)) - XY = tf.divide(XY,tf.multiply(XX,YY)) + XY = tf.div(XY,tf.multiply(XX,YY)) XY = K.clip(XY,-1,1) pi = np.float32(3.14159265359) From 7802865d6fd92ce9da873e64a6c46d124abce93f Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:46:32 +0100 Subject: [PATCH 25/77] new kernels --- kernels.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernels.py b/kernels.py index d182d02..b33318e 100644 --- a/kernels.py +++ b/kernels.py @@ -116,12 +116,12 @@ def DeepKernel(X,Y,s): def CompositionalFeedForwardKernel(X,Y,s): # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity s = np.float32(s) - XX = K.sum(K.square(X), axis = 1, keepdims=True) - YY = K.sum(K.square(Y), axis = 1, keepdims=True) - XX = K.reshape(XX, (K.shape(X)[0], 1)) - YY = K.reshape(YY, (1,K.shape(Y)[0])) + # XX = K.sum(K.square(X), axis = 1, keepdims=True) + # YY = K.sum(K.square(Y), axis = 1, keepdims=True) + # XX = K.reshape(XX, (K.shape(X)[0], 1)) + # YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = K.dot(X, K.transpose(Y)) - XY = tf.div(XY,tf.multiply(XX,YY)) + XY = XY/s #tf.div(XY,tf.multiply(XX,YY)) XY = K.clip(XY,-1,1) pi = np.float32(3.14159265359) From f396acdfb13f24951c941417d98143bab80c6caf Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:52:25 +0100 Subject: [PATCH 26/77] new kernels --- kernels.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernels.py b/kernels.py index b33318e..c84cf29 100644 --- a/kernels.py +++ b/kernels.py @@ -116,12 +116,12 @@ def DeepKernel(X,Y,s): def CompositionalFeedForwardKernel(X,Y,s): # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity s = np.float32(s) - # XX = K.sum(K.square(X), axis = 1, keepdims=True) - # YY = K.sum(K.square(Y), axis = 1, keepdims=True) - # XX = K.reshape(XX, (K.shape(X)[0], 1)) - # YY = K.reshape(YY, (1,K.shape(Y)[0])) + XX = K.sum(K.square(X), axis = 1, keepdims=True) + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = K.dot(X, K.transpose(Y)) - XY = XY/s #tf.div(XY,tf.multiply(XX,YY)) + XY = tf.div(XY,XX*YY) XY = K.clip(XY,-1,1) pi = np.float32(3.14159265359) From 7a09b15c5132fd8bb27f82973921c4d0fe4567de Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:54:44 +0100 Subject: [PATCH 27/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index c84cf29..ec8998e 100644 --- a/kernels.py +++ b/kernels.py @@ -128,7 +128,7 @@ def CompositionalFeedForwardKernel(X,Y,s): d = K.int_shape(X)[1] XY = XY/d for i in range(1): - XY = (K.sqrt(1-K.square(XY))+(pi-tf.acos(XY))*XY)/pi + XY = (K.sqrt(1-K.square(XY))+(pi-tf.multiply(tf.acos(XY),XY)))/pi XY = K.clip(XY,-1,1) return XY From 9f146f87e2374b283b82e844ac14d3f3292190bc Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 20:58:46 +0100 Subject: [PATCH 28/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index ec8998e..51668d2 100644 --- a/kernels.py +++ b/kernels.py @@ -121,7 +121,7 @@ def CompositionalFeedForwardKernel(X,Y,s): XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = K.dot(X, K.transpose(Y)) - XY = tf.div(XY,XX*YY) + XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) pi = np.float32(3.14159265359) From d307a68270ac7d921553421c969e67be4d5bfb4c Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 21:01:40 +0100 Subject: [PATCH 29/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index 51668d2..2e47c6b 100644 --- a/kernels.py +++ b/kernels.py @@ -123,7 +123,7 @@ def CompositionalFeedForwardKernel(X,Y,s): XY = K.dot(X, K.transpose(Y)) XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) - + return XY pi = np.float32(3.14159265359) d = K.int_shape(X)[1] XY = XY/d From aa57d51907d39d8a4118ae242e53a8b247ae9874 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 21:10:01 +0100 Subject: [PATCH 30/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index 2e47c6b..53723fc 100644 --- a/kernels.py +++ b/kernels.py @@ -123,12 +123,12 @@ def CompositionalFeedForwardKernel(X,Y,s): XY = K.dot(X, K.transpose(Y)) XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) - return XY + pi = np.float32(3.14159265359) d = K.int_shape(X)[1] XY = XY/d for i in range(1): - XY = (K.sqrt(1-K.square(XY))+(pi-tf.multiply(tf.acos(XY),XY)))/pi + XY = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi XY = K.clip(XY,-1,1) return XY From 6781590934cdc050266d3b6c3084e1402e07d351 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 21:23:15 +0100 Subject: [PATCH 31/77] new kernels --- kernels.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index 53723fc..e9ae431 100644 --- a/kernels.py +++ b/kernels.py @@ -87,15 +87,18 @@ def ReLu(X,Y,s): YY = K.sum(K.square(Y), axis = 1, keepdims=True) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) + xx = K.sqrt(XX/s+np.float32(1.0)) yy = K.sqrt(YY/s+np.float32(1.0)) + XY = K.dot(X, K.transpose(Y)) - cos = (XY/s + np.float32(1.0)) / (xx*yy) + + cos = (XY/s + np.float32(1.0)) / (K.dot(xx,yy)) # cos = K.clip(cos,-1,1) delta = tf.acos(cos) pi = np.float32(3.14159265359) d = K.int_shape(X)[1] - return (xx*yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) + return K.dot(xx,yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) #dot product, eg. macht nur das erste sinn. def DeepKernel(X,Y,s): @@ -116,10 +119,12 @@ def DeepKernel(X,Y,s): def CompositionalFeedForwardKernel(X,Y,s): # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity s = np.float32(s) + XX = K.sum(K.square(X), axis = 1, keepdims=True) YY = K.sum(K.square(Y), axis = 1, keepdims=True) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) + XY = K.dot(X, K.transpose(Y)) XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) From aabe6a2d452d3d01950304d24851744150d49766 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Tue, 5 Dec 2017 22:04:23 +0100 Subject: [PATCH 32/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index e9ae431..b5d8856 100644 --- a/kernels.py +++ b/kernels.py @@ -134,7 +134,7 @@ def CompositionalFeedForwardKernel(X,Y,s): XY = XY/d for i in range(1): XY = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi - XY = K.clip(XY,-1,1) + #XY = K.clip(XY,-1,1) return XY From d164bf6fe34847eb3ad15cc0afc810c757d1c476 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 10:08:04 +0100 Subject: [PATCH 33/77] new kernels --- kernels.py | 17 +++++++++++++++++ run_expr.py | 3 +++ 2 files changed, 20 insertions(+) diff --git a/kernels.py b/kernels.py index b5d8856..873ff46 100644 --- a/kernels.py +++ b/kernels.py @@ -137,6 +137,23 @@ def CompositionalFeedForwardKernel(X,Y,s): #XY = K.clip(XY,-1,1) return XY +def ArcCosine(X,Y,s): + # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning + # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) + s = np.float32(s) + + XX = K.sum(K.square(X), axis = 1, keepdims=True) + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + + XY = K.dot(X, K.transpose(Y)) + XY = tf.div(XY,K.dot(XX,YY)) + XY = K.clip(XY,-1,1) + theta = tf.acos(XY) + pi = np.float32(3.14159265359) + return K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)) + # xs = [] # for i in range(output_row): diff --git a/run_expr.py b/run_expr.py index b931cb2..8d361e8 100644 --- a/run_expr.py +++ b/run_expr.py @@ -98,6 +98,9 @@ elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) +elif args_dict['kernel'] == 'ArcCosine': + s = np.max(np.linalg.norm(x_train,2,axis=1))**2 + kernel = lambda x,y: kernels.ArcCosine(x, y, s) else: raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" From ca889c7ab72411f39414f088affd25a803550a47 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 10:17:44 +0100 Subject: [PATCH 34/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index 873ff46..25aa1a8 100644 --- a/kernels.py +++ b/kernels.py @@ -142,8 +142,8 @@ def ArcCosine(X,Y,s): # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) s = np.float32(s) - XX = K.sum(K.square(X), axis = 1, keepdims=True) - YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) From 6d77726c84fec4081e93c906a956c0a4c985e638 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 10:20:43 +0100 Subject: [PATCH 35/77] new kernels --- kernels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernels.py b/kernels.py index 25aa1a8..f4be153 100644 --- a/kernels.py +++ b/kernels.py @@ -120,8 +120,8 @@ def CompositionalFeedForwardKernel(X,Y,s): # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity s = np.float32(s) - XX = K.sum(K.square(X), axis = 1, keepdims=True) - YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) @@ -152,7 +152,7 @@ def ArcCosine(X,Y,s): XY = K.clip(XY,-1,1) theta = tf.acos(XY) pi = np.float32(3.14159265359) - return K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)) + return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi # xs = [] From b31fa2d5e061082fc0b303ee6550f0088072214e Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 10:39:20 +0100 Subject: [PATCH 36/77] new kernels --- kernels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernels.py b/kernels.py index f4be153..567cf8d 100644 --- a/kernels.py +++ b/kernels.py @@ -104,11 +104,11 @@ def ReLu(X,Y,s): def DeepKernel(X,Y,s): # Zhang, Lee and Jordan - l1 regularized Neural Networks are Improperly Learniable in Polynomial Time XY = K.dot(X, K.transpose(Y)) - XX = K.sum(K.square(X), axis = 1, keepdims=True) - YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) - XY = XY/(XX*YY) + XY = XY/K.dot(XX,YY) XY = K.clip(XY,-1,1) for i in range(s): XY = np.float(1)/(np.float32(2)-XY) From 9c52922c49552452a8170a40439892b630de16d0 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:06:15 +0100 Subject: [PATCH 37/77] new kernels --- kernels.py | 288 ++++++++++++++++++++++++++++------------------------ run_expr.py | 5 +- 2 files changed, 159 insertions(+), 134 deletions(-) diff --git a/kernels.py b/kernels.py index 567cf8d..7bdd781 100644 --- a/kernels.py +++ b/kernels.py @@ -3,157 +3,179 @@ from keras import backend as K def D2(X, Y): - """ Calculate the pointwise (squared) distance. - - Arguments: - X: of shape (n_sample, n_feature). - Y: of shape (n_center, n_feature). - - Returns: - pointwise distances (n_sample, n_center). - """ - XX = K.sum(K.square(X), axis = 1, keepdims=True) - print(XX) - if X is Y: - YY = XX - else: - YY = K.sum(K.square(Y), axis = 1, keepdims=True) - XY = K.dot(X, K.transpose(Y)) - d2 = K.reshape(XX, (K.shape(X)[0], 1)) \ - + K.reshape(YY, (1, K.shape(Y)[0])) \ - - 2 * XY - return d2 + """ Calculate the pointwise (squared) distance. + + Arguments: + X: of shape (n_sample, n_feature). + Y: of shape (n_center, n_feature). + + Returns: + pointwise distances (n_sample, n_center). + """ + XX = K.sum(K.square(X), axis = 1, keepdims=True) + print(XX) + if X is Y: + YY = XX + else: + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XY = K.dot(X, K.transpose(Y)) + d2 = K.reshape(XX, (K.shape(X)[0], 1)) \ + + K.reshape(YY, (1, K.shape(Y)[0])) \ + - 2 * XY + return d2 def Gaussian(X, Y, s): - """ Gaussian kernel. - - Arguments: - X: of shape (n_sample, n_feature). - Y: of shape (n_center, n_feature). - s: kernel bandwidth. - - Returns: - kernel matrix of shape (n_sample, n_center). - """ - assert s > 0 - - d2 = D2(X, Y) - gamma = np.float32(1. / (2 * s ** 2)) - G = K.exp(-gamma * K.clip(d2, 0, None)) - return G + """ Gaussian kernel. + + Arguments: + X: of shape (n_sample, n_feature). + Y: of shape (n_center, n_feature). + s: kernel bandwidth. + + Returns: + kernel matrix of shape (n_sample, n_center). + """ + assert s > 0 + + d2 = D2(X, Y) + gamma = np.float32(1. / (2 * s ** 2)) + G = K.exp(-gamma * K.clip(d2, 0, None)) + return G def Laplace(X, Y, s): - """ Laplace kernel. - - Arguments: - X: of shape (n_sample, n_feature). - Y: of shape (n_center, n_feature). - s: kernel bandwidth. - - Returns: - kernel matrix of shape (n_sample, n_center). - """ - assert s > 0 - - d2 = K.clip(D2(X, Y), 0, None) - d = K.sqrt(d2) - G = K.exp(- d / s) - return G + """ Laplace kernel. + + Arguments: + X: of shape (n_sample, n_feature). + Y: of shape (n_center, n_feature). + s: kernel bandwidth. + + Returns: + kernel matrix of shape (n_sample, n_center). + """ + assert s > 0 + + d2 = K.clip(D2(X, Y), 0, None) + d = K.sqrt(d2) + G = K.exp(- d / s) + return G def Cauchy(X, Y, s): - """ Cauchy kernel. - - Arguments: - X: of shape (n_sample, n_feature). - Y: of shape (n_center, n_feature). - s: kernel bandwidth. - - Returns: - kernel matrix of shape (n_sample, n_center). - """ - assert s > 0 - - d2 = D2(X, Y) - s2 = np.float32(s**2) - G = 1 / K.exp( 1 + K.clip(d2, 0, None) / s2) - return G + """ Cauchy kernel. + + Arguments: + X: of shape (n_sample, n_feature). + Y: of shape (n_center, n_feature). + s: kernel bandwidth. + + Returns: + kernel matrix of shape (n_sample, n_center). + """ + assert s > 0 + + d2 = D2(X, Y) + s2 = np.float32(s**2) + G = 1 / K.exp( 1 + K.clip(d2, 0, None) / s2) + return G import tensorflow as tf def ReLu(X,Y,s): - #Francis Bach - Breaking the Curse of Dimensionality with Convex Neural Networks - s = np.float32(s) - XX = K.sum(K.square(X), axis = 1, keepdims=True) - YY = K.sum(K.square(Y), axis = 1, keepdims=True) - XX = K.reshape(XX, (K.shape(X)[0], 1)) - YY = K.reshape(YY, (1,K.shape(Y)[0])) - - xx = K.sqrt(XX/s+np.float32(1.0)) - yy = K.sqrt(YY/s+np.float32(1.0)) - - XY = K.dot(X, K.transpose(Y)) - - cos = (XY/s + np.float32(1.0)) / (K.dot(xx,yy)) # - cos = K.clip(cos,-1,1) - delta = tf.acos(cos) - pi = np.float32(3.14159265359) - d = K.int_shape(X)[1] - return K.dot(xx,yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) #dot product, eg. macht nur das erste sinn. + #Francis Bach - Breaking the Curse of Dimensionality with Convex Neural Networks + s = np.float32(s) + XX = K.sum(K.square(X), axis = 1, keepdims=True) + if Y is X: + YY = XX + else: + YY = K.sum(K.square(Y), axis = 1, keepdims=True) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + + xx = K.sqrt(XX/s+np.float32(1.0)) + yy = K.sqrt(YY/s+np.float32(1.0)) + + XY = K.dot(X, K.transpose(Y)) + + cos = (XY/s + np.float32(1.0)) / (K.dot(xx,yy)) # + cos = K.clip(cos,-1,1) + delta = tf.acos(cos) + pi = np.float32(3.14159265359) + d = K.int_shape(X)[1] + return K.dot(xx,yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) #dot product, eg. macht nur das erste sinn. def DeepKernel(X,Y,s): - # Zhang, Lee and Jordan - l1 regularized Neural Networks are Improperly Learniable in Polynomial Time - XY = K.dot(X, K.transpose(Y)) - XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) - YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) - XX = K.reshape(XX, (K.shape(X)[0], 1)) - YY = K.reshape(YY, (1,K.shape(Y)[0])) - XY = XY/K.dot(XX,YY) - XY = K.clip(XY,-1,1) - for i in range(s): - XY = np.float(1)/(np.float32(2)-XY) - XY = K.clip(XY,0,1) - return XY + # Zhang, Lee and Jordan - l1 regularized Neural Networks are Improperly Learniable in Polynomial Time + XY = K.dot(X, K.transpose(Y)) + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + if Y is X: + YY = XX + else: + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + XY = XY/K.dot(XX,YY) + XY = K.clip(XY,-1,1) + for i in range(s): + XY = np.float(1)/(np.float32(2)-XY) + XY = K.clip(XY,0,1) + return XY def CompositionalFeedForwardKernel(X,Y,s): - # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity - s = np.float32(s) - - XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) - YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) - XX = K.reshape(XX, (K.shape(X)[0], 1)) - YY = K.reshape(YY, (1,K.shape(Y)[0])) - - XY = K.dot(X, K.transpose(Y)) - XY = tf.div(XY,K.dot(XX,YY)) - XY = K.clip(XY,-1,1) - - pi = np.float32(3.14159265359) - d = K.int_shape(X)[1] - XY = XY/d - for i in range(1): - XY = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi - #XY = K.clip(XY,-1,1) - return XY + # Amit Daniely, Roy Frostig, Yoram Singer - Toward Deeper Understanding of Neural Networks -- The Power of Initialization and a Dual View on Expressivity + s = np.float32(s) + + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + if Y is X: + YY = XX + else: + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) + + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + + XY = K.dot(X, K.transpose(Y)) + XY = tf.div(XY,K.dot(XX,YY)) + XY = K.clip(XY,-1,1) + + pi = np.float32(3.14159265359) + d = K.int_shape(X)[1] + XY = XY/d + for i in range(1): + XY = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi + #XY = K.clip(XY,-1,1) + return XY def ArcCosine(X,Y,s): - # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning - # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) - s = np.float32(s) - - XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) - YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) - XX = K.reshape(XX, (K.shape(X)[0], 1)) - YY = K.reshape(YY, (1,K.shape(Y)[0])) - - XY = K.dot(X, K.transpose(Y)) - XY = tf.div(XY,K.dot(XX,YY)) - XY = K.clip(XY,-1,1) - theta = tf.acos(XY) - pi = np.float32(3.14159265359) - return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi - + # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning + # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) + if s<=1: + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + if Y is X: + YY = XX + else: + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) + + XY = K.dot(X, K.transpose(Y)) + XY = tf.div(XY,K.dot(XX,YY)) + XY = K.clip(XY,-1,1) + theta = tf.acos(XY) + pi = np.float32(3.14159265359) + + return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi + else: + XX = tf.diag_part(ArcCosine(X,X,s-1)) + YY = tf.diag_part(ArcCosine(Y,Y,s-1)) + XY = ArcCosine(X,Y,s-1) + XY = tf.div(XY,K.sqrt(K.dot(XX,YY))) + XY = K.clip(XY,-1,1) + theta = tf.acos(XY) + pi = np.float32(3.14159265359) + + return (K.sqrt(K.dot(XX,YY)) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi # xs = [] # for i in range(output_row): diff --git a/run_expr.py b/run_expr.py index 8d361e8..69419ba 100644 --- a/run_expr.py +++ b/run_expr.py @@ -29,6 +29,9 @@ from optimizers import PSGD, SGD import tensorflow as tf tf.logging.set_verbosity(tf.logging.ERROR) +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' + assert StrictVersion(keras.__version__) >= StrictVersion('2.0.8'), \ "Requires Keras (>=2.0.8)." @@ -99,7 +102,7 @@ s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) elif args_dict['kernel'] == 'ArcCosine': - s = np.max(np.linalg.norm(x_train,2,axis=1))**2 + s = 2 kernel = lambda x,y: kernels.ArcCosine(x, y, s) else: raise Exception("Unknown kernel function - %s. \ From 8f4dfa6a9749a256fa413c5586d7eed003c8c36e Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:16:41 +0100 Subject: [PATCH 38/77] new kernels --- kernels.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernels.py b/kernels.py index 7bdd781..6ffa274 100644 --- a/kernels.py +++ b/kernels.py @@ -169,6 +169,8 @@ def ArcCosine(X,Y,s): else: XX = tf.diag_part(ArcCosine(X,X,s-1)) YY = tf.diag_part(ArcCosine(Y,Y,s-1)) + XX = K.reshape(XX, (K.shape(X)[0], 1)) + YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = ArcCosine(X,Y,s-1) XY = tf.div(XY,K.sqrt(K.dot(XX,YY))) XY = K.clip(XY,-1,1) From e3c87eb3e13c9e44e7917e0e1e6e6f78744152c1 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:19:31 +0100 Subject: [PATCH 39/77] new kernels --- kernels.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernels.py b/kernels.py index 6ffa274..90365c0 100644 --- a/kernels.py +++ b/kernels.py @@ -167,17 +167,20 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = tf.diag_part(ArcCosine(X,X,s-1)) - YY = tf.diag_part(ArcCosine(Y,Y,s-1)) + XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) + if Y is X: + YY = XX + else: + YY = K.sqrt(tf.diag_part(ArcCosine(Y,Y,s-1))) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = ArcCosine(X,Y,s-1) - XY = tf.div(XY,K.sqrt(K.dot(XX,YY))) + XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) theta = tf.acos(XY) pi = np.float32(3.14159265359) - return (K.sqrt(K.dot(XX,YY)) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi + return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi # xs = [] # for i in range(output_row): From 64bd6fbb1b9cf95f6ca32d985832e48f73278c76 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:22:51 +0100 Subject: [PATCH 40/77] new kernels --- utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils.py b/utils.py index af9170f..31f0421 100644 --- a/utils.py +++ b/utils.py @@ -100,11 +100,11 @@ def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): kfeat = KernelEmbedding(phi, feat, input_shape=(D, ))(x) model = Model(x, kfeat) - fmap = lambda _x: model.predict(_x, batch_size=1024) + fmap = lambda _x: model.predict(_x, batch_size=256) _s, _V, _sk = rsvd(feat, fmap, M, k) # phi is a feature map else: model = Model(x, phi(x)) - fmap = lambda _x: model.predict(_x, batch_size=1024) + fmap = lambda _x: model.predict(_x, batch_size=256) _s, _V, _sk = rsvd(feat, fmap, M, k) # phi is a feature map _s, _sk, _V = _s[:k], _s[-1], _V[:, :k] print("SVD time: %.2f, Eigenvalue ratio: %.2f %.2f %.8f" % From b5197d815bf343426a057970cd617456639df014 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:33:41 +0100 Subject: [PATCH 41/77] new kernels --- kernels.py | 14 ++++++++------ utils.py | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/kernels.py b/kernels.py index 90365c0..b790714 100644 --- a/kernels.py +++ b/kernels.py @@ -167,12 +167,14 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) - if Y is X: - YY = XX - else: - YY = K.sqrt(tf.diag_part(ArcCosine(Y,Y,s-1))) - XX = K.reshape(XX, (K.shape(X)[0], 1)) + XX = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),XX)) + YY = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),XX)) + # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) + # if Y is X: + # YY = XX + # else: + # YY = K.sqrt(tf.diag_part(ArcCosine(Y,Y,s-1))) + XX = K.reshape(XX, (K.shape(X)[0],1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = ArcCosine(X,Y,s-1) XY = tf.div(XY,K.dot(XX,YY)) diff --git a/utils.py b/utils.py index 31f0421..af9170f 100644 --- a/utils.py +++ b/utils.py @@ -100,11 +100,11 @@ def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): kfeat = KernelEmbedding(phi, feat, input_shape=(D, ))(x) model = Model(x, kfeat) - fmap = lambda _x: model.predict(_x, batch_size=256) + fmap = lambda _x: model.predict(_x, batch_size=1024) _s, _V, _sk = rsvd(feat, fmap, M, k) # phi is a feature map else: model = Model(x, phi(x)) - fmap = lambda _x: model.predict(_x, batch_size=256) + fmap = lambda _x: model.predict(_x, batch_size=1024) _s, _V, _sk = rsvd(feat, fmap, M, k) # phi is a feature map _s, _sk, _V = _s[:k], _s[-1], _V[:, :k] print("SVD time: %.2f, Eigenvalue ratio: %.2f %.2f %.8f" % From c9de74de498013ab92a90acfc45ff8b0dc761d72 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:33:58 +0100 Subject: [PATCH 42/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index b790714..7608c29 100644 --- a/kernels.py +++ b/kernels.py @@ -167,8 +167,8 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),XX)) - YY = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),XX)) + XX = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),X)) + YY = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),Y)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX From e4cabc7d4485e013d58508807ab2a7e804d85958 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:44:26 +0100 Subject: [PATCH 43/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index 7608c29..2f36d4b 100644 --- a/kernels.py +++ b/kernels.py @@ -167,8 +167,8 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),X)) - YY = K.sqrt(tf.map_fn(lambda x : ArcCosine(x,x,s-1),Y)) + XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(x,x,s-1)),X,back_prop=False)) + YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(x,x,s-1)),Y,back_prop=False)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX From 7c8c8446d78601b5ebfdee5beebde178bf1bc07d Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:47:22 +0100 Subject: [PATCH 44/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index 2f36d4b..b547241 100644 --- a/kernels.py +++ b/kernels.py @@ -167,8 +167,8 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(x,x,s-1)),X,back_prop=False)) - YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(x,x,s-1)),Y,back_prop=False)) + XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(t,0),tf.expand_dims(x,0),s-1)),X,back_prop=False)) + YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(t,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX From 2d30eb6f68c1e96ff703cb6148ead74c4fc4bc2e Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:47:36 +0100 Subject: [PATCH 45/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index b547241..6e931a4 100644 --- a/kernels.py +++ b/kernels.py @@ -167,8 +167,8 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(t,0),tf.expand_dims(x,0),s-1)),X,back_prop=False)) - YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(t,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False)) + XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False)) + YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX From 65c4a21069001802536a35c93524bdf90e71570b Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 11:51:27 +0100 Subject: [PATCH 46/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index 6e931a4..02e40ed 100644 --- a/kernels.py +++ b/kernels.py @@ -167,8 +167,8 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False)) - YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False)) + XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False,swap_memory=True,parallel_iterations=1024)) + YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX From 2503b14c6a5fc4fe3454d8787405e347ab614c63 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 12:00:27 +0100 Subject: [PATCH 47/77] new kernels --- kernels.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index 02e40ed..99015f0 100644 --- a/kernels.py +++ b/kernels.py @@ -168,7 +168,10 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False,swap_memory=True,parallel_iterations=1024)) - YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) + if Y is X: + YY = XX: + else: + YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX From f4c2f0f7506f626c2592569128e8ad47e04bad84 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 12:03:25 +0100 Subject: [PATCH 48/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index 99015f0..50c1fba 100644 --- a/kernels.py +++ b/kernels.py @@ -169,7 +169,7 @@ def ArcCosine(X,Y,s): else: XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False,swap_memory=True,parallel_iterations=1024)) if Y is X: - YY = XX: + YY = XX else: YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) From 030b0a1f43611afa35eabad3346621e9d560d0c9 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 12:20:39 +0100 Subject: [PATCH 49/77] new kernels --- kernels.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/kernels.py b/kernels.py index 50c1fba..94a8c77 100644 --- a/kernels.py +++ b/kernels.py @@ -167,16 +167,21 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False,swap_memory=True,parallel_iterations=1024)) - if Y is X: - YY = XX - else: - YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) + # XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False,swap_memory=True,parallel_iterations=1024)) + # if Y is X: + # YY = XX + # else: + # YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) # if Y is X: # YY = XX # else: # YY = K.sqrt(tf.diag_part(ArcCosine(Y,Y,s-1))) + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + if Y is X: + YY = XX + else: + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0],1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) XY = ArcCosine(X,Y,s-1) From a03f80491af658fdaefe03bec0a6d03e4b6599e9 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 12:55:14 +0100 Subject: [PATCH 50/77] new kernels --- kernels.py | 10 ---------- run_expr.py | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/kernels.py b/kernels.py index 94a8c77..2b40273 100644 --- a/kernels.py +++ b/kernels.py @@ -167,16 +167,6 @@ def ArcCosine(X,Y,s): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - # XX = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),X,back_prop=False,swap_memory=True,parallel_iterations=1024)) - # if Y is X: - # YY = XX - # else: - # YY = K.sqrt(tf.map_fn((lambda x : ArcCosine(tf.expand_dims(x,0),tf.expand_dims(x,0),s-1)),Y,back_prop=False,swap_memory=True,parallel_iterations=1024)) - # XX = K.sqrt(tf.diag_part(ArcCosine(X,X,s-1))) - # if Y is X: - # YY = XX - # else: - # YY = K.sqrt(tf.diag_part(ArcCosine(Y,Y,s-1))) XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) if Y is X: YY = XX diff --git a/run_expr.py b/run_expr.py index 69419ba..0a18262 100644 --- a/run_expr.py +++ b/run_expr.py @@ -102,7 +102,7 @@ s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) elif args_dict['kernel'] == 'ArcCosine': - s = 2 + s = 3 kernel = lambda x,y: kernels.ArcCosine(x, y, s) else: raise Exception("Unknown kernel function - %s. \ From 88958fd53770541e83e9708792d0a040d44ab41f Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 14:56:45 +0100 Subject: [PATCH 51/77] new kernels --- kernels.py | 6 +++--- run_expr.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/kernels.py b/kernels.py index 2b40273..8878130 100644 --- a/kernels.py +++ b/kernels.py @@ -151,15 +151,15 @@ def ArcCosine(X,Y,s): # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) if s<=1: - XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + XX = K.sqrt(1+K.sum(K.square(X), axis = 1, keepdims=True)) if Y is X: YY = XX else: - YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) + YY = K.sqrt(1+K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) - XY = K.dot(X, K.transpose(Y)) + XY = K.dot(X, K.transpose(Y))+1 XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) theta = tf.acos(XY) diff --git a/run_expr.py b/run_expr.py index 0a18262..d4595de 100644 --- a/run_expr.py +++ b/run_expr.py @@ -48,6 +48,8 @@ parser = argparse.ArgumentParser(description='Run EigenPro tests.') parser.add_argument('--kernel', type=str, default='Gaussian', help='kernel function (e.g. Gaussian, Laplace, and Cauchy)') +parser.add_argument('--depth', type=int, default=1, + help='the depth of the neural network that is mimiced by the kernel (if supported by kernel)') args = parser.parse_args() args_dict = vars(args) @@ -96,13 +98,13 @@ s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) elif args_dict['kernel'] == 'DeepKernel': - s = 2 + s = int(args_dict["depth"]) kernel = lambda x,y: kernels.DeepKernel(x, y, s) elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) elif args_dict['kernel'] == 'ArcCosine': - s = 3 + s = int(args_dict["depth"]) kernel = lambda x,y: kernels.ArcCosine(x, y, s) else: raise Exception("Unknown kernel function - %s. \ From 6c4be48850ed68cff8dbe50df659208335c70da8 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:03:29 +0100 Subject: [PATCH 52/77] new kernels --- kernels.py | 8 ++--- run_expr.py | 99 +++++++++++++++++++++++++++-------------------------- 2 files changed, 55 insertions(+), 52 deletions(-) diff --git a/kernels.py b/kernels.py index 8878130..f5261e7 100644 --- a/kernels.py +++ b/kernels.py @@ -147,19 +147,19 @@ def CompositionalFeedForwardKernel(X,Y,s): #XY = K.clip(XY,-1,1) return XY -def ArcCosine(X,Y,s): +def ArcCosine(X,Y,s,bias=1.0): # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) if s<=1: - XX = K.sqrt(1+K.sum(K.square(X), axis = 1, keepdims=True)) + XX = K.sqrt(bias+K.sum(K.square(X), axis = 1, keepdims=True)) if Y is X: YY = XX else: - YY = K.sqrt(1+K.sum(K.square(Y), axis = 1, keepdims=True)) + YY = K.sqrt(bias+K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0], 1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) - XY = K.dot(X, K.transpose(Y))+1 + XY = K.dot(X, K.transpose(Y))+bias XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) theta = tf.acos(XY) diff --git a/run_expr.py b/run_expr.py index d4595de..9021234 100644 --- a/run_expr.py +++ b/run_expr.py @@ -17,12 +17,12 @@ from distutils.version import StrictVersion from keras.layers import Dense, Input from keras.models import Model -from keras.datasets import cifar10 from keras import backend as K import kernels import mnist import utils +import cifar from backend_extra import hasGPU from layers import KernelEmbedding, RFF @@ -59,28 +59,31 @@ M = 4800 # (EigenPro) subsample size k = 160 # (EigenPro) top-k eigensystem -num_classes = 10 # number of classes +# num_classes = 10 # number of classes -(x_train, y_train), (x_test, y_test) = cifar10.load_data() +# (x_train, y_train), (x_test, y_test) = cifar10.load_data() -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') -print(type(x_train)) +# print(x_train.shape[0], 'train samples') +# print(x_test.shape[0], 'test samples') +# print(type(x_train)) -x_train = x_train.reshape((x_train.shape[0],-1)) -x_test = x_test.reshape((x_test.shape[0],-1)) -print('x_train shape:', x_train.shape) -print(x_train[0],x_train.dtype) -n, D = x_train.shape # (n_sample, n_feature) -x_train = np.divide(x_train,255.0) -x_test = np.divide(x_test,255.0) +# x_train = x_train.reshape((x_train.shape[0],-1)) +# x_test = x_test.reshape((x_test.shape[0],-1)) +# print('x_train shape:', x_train.shape) +# print(x_train[0],x_train.dtype) +# n, D = x_train.shape # (n_sample, n_feature) +# x_train = np.divide(x_train,255.0) +# x_test = np.divide(x_test,255.0) -d = np.int32(n / 2) * 2 # number of random features -# convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) +# d = np.int32(n / 2) * 2 # number of random features + +# # convert class vectors to binary class matrices +# y_train = keras.utils.to_categorical(y_train, num_classes) +# y_test = keras.utils.to_categorical(y_test, num_classes) + +num_classes,x_train,x_test,y_train,y_test,R,d = cifar() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth @@ -101,7 +104,7 @@ s = int(args_dict["depth"]) kernel = lambda x,y: kernels.DeepKernel(x, y, s) elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': - s = np.max(np.linalg.norm(x_train,2,axis=1))**2 + s = R kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) elif args_dict['kernel'] == 'ArcCosine': s = int(args_dict["depth"]) @@ -161,38 +164,38 @@ x_test=utils.add_index(x_test)) -# Assemble SGD trainer. -rff_weights = np.float32( # for Gaussian kernel - np.sqrt(2. / (2 * 5 ** 2)) # s = 5 - * np.random.randn(D, d>>1)) -input_shape = (D,) -x = Input(shape=input_shape, dtype='float32', name='feat') -rf_f = RFF(rff_weights, input_shape=input_shape) -y = Dense(num_classes, input_shape=(d,), - activation='linear', - kernel_initializer='zeros', - use_bias=False)(rf_f(x)) -model = Model(x, y) +# # Assemble SGD trainer. +# rff_weights = np.float32( # for Gaussian kernel +# np.sqrt(2. / (2 * 5 ** 2)) # s = 5 +# * np.random.randn(D, d>>1)) +# input_shape = (D,) +# x = Input(shape=input_shape, dtype='float32', name='feat') +# rf_f = RFF(rff_weights, input_shape=input_shape) +# y = Dense(num_classes, input_shape=(d,), +# activation='linear', +# kernel_initializer='zeros', +# use_bias=False)(rf_f(x)) +# model = Model(x, y) -model.compile(loss='mse', - optimizer=SGD(eta=eta), - metrics=['accuracy']) -trainers['SGD with random Fourier feature'] = Trainer( - model=model, x_train = x_train, x_test=x_test) +# model.compile(loss='mse', +# optimizer=SGD(eta=eta), +# metrics=['accuracy']) +# trainers['SGD with random Fourier feature'] = Trainer( +# model=model, x_train = x_train, x_test=x_test) -# Assemble EigenPro trainer. -f, scale, _ = utils.asm_eigenpro_f( - x_train, rf_f, M, k, .25) -y = Dense(num_classes, input_shape=(d,), - activation='linear', - kernel_initializer='zeros', - use_bias=False)(rf_f(x)) -model = Model(x, y) -model.compile(loss='mse', - optimizer=SGD(eta=scale*eta, eigenpro_f=f), - metrics=['accuracy']) -trainers['EigenPro with random Fourier feature'] = Trainer( - model=model, x_train = x_train, x_test=x_test) +# # Assemble EigenPro trainer. +# f, scale, _ = utils.asm_eigenpro_f( +# x_train, rf_f, M, k, .25) +# y = Dense(num_classes, input_shape=(d,), +# activation='linear', +# kernel_initializer='zeros', +# use_bias=False)(rf_f(x)) +# model = Model(x, y) +# model.compile(loss='mse', +# optimizer=SGD(eta=scale*eta, eigenpro_f=f), +# metrics=['accuracy']) +# trainers['EigenPro with random Fourier feature'] = Trainer( +# model=model, x_train = x_train, x_test=x_test) # Start training. for name, trainer in trainers.items(): From fed317a0da7c6879cb607027687182a320d45c3d Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:06:16 +0100 Subject: [PATCH 53/77] new kernels --- mnist.py | 8 ++++++-- run_expr.py | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/mnist.py b/mnist.py index 02e614b..d2be4c8 100644 --- a/mnist.py +++ b/mnist.py @@ -11,7 +11,7 @@ def unit_range_normalize(X): SX = (X - min_) / diff_ return SX -def load(): +def mnist(): # input image dimensions img_rows, img_cols = 28, 28 @@ -30,4 +30,8 @@ def load(): print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') - return (x_train, y_train), (x_test, y_test) + R = np.max(np.linalg.norm(x_train,2,axis=1))**2 + num_classes = 10 + n, D = x_train.shape # (n_sample, n_feature) + d = np.int32(n / 2) * 2 # number of random features + return num_classes,x_train,x_test,y_train,y_test,n,R,d diff --git a/run_expr.py b/run_expr.py index 9021234..eb5386e 100644 --- a/run_expr.py +++ b/run_expr.py @@ -83,7 +83,7 @@ # y_train = keras.utils.to_categorical(y_train, num_classes) # y_test = keras.utils.to_categorical(y_test, num_classes) -num_classes,x_train,x_test,y_train,y_test,R,d = cifar() +num_classes,x_train,x_test,y_train,y_test,n,R,d = cifar() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth From d2240cf2322cd3a91c753aabce82e7a341374708 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:06:31 +0100 Subject: [PATCH 54/77] new kernels --- cifar.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 cifar.py diff --git a/cifar.py b/cifar.py new file mode 100644 index 0000000..474a99c --- /dev/null +++ b/cifar.py @@ -0,0 +1,28 @@ +from keras.datasets import cifar10 +import numpy as np + +def cifar(): + num_classes = 10 # number of classes + + (x_train, y_train), (x_test, y_test) = cifar10.load_data() + + print(x_train.shape[0], 'train samples') + print(x_test.shape[0], 'test samples') + print(type(x_train)) + + x_train = x_train.reshape((x_train.shape[0],-1)) + x_test = x_test.reshape((x_test.shape[0],-1)) + print('x_train shape:', x_train.shape) + print(x_train[0],x_train.dtype) + n, D = x_train.shape # (n_sample, n_feature) + x_train = np.divide(x_train,255.0) + x_test = np.divide(x_test,255.0) + + + d = np.int32(n / 2) * 2 # number of random features + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + R = np.max(np.linalg.norm(x_train,2,axis=1))**2 + return num_classes,x_train,x_test,y_train,y_test,n,R,d \ No newline at end of file From 4eae0a4090c6b230915a5f74739ee82fed2dbd08 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:08:02 +0100 Subject: [PATCH 55/77] new kernels --- cifar.py | 3 ++- mnist.py | 2 +- run_expr.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cifar.py b/cifar.py index 474a99c..5e0a776 100644 --- a/cifar.py +++ b/cifar.py @@ -1,7 +1,8 @@ +import keras from keras.datasets import cifar10 import numpy as np -def cifar(): +def load(): num_classes = 10 # number of classes (x_train, y_train), (x_test, y_test) = cifar10.load_data() diff --git a/mnist.py b/mnist.py index d2be4c8..b8c16f6 100644 --- a/mnist.py +++ b/mnist.py @@ -11,7 +11,7 @@ def unit_range_normalize(X): SX = (X - min_) / diff_ return SX -def mnist(): +def load(): # input image dimensions img_rows, img_cols = 28, 28 diff --git a/run_expr.py b/run_expr.py index eb5386e..cd826a5 100644 --- a/run_expr.py +++ b/run_expr.py @@ -83,7 +83,7 @@ # y_train = keras.utils.to_categorical(y_train, num_classes) # y_test = keras.utils.to_categorical(y_test, num_classes) -num_classes,x_train,x_test,y_train,y_test,n,R,d = cifar() +num_classes,x_train,x_test,y_train,y_test,n,R,d = mnist.load() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth From 08f1eda5ee374bdab60ad757000920203e389a9f Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:10:28 +0100 Subject: [PATCH 56/77] new kernels --- cifar.py | 17 +++++++++-------- kernels.py | 2 +- mnist.py | 2 +- run_expr.py | 6 +++--- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/cifar.py b/cifar.py index 5e0a776..87556e4 100644 --- a/cifar.py +++ b/cifar.py @@ -4,21 +4,22 @@ def load(): num_classes = 10 # number of classes - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - print(type(x_train)) + # print(x_train.shape[0], 'train samples') + # print(x_test.shape[0], 'test samples') + # print(type(x_train)) x_train = x_train.reshape((x_train.shape[0],-1)) x_test = x_test.reshape((x_test.shape[0],-1)) - print('x_train shape:', x_train.shape) - print(x_train[0],x_train.dtype) + # print('x_train shape:', x_train.shape) + # print(x_train[0],x_train.dtype) n, D = x_train.shape # (n_sample, n_feature) x_train = np.divide(x_train,255.0) x_test = np.divide(x_test,255.0) - + print("Load CIFAR10 dataset.") + print(x_train.shape[0], 'train samples') + print(x_test.shape[0], 'test samples') d = np.int32(n / 2) * 2 # number of random features @@ -26,4 +27,4 @@ def load(): y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) R = np.max(np.linalg.norm(x_train,2,axis=1))**2 - return num_classes,x_train,x_test,y_train,y_test,n,R,d \ No newline at end of file + return num_classes,x_train,x_test,y_train,y_test,n,D,R,d \ No newline at end of file diff --git a/kernels.py b/kernels.py index f5261e7..8fe768d 100644 --- a/kernels.py +++ b/kernels.py @@ -104,7 +104,7 @@ def ReLu(X,Y,s): return K.dot(xx,yy)*((pi-delta)*K.cos(delta)+K.sin(delta))/(2*d*pi) #dot product, eg. macht nur das erste sinn. -def DeepKernel(X,Y,s): +def InversePolynomial(X,Y,s): # Zhang, Lee and Jordan - l1 regularized Neural Networks are Improperly Learniable in Polynomial Time XY = K.dot(X, K.transpose(Y)) XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) diff --git a/mnist.py b/mnist.py index b8c16f6..8adddfd 100644 --- a/mnist.py +++ b/mnist.py @@ -34,4 +34,4 @@ def load(): num_classes = 10 n, D = x_train.shape # (n_sample, n_feature) d = np.int32(n / 2) * 2 # number of random features - return num_classes,x_train,x_test,y_train,y_test,n,R,d + return num_classes,x_train,x_test,y_train,y_test,n,D,R,d diff --git a/run_expr.py b/run_expr.py index cd826a5..4bdcb3c 100644 --- a/run_expr.py +++ b/run_expr.py @@ -83,7 +83,7 @@ # y_train = keras.utils.to_categorical(y_train, num_classes) # y_test = keras.utils.to_categorical(y_test, num_classes) -num_classes,x_train,x_test,y_train,y_test,n,R,d = mnist.load() +num_classes,x_train,x_test,y_train,y_test,n,D,R,d = mnist.load() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth @@ -100,9 +100,9 @@ print("AAAH",np.linalg.norm(x_train,2,axis=1).shape) s = np.max(np.linalg.norm(x_train,2,axis=1))**2 kernel = lambda x,y: kernels.ReLu(x, y, s) -elif args_dict['kernel'] == 'DeepKernel': +elif args_dict['kernel'] == 'InversePolynomial': s = int(args_dict["depth"]) - kernel = lambda x,y: kernels.DeepKernel(x, y, s) + kernel = lambda x,y: kernels.InversePolynomial(x, y, s) elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': s = R kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) From e4b3b96f0683756e2659dce3b20572a62f916b77 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:21:42 +0100 Subject: [PATCH 57/77] new kernels --- cifar.py | 1 + mnist.py | 13 ++++++++----- run_expr.py | 3 +-- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/cifar.py b/cifar.py index 87556e4..59f19cf 100644 --- a/cifar.py +++ b/cifar.py @@ -17,6 +17,7 @@ def load(): n, D = x_train.shape # (n_sample, n_feature) x_train = np.divide(x_train,255.0) x_test = np.divide(x_test,255.0) + print("Load CIFAR10 dataset.") print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') diff --git a/mnist.py b/mnist.py index 8adddfd..7d6252c 100644 --- a/mnist.py +++ b/mnist.py @@ -1,6 +1,6 @@ import numpy as np - -from keras.datasets.mnist import load_data +import keras +from keras.datasets.mnist import load_data as mnist_data def unit_range_normalize(X): @@ -16,7 +16,7 @@ def load(): img_rows, img_cols = 28, 28 # the data, shuffled and split between train and test sets - (x_train, y_train), (x_test, y_test) = load_data() + (x_train, y_train), (x_test, y_test) = mnist_data() x_train = x_train.reshape(x_train.shape[0], img_rows * img_cols) x_test = x_test.reshape(x_test.shape[0], img_rows * img_cols) @@ -29,9 +29,12 @@ def load(): print("Load MNIST dataset.") print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') - - R = np.max(np.linalg.norm(x_train,2,axis=1))**2 + # convert class vectors to binary class matrices num_classes = 10 + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + R = np.max(np.linalg.norm(x_train,2,axis=1))**2 + n, D = x_train.shape # (n_sample, n_feature) d = np.int32(n / 2) * 2 # number of random features return num_classes,x_train,x_test,y_train,y_test,n,D,R,d diff --git a/run_expr.py b/run_expr.py index 4bdcb3c..6442fb2 100644 --- a/run_expr.py +++ b/run_expr.py @@ -67,8 +67,7 @@ # print(x_test.shape[0], 'test samples') # print(type(x_train)) -# x_train = x_train.reshape((x_train.shape[0],-1)) -# x_test = x_test.reshape((x_test.shape[0],-1)) + # print('x_train shape:', x_train.shape) # print(x_train[0],x_train.dtype) # n, D = x_train.shape # (n_sample, n_feature) From 726637c891fbe0796ffb54a92d74c8824675c4c7 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:26:41 +0100 Subject: [PATCH 58/77] new kernels --- run_expr.py | 25 +------------------------ 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/run_expr.py b/run_expr.py index 6442fb2..a571663 100644 --- a/run_expr.py +++ b/run_expr.py @@ -59,30 +59,7 @@ M = 4800 # (EigenPro) subsample size k = 160 # (EigenPro) top-k eigensystem -# num_classes = 10 # number of classes - -# (x_train, y_train), (x_test, y_test) = cifar10.load_data() - -# print(x_train.shape[0], 'train samples') -# print(x_test.shape[0], 'test samples') -# print(type(x_train)) - - -# print('x_train shape:', x_train.shape) -# print(x_train[0],x_train.dtype) -# n, D = x_train.shape # (n_sample, n_feature) -# x_train = np.divide(x_train,255.0) -# x_test = np.divide(x_test,255.0) - - - -# d = np.int32(n / 2) * 2 # number of random features - -# # convert class vectors to binary class matrices -# y_train = keras.utils.to_categorical(y_train, num_classes) -# y_test = keras.utils.to_categorical(y_test, num_classes) - -num_classes,x_train,x_test,y_train,y_test,n,D,R,d = mnist.load() +num_classes,x_train,x_test,y_train,y_test,n,D,R,d = cifar.load() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth From 31fcce495d942130ec35bdfe948db9125ba8bad6 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:31:13 +0100 Subject: [PATCH 59/77] new kernels --- kernels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernels.py b/kernels.py index 8fe768d..531a7e1 100644 --- a/kernels.py +++ b/kernels.py @@ -167,14 +167,14 @@ def ArcCosine(X,Y,s,bias=1.0): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi else: - XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=True)) + XX = K.sqrt(bias+K.sum(K.square(X), axis = 1, keepdims=True)) if Y is X: YY = XX else: - YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=True)) + YY = K.sqrt(bias+K.sum(K.square(Y), axis = 1, keepdims=True)) XX = K.reshape(XX, (K.shape(X)[0],1)) YY = K.reshape(YY, (1,K.shape(Y)[0])) - XY = ArcCosine(X,Y,s-1) + XY = ArcCosine(X,Y,s-1)+bias XY = tf.div(XY,K.dot(XX,YY)) XY = K.clip(XY,-1,1) theta = tf.acos(XY) From 3b137116c2136bacac89fe45653a51ab0c37009b Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:35:59 +0100 Subject: [PATCH 60/77] new kernels --- run_expr.py | 48 ------------------------------------------------ 1 file changed, 48 deletions(-) diff --git a/run_expr.py b/run_expr.py index a571663..64c086f 100644 --- a/run_expr.py +++ b/run_expr.py @@ -108,20 +108,6 @@ x, index = utils.separate_index(ix) # features, sample_id kfeat = KernelEmbedding(kernel, x_train, input_shape=(D,))(x) - -# y = Dense(num_classes, input_shape=(n,), -# activation='linear', -# kernel_initializer='zeros', -# use_bias=False)(kfeat) - -# model = Model(ix, y) -# model.compile(loss='mse', -# optimizer=PSGD(pred_t=y, index_t=index, eta=eta), -# metrics=['accuracy']) -# trainers['Pegasos'] = Trainer(model=model, -# x_train = utils.add_index(x_train), -# x_test=utils.add_index(x_test)) - # Assemble kernel EigenPro trainer. embed = Model(ix, kfeat) y = Dense(num_classes, input_shape=(n,), @@ -139,40 +125,6 @@ x_train = utils.add_index(x_train), x_test=utils.add_index(x_test)) - -# # Assemble SGD trainer. -# rff_weights = np.float32( # for Gaussian kernel -# np.sqrt(2. / (2 * 5 ** 2)) # s = 5 -# * np.random.randn(D, d>>1)) -# input_shape = (D,) -# x = Input(shape=input_shape, dtype='float32', name='feat') -# rf_f = RFF(rff_weights, input_shape=input_shape) -# y = Dense(num_classes, input_shape=(d,), -# activation='linear', -# kernel_initializer='zeros', -# use_bias=False)(rf_f(x)) -# model = Model(x, y) - -# model.compile(loss='mse', -# optimizer=SGD(eta=eta), -# metrics=['accuracy']) -# trainers['SGD with random Fourier feature'] = Trainer( -# model=model, x_train = x_train, x_test=x_test) - -# # Assemble EigenPro trainer. -# f, scale, _ = utils.asm_eigenpro_f( -# x_train, rf_f, M, k, .25) -# y = Dense(num_classes, input_shape=(d,), -# activation='linear', -# kernel_initializer='zeros', -# use_bias=False)(rf_f(x)) -# model = Model(x, y) -# model.compile(loss='mse', -# optimizer=SGD(eta=scale*eta, eigenpro_f=f), -# metrics=['accuracy']) -# trainers['EigenPro with random Fourier feature'] = Trainer( -# model=model, x_train = x_train, x_test=x_test) - # Start training. for name, trainer in trainers.items(): print("") From 8e50c0beac51d25438c830d3e5e9d1727d161365 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 15:42:02 +0100 Subject: [PATCH 61/77] new kernels --- kernels.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index 531a7e1..62dbc61 100644 --- a/kernels.py +++ b/kernels.py @@ -147,7 +147,7 @@ def CompositionalFeedForwardKernel(X,Y,s): #XY = K.clip(XY,-1,1) return XY -def ArcCosine(X,Y,s,bias=1.0): +def ArcCosine(X,Y,s,bias=0.0): # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) if s<=1: From 9434d3a676932070b1aeb90ce65112cec00abd24 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 17:20:12 +0100 Subject: [PATCH 62/77] new kernels --- imdb.py | 47 +++++++++++++++++++++++++++++++++++++++++++++++ kernels.py | 1 - run_expr.py | 3 ++- 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 imdb.py diff --git a/imdb.py b/imdb.py new file mode 100644 index 0000000..b13c2e9 --- /dev/null +++ b/imdb.py @@ -0,0 +1,47 @@ +import keras +import numpy as np +from keras.datasets import imdb +from scipy.sparse import csr_matrix +def load(): + (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=10000, + skip_top=30, + maxlen=None) + indptr = [0] + indices = [] + data = [] + vocabulary = {} + for d in x_train: + for index in d: + indices.append(index) + data.append(1) + indptr.append(len(indices)) + x_train = csr_matrix((data, indices, indptr), dtype=int).toarray() + + indptr = [0] + indices = [] + data = [] + vocabulary = {} + for d in x_test: + for index in d: + indices.append(index) + data.append(1) + indptr.append(len(indices)) + x_test = csr_matrix((data, indices, indptr), dtype=int).toarray() + print("Load MNIST dataset.") + print(x_train.shape[0], 'train samples') + print(x_test.shape[0], 'test samples') + print(x_train.shape[1],"features") + #x_train = keras.preprocessing.text.one_hot(x_train,10000) + print(x_train[0]) + #x_test = keras.preprocessing.text.one_hot(x_test,10000) + n, D = x_train.shape # (n_sample, n_feature) + d = np.int32(n / 2) * 2 # number of random features + num_classes = 2 + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + R = np.max(np.linalg.norm(x_train,2,axis=1))**2 + # x_train/=R + # x_test/=R + # R = 1.0 + return num_classes,x_train,x_test,y_train,y_test,n,D,R,d \ No newline at end of file diff --git a/kernels.py b/kernels.py index 62dbc61..99b705f 100644 --- a/kernels.py +++ b/kernels.py @@ -13,7 +13,6 @@ def D2(X, Y): pointwise distances (n_sample, n_center). """ XX = K.sum(K.square(X), axis = 1, keepdims=True) - print(XX) if X is Y: YY = XX else: diff --git a/run_expr.py b/run_expr.py index 64c086f..d10858f 100644 --- a/run_expr.py +++ b/run_expr.py @@ -23,6 +23,7 @@ import mnist import utils import cifar +import imdb from backend_extra import hasGPU from layers import KernelEmbedding, RFF @@ -59,7 +60,7 @@ M = 4800 # (EigenPro) subsample size k = 160 # (EigenPro) top-k eigensystem -num_classes,x_train,x_test,y_train,y_test,n,D,R,d = cifar.load() +num_classes,x_train,x_test,y_train,y_test,n,D,R,d = imdb.load() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth From 4d3bb55a73df24b2dba6377db1411fa0692591c5 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Wed, 6 Dec 2017 22:46:33 +0100 Subject: [PATCH 63/77] new kernels --- kernels.py | 3 +++ run_expr.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/kernels.py b/kernels.py index 99b705f..232e378 100644 --- a/kernels.py +++ b/kernels.py @@ -23,6 +23,9 @@ def D2(X, Y): - 2 * XY return d2 +def Linear(X,Y): + return K.dot(X,K.transpose(Y)) + def Gaussian(X, Y, s): """ Gaussian kernel. diff --git a/run_expr.py b/run_expr.py index d10858f..32e56b5 100644 --- a/run_expr.py +++ b/run_expr.py @@ -69,7 +69,8 @@ elif args_dict['kernel'] == 'Laplace': s = np.sqrt(10, dtype=np.float32) kernel = lambda x,y: kernels.Laplace(x, y, s) - +elif args_dict['kernel'] == 'Linear': + kernel = lambda x,y: kernels.Linear(x, y) elif args_dict['kernel'] == 'Cauchy': s = np.sqrt(40, dtype=np.float32) kernel = lambda x,y: kernels.Cauchy(x, y, s) From 00c691bae04486d96306faaa1765f05a037a18fe Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Thu, 7 Dec 2017 19:53:37 +0100 Subject: [PATCH 64/77] new kernels --- dnn.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ imdb.py | 10 +++--- 2 files changed, 108 insertions(+), 5 deletions(-) create mode 100644 dnn.py diff --git a/dnn.py b/dnn.py new file mode 100644 index 0000000..63741f4 --- /dev/null +++ b/dnn.py @@ -0,0 +1,103 @@ +'''Train a simple deep CNN on the CIFAR10 small images dataset. +GPU run command with Theano backend (with TensorFlow, the GPU is automatically used): + THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatx=float32 python cifar10_cnn.py +It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs. +(it's still underfitting at that point, though). +''' + +from __future__ import print_function +import keras +from keras.datasets import cifar10 +from keras.preprocessing.image import ImageDataGenerator +from keras.models import Sequential +from keras.layers import Dense, Dropout, Activation, Flatten +from keras.layers import Conv2D, MaxPooling2D +import numpy.linalg +from numpy.linalg import svd +import mnist +import cifar +import imdb +batch_size = 16 +epochs = 50 +data_augmentation = False + +# The data, shuffled and split between train and test sets: +num_classes,x_train,x_test,y_train,y_test,n,D,R,d = imdb.load() + +model = Sequential() +#model.add(Flatten(input_shape=x_train.shape[1:])) +layer1 = Dense(256,input_shape=(x_train.shape[1],))# +#Conv2D(32, (3, 3),strides=(3,3),padding='same', +# ) +model.add(layer1) +# model.add(Activation('relu')) +# model.add(Dropout(0.5)) +# layer2 = Dense(128) +#Conv2D(32, (3, 3)) +# model.add(layer2) +# model.add(Activation('relu')) +# model.add(Dropout(0.5)) +#model.add(MaxPooling2D(pool_size=(2, 2))) +#model.add(Dropout(0.25)) +# layer3 = Dense(64)#Conv2D(64, (3, 3), padding='same') +# model.add(layer3) +# model.add(Activation('relu')) +#model.add(Dropout(0.5)) +#model.add(Dense(100)) +#model.add(Activation('relu')) +layer5 = Dense(num_classes) +model.add(layer5) +model.add(Activation('softmax')) +model.summary() +# initiate RMSprop optimizer +opt = keras.optimizers.sgd(lr=0.01) + +# Let's train the model using RMSprop +model.compile(loss='categorical_crossentropy', + optimizer=opt, + metrics=['accuracy']) + + +num_classes,x_train,x_test,y_train,y_test,n,D,R,d = imdb.load() +from numpy.random import shuffle + + +model.fit(x_train, y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(x_test, y_test), + shuffle=True, + verbose=2) + + +# spectrum1 = svd(layer1.get_weights()[0],compute_uv=False) +# spectrum2 = svd(layer2.get_weights()[0],compute_uv=False) +# spectrum3 = svd(layer3.get_weights()[0],compute_uv=False) +# #spectrum4 = svd(numpy.reshape(layer4.get_weights()[0],(64*9,64)),compute_uv=False) +# spectrum5 = svd(layer5.get_weights()[0],compute_uv=False) + +# numpy.set_printoptions(threshold=numpy.nan) +# #print(layer1.get_weights()[0].dot(layer1.get_weights()[0].transpose())) +# W = layer1.get_weights()[0].transpose() +# print(W.shape) +# print(W.dot(W.transpose()).shape) +# W = W/(numpy.linalg.norm(W,axis=1,ord=2).reshape(W.shape[0],1)) +# print(-numpy.sort(-(W.dot(W.transpose())).flatten())[W.shape[0]:W.shape[0]*3:2]) +# W = layer2.get_weights()[0].transpose() +# W = W/(numpy.linalg.norm(W,axis=1,ord=2).reshape(W.shape[0],1)) +# print(-numpy.sort(-(W.dot(W.transpose())).flatten())[W.shape[0]:W.shape[0]*3:2]) +# W = layer3.get_weights()[0].transpose() +# W = W/(numpy.linalg.norm(W,axis=1,ord=2).reshape(W.shape[0],1)) +# print(-numpy.sort(-(W.dot(W.transpose())).flatten())[W.shape[0]:W.shape[0]*3:2]) +# #print(spectrum4) +# W = layer5.get_weights()[0].transpose() +# W = W/(numpy.linalg.norm(W,axis=1,ord=2).reshape(W.shape[0],1)) +# print(-numpy.sort(-(W.dot(W.transpose())).flatten())[W.shape[0]:W.shape[0]*3:2]) + +# print(spectrum1) +# print(spectrum2) +# print(spectrum3) +# print(spectrum5) + +#preds = model.predict_on_batch(x_test) +#print("done",preds[0]) \ No newline at end of file diff --git a/imdb.py b/imdb.py index b13c2e9..c36cf24 100644 --- a/imdb.py +++ b/imdb.py @@ -13,9 +13,9 @@ def load(): for d in x_train: for index in d: indices.append(index) - data.append(1) + data.append(1.0/len(d)) indptr.append(len(indices)) - x_train = csr_matrix((data, indices, indptr), dtype=int).toarray() + x_train = csr_matrix((data, indices, indptr), dtype=float).toarray() indptr = [0] indices = [] @@ -24,10 +24,10 @@ def load(): for d in x_test: for index in d: indices.append(index) - data.append(1) + data.append(1.0/len(d)) indptr.append(len(indices)) - x_test = csr_matrix((data, indices, indptr), dtype=int).toarray() - print("Load MNIST dataset.") + x_test = csr_matrix((data, indices, indptr), dtype=float).toarray() + print("Load IMDB dataset.") print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') print(x_train.shape[1],"features") From d6f23c9fd5041141ea7edfbe3515393f55347e2f Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 10:16:20 +0100 Subject: [PATCH 65/77] new kernels --- kernels.py | 3 ++- run_expr.py | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/kernels.py b/kernels.py index 232e378..d7ee5f1 100644 --- a/kernels.py +++ b/kernels.py @@ -144,7 +144,7 @@ def CompositionalFeedForwardKernel(X,Y,s): pi = np.float32(3.14159265359) d = K.int_shape(X)[1] XY = XY/d - for i in range(1): + for i in range(s): XY = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi #XY = K.clip(XY,-1,1) return XY @@ -152,6 +152,7 @@ def CompositionalFeedForwardKernel(X,Y,s): def ArcCosine(X,Y,s,bias=0.0): # Youngmin Cho, Lawrence K. Saul - Kernel Methods for Deep Learning # k(x,y) = \frac 1 \pi ||x||\cdot||y|| \cdot \left( \sin \theta + (\pi - \theta)\cos \theta\right) + #bias should be as large as R if s<=1: XX = K.sqrt(bias+K.sum(K.square(X), axis = 1, keepdims=True)) if Y is X: diff --git a/run_expr.py b/run_expr.py index 32e56b5..2db394c 100644 --- a/run_expr.py +++ b/run_expr.py @@ -86,12 +86,12 @@ kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) elif args_dict['kernel'] == 'ArcCosine': s = int(args_dict["depth"]) - kernel = lambda x,y: kernels.ArcCosine(x, y, s) + kernel = lambda x,y: kernels.ArcCosine(x, y, s,R) else: raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" % args_dict['kernel']) - +print("Using ",args_dict["kernel"]) trainers = collections.OrderedDict() Trainer = collections.namedtuple('Trainer', ['model', 'x_train', 'x_test']) From 5cfc2a144467049faf35362536286c3be2115976 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 10:18:38 +0100 Subject: [PATCH 66/77] new kernels --- run_expr.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/run_expr.py b/run_expr.py index 2db394c..5a15250 100644 --- a/run_expr.py +++ b/run_expr.py @@ -49,6 +49,8 @@ parser = argparse.ArgumentParser(description='Run EigenPro tests.') parser.add_argument('--kernel', type=str, default='Gaussian', help='kernel function (e.g. Gaussian, Laplace, and Cauchy)') +parser.add_argument('--dataset', type=str, default='mnist', + help='dataset used for evaluation (e.g. mnist, cifar10, imdb (more to follow))') parser.add_argument('--depth', type=int, default=1, help='the depth of the neural network that is mimiced by the kernel (if supported by kernel)') args = parser.parse_args() @@ -60,7 +62,12 @@ M = 4800 # (EigenPro) subsample size k = 160 # (EigenPro) top-k eigensystem -num_classes,x_train,x_test,y_train,y_test,n,D,R,d = imdb.load() +if args_dict['dataset'] == "mnist": + num_classes,x_train,x_test,y_train,y_test,n,D,R,d = mnist.load() +elif args_dict['dataset'] == "cifar10": + num_classes,x_train,x_test,y_train,y_test,n,D,R,d = cifar.load() +elif args_dict['dataset'] == "imdb": + num_classes,x_train,x_test,y_train,y_test,n,D,R,d = imdb.load() if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth From 1703139534033956f366c5fecede45601b5c1897 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:04:07 +0100 Subject: [PATCH 67/77] new kernels --- kernels.py | 52 ++++++++++++++++++++++++++++++++++++---------------- mnist.py | 16 +++++++++++++--- run_expr.py | 14 +++++++++----- utils.py | 11 +++++++++-- 4 files changed, 67 insertions(+), 26 deletions(-) diff --git a/kernels.py b/kernels.py index d7ee5f1..f515380 100644 --- a/kernels.py +++ b/kernels.py @@ -185,19 +185,39 @@ def ArcCosine(X,Y,s,bias=0.0): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi - # xs = [] - # for i in range(output_row): - # for j in range(output_col): - # slice_row = slice(i * stride_row, - # i * stride_row + kernel_size[0]) - # slice_col = slice(j * stride_col, - # j * stride_col + kernel_size[1]) - # if data_format == 'channels_first': - # xs.append(reshape(inputs[:, :, slice_row, slice_col], - # (1, -1, feature_dim))) - # else: - # xs.append(reshape(inputs[:, slice_row, slice_col, :], - # (1, -1, feature_dim))) - - # x_aggregate = concatenate(xs, axis=0) - # output = batch_dot(x_aggregate, kernel) + +def Convolution(X,Y,kernel_size=(3,3)): + xs = [] + ys = [] + print("before reshape",K.int_shape(X)) + x = K.reshape(X,(-1,28,28)) + y = K.reshape(Y,(-1,28,28)) + print("after reshape",K.int_shape(x)) + output_row = K.int_shape(x)[1]-kernel_size[0]+1 + output_col = K.int_shape(x)[2]-kernel_size[1]+1 + feature_dim = kernel_size[0]*kernel_size[1] + data_format = 'channels_last' + for i in range(output_row): + for j in range(output_col): + slice_row = slice(i * 1, + i * 1 + kernel_size[0]) + slice_col = slice(j * 1, + j * 1 + kernel_size[1]) + if data_format == 'channels_first': + xs.append(K.reshape(x[:, slice_row, slice_col], + (1, -1,feature_dim))) + else: + xs.append(K.reshape(x[:, slice_row, slice_col], + (-1,1,feature_dim))) + ys.append(K.reshape(y[:, slice_row, slice_col], + (-1,1,feature_dim))) + + x_aggregate =K.concatenate(xs, axis=1) + y_aggregate =K.concatenate(ys, axis=1) + #1st dimension: example. 2nd dimension: position in output, 3rd dimension: position in patch + print("after patching", K.int_shape(x_aggregate),K.shape(x_aggregate)) + XY = tf.einsum("ijk,ljk->ilj",x_aggregate,y_aggregate)/feature_dim + xy = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi + print("After einsum",K.int_shape(XY)) + hidden = tf.reduce_sum(xy,2)/(output_row*output_col) + return (K.sqrt(1-K.square(hidden))+tf.multiply((pi-tf.acos(hidden)),hidden))/pi diff --git a/mnist.py b/mnist.py index 7d6252c..94719e4 100644 --- a/mnist.py +++ b/mnist.py @@ -11,13 +11,19 @@ def unit_range_normalize(X): SX = (X - min_) / diff_ return SX -def load(): +def prod(x): + s = 1 + for xx in x: + s*=xx + return s + +def load(convolution=False): # input image dimensions img_rows, img_cols = 28, 28 # the data, shuffled and split between train and test sets (x_train, y_train), (x_test, y_test) = mnist_data() - + #if not convolution: x_train = x_train.reshape(x_train.shape[0], img_rows * img_cols) x_test = x_test.reshape(x_test.shape[0], img_rows * img_cols) @@ -29,12 +35,16 @@ def load(): print("Load MNIST dataset.") print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') + print(x_train[0]) # convert class vectors to binary class matrices num_classes = 10 y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes) R = np.max(np.linalg.norm(x_train,2,axis=1))**2 - n, D = x_train.shape # (n_sample, n_feature) + n = x_train.shape[0] # (n_sample, n_feature) + D = prod(x_train.shape[1:]) d = np.int32(n / 2) * 2 # number of random features + + print(x_train[0].shape) return num_classes,x_train,x_test,y_train,y_test,n,D,R,d diff --git a/run_expr.py b/run_expr.py index 5a15250..f12457a 100644 --- a/run_expr.py +++ b/run_expr.py @@ -89,21 +89,24 @@ s = int(args_dict["depth"]) kernel = lambda x,y: kernels.InversePolynomial(x, y, s) elif args_dict['kernel'] == 'CompositionalFeedForwardKernel': - s = R + s = int(args_dict["depth"]) kernel = lambda x,y: kernels.CompositionalFeedForwardKernel(x, y, s) elif args_dict['kernel'] == 'ArcCosine': s = int(args_dict["depth"]) - kernel = lambda x,y: kernels.ArcCosine(x, y, s,R) + kernel = lambda x,y: kernels.ArcCosine(x, y, s, R) +elif args_dict['kernel'] == 'Convolution': + s = int(args_dict["depth"]) + kernel = lambda x,y: kernels.Convolution(x, y) else: raise Exception("Unknown kernel function - %s. \ Try Gaussian, Laplace, or Cauchy" % args_dict['kernel']) -print("Using ",args_dict["kernel"]) +print("Using ",args_dict["kernel"], "with Depth" ,args_dict["depth"]) trainers = collections.OrderedDict() Trainer = collections.namedtuple('Trainer', ['model', 'x_train', 'x_test']) - +print(x_train.shape) # Calculate step size and (Primal) EigenPro preconditioner. kf, scale, s0 = utils.asm_eigenpro_f( @@ -113,7 +116,7 @@ # # Assemble Pegasos trainer. input_shape = (D+1,) # n_feature, (sample) index -ix = Input(shape=input_shape, dtype='float32', name='indexed-feat') +ix = Input(shape=(input_shape), dtype='float32', name='indexed-feat') x, index = utils.separate_index(ix) # features, sample_id kfeat = KernelEmbedding(kernel, x_train, input_shape=(D,))(x) @@ -122,6 +125,7 @@ y = Dense(num_classes, input_shape=(n,), activation='linear', kernel_initializer='zeros', + kernel_regularizer=keras.regularizers.l2(0.1), use_bias=False)(kfeat) model = Model(ix, y) model.compile(loss='mse', diff --git a/utils.py b/utils.py index af9170f..eeed22a 100644 --- a/utils.py +++ b/utils.py @@ -19,6 +19,7 @@ def add_index(X): matrix of shape (n_sample, n_feat+1). """ inx = np.reshape(np.arange(X.shape[0]), (-1, 1)) + print("inx",inx.shape) return np.hstack([X, inx]) def separate_index(IX): @@ -54,6 +55,7 @@ def rsvd(X, phi, M, k): sk: (k+1)-th largest eigenvalue of phi(X). """ n, _ = X.shape + print("rsvd",X.shape) index = np.random.choice(n, M, replace=False) A = phi(X[index]) @@ -69,7 +71,11 @@ def rsvd(X, phi, M, k): sk = np.sqrt(n / M) * S1[k] V = VT1[:k].T return s, V, sk - +def prod(x): + s = 1 + for xx in x: + s*=xx + return s def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): """Assemble eigenpro map and calculate step size scale factor such that the update rule, @@ -91,7 +97,8 @@ def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): """ start = time.time() - n, D = feat.shape + n = feat.shape[0] + D = prod(feat.shape[1:]) x = Input(shape=(D,), dtype='float32', name='feat') if in_rkhs: if n >= 10**5: From d8cf26a75a31edfbca3ebd491ba2db3fe50dcce8 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:04:54 +0100 Subject: [PATCH 68/77] new kernels --- kernels.py | 1 + 1 file changed, 1 insertion(+) diff --git a/kernels.py b/kernels.py index f515380..a847bdf 100644 --- a/kernels.py +++ b/kernels.py @@ -189,6 +189,7 @@ def ArcCosine(X,Y,s,bias=0.0): def Convolution(X,Y,kernel_size=(3,3)): xs = [] ys = [] + pi = np.float32(3.14159265359) print("before reshape",K.int_shape(X)) x = K.reshape(X,(-1,28,28)) y = K.reshape(Y,(-1,28,28)) From 3fc9608cb7d146fbff973b3ee58bee57615d2df6 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:08:31 +0100 Subject: [PATCH 69/77] new kernels --- kernels.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/kernels.py b/kernels.py index a847bdf..460dbfa 100644 --- a/kernels.py +++ b/kernels.py @@ -198,12 +198,13 @@ def Convolution(X,Y,kernel_size=(3,3)): output_col = K.int_shape(x)[2]-kernel_size[1]+1 feature_dim = kernel_size[0]*kernel_size[1] data_format = 'channels_last' + stride = 2 for i in range(output_row): for j in range(output_col): - slice_row = slice(i * 1, - i * 1 + kernel_size[0]) - slice_col = slice(j * 1, - j * 1 + kernel_size[1]) + slice_row = slice(i * stride, + i * stride + kernel_size[0]) + slice_col = slice(j * stride, + j * stride + kernel_size[1]) if data_format == 'channels_first': xs.append(K.reshape(x[:, slice_row, slice_col], (1, -1,feature_dim))) From eb206b471b80709fd5d844624939b9c1c46462c9 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:12:29 +0100 Subject: [PATCH 70/77] new kernels --- kernels.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernels.py b/kernels.py index 460dbfa..a64329f 100644 --- a/kernels.py +++ b/kernels.py @@ -186,7 +186,7 @@ def ArcCosine(X,Y,s,bias=0.0): return (K.dot(XX,YY) * (K.sin(theta) + (pi-theta) * K.cos(theta)))/pi -def Convolution(X,Y,kernel_size=(3,3)): +def Convolution(X,Y,kernel_size=(4,4)): xs = [] ys = [] pi = np.float32(3.14159265359) @@ -194,8 +194,8 @@ def Convolution(X,Y,kernel_size=(3,3)): x = K.reshape(X,(-1,28,28)) y = K.reshape(Y,(-1,28,28)) print("after reshape",K.int_shape(x)) - output_row = K.int_shape(x)[1]-kernel_size[0]+1 - output_col = K.int_shape(x)[2]-kernel_size[1]+1 + output_row = (K.int_shape(x)[1]-kernel_size[0])/2+1 + output_col = (K.int_shape(x)[2]-kernel_size[1])/2+1 feature_dim = kernel_size[0]*kernel_size[1] data_format = 'channels_last' stride = 2 From 71d2b0a5223b6c38f4e2308b998b7dcb165a3b87 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:12:55 +0100 Subject: [PATCH 71/77] new kernels --- kernels.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernels.py b/kernels.py index a64329f..80eb79d 100644 --- a/kernels.py +++ b/kernels.py @@ -194,8 +194,8 @@ def Convolution(X,Y,kernel_size=(4,4)): x = K.reshape(X,(-1,28,28)) y = K.reshape(Y,(-1,28,28)) print("after reshape",K.int_shape(x)) - output_row = (K.int_shape(x)[1]-kernel_size[0])/2+1 - output_col = (K.int_shape(x)[2]-kernel_size[1])/2+1 + output_row = (K.int_shape(x)[1]-kernel_size[0])>>1+1 + output_col = (K.int_shape(x)[2]-kernel_size[1])>>1+1 feature_dim = kernel_size[0]*kernel_size[1] data_format = 'channels_last' stride = 2 From 549413924a331749f7de756b48ef16c10dd867c0 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:16:41 +0100 Subject: [PATCH 72/77] new kernels --- utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils.py b/utils.py index eeed22a..c4c7b80 100644 --- a/utils.py +++ b/utils.py @@ -101,7 +101,7 @@ def asm_eigenpro_f(feat, phi, M, k, tau, in_rkhs=False): D = prod(feat.shape[1:]) x = Input(shape=(D,), dtype='float32', name='feat') if in_rkhs: - if n >= 10**5: + if n >= 10**4: _s, _V = nystrom_kernel_svd(feat, phi, M, k) # phi is k(x, y) else: kfeat = KernelEmbedding(phi, feat, From 8be78df00efe47c58c9cd2b325632ade9678260d Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:24:30 +0100 Subject: [PATCH 73/77] new kernels --- run_expr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_expr.py b/run_expr.py index f12457a..c33b4c3 100644 --- a/run_expr.py +++ b/run_expr.py @@ -125,7 +125,7 @@ y = Dense(num_classes, input_shape=(n,), activation='linear', kernel_initializer='zeros', - kernel_regularizer=keras.regularizers.l2(0.1), + #kernel_regularizer=keras.regularizers.l2(0.1), use_bias=False)(kfeat) model = Model(ix, y) model.compile(loss='mse', From 68d19bbdfd5acb0521554956bb942f4e641406ae Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:33:34 +0100 Subject: [PATCH 74/77] new kernels --- kernels.py | 1 + run_expr.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/kernels.py b/kernels.py index 80eb79d..ee08115 100644 --- a/kernels.py +++ b/kernels.py @@ -219,6 +219,7 @@ def Convolution(X,Y,kernel_size=(4,4)): #1st dimension: example. 2nd dimension: position in output, 3rd dimension: position in patch print("after patching", K.int_shape(x_aggregate),K.shape(x_aggregate)) XY = tf.einsum("ijk,ljk->ilj",x_aggregate,y_aggregate)/feature_dim + #normalization is missing...crap this is never going to work. why isn't acos crashing? xy = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi print("After einsum",K.int_shape(XY)) hidden = tf.reduce_sum(xy,2)/(output_row*output_col) diff --git a/run_expr.py b/run_expr.py index c33b4c3..b12e070 100644 --- a/run_expr.py +++ b/run_expr.py @@ -64,11 +64,16 @@ if args_dict['dataset'] == "mnist": num_classes,x_train,x_test,y_train,y_test,n,D,R,d = mnist.load() + x_train/=R + x_test/=R + R = 1 elif args_dict['dataset'] == "cifar10": num_classes,x_train,x_test,y_train,y_test,n,D,R,d = cifar.load() elif args_dict['dataset'] == "imdb": num_classes,x_train,x_test,y_train,y_test,n,D,R,d = imdb.load() + + if args_dict['kernel'] == 'Gaussian': s = sqrt(D) # kernel bandwidth kernel = lambda x,y: kernels.Gaussian(x, y, s) From fee6becd0e3a7761c259a9b387911900acb0f2b4 Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Fri, 8 Dec 2017 15:54:40 +0100 Subject: [PATCH 75/77] new kernels --- kernels.py | 17 +++++++++++++++-- run_expr.py | 3 --- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/kernels.py b/kernels.py index ee08115..2078c42 100644 --- a/kernels.py +++ b/kernels.py @@ -187,12 +187,24 @@ def ArcCosine(X,Y,s,bias=0.0): def Convolution(X,Y,kernel_size=(4,4)): + XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=False)) + if Y is X: + YY = XX + else: + YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=False)) + + # XX = K.reshape(XX, (K.shape(X)[0])) + # YY = K.reshape(YY, (K.shape(Y)[0], 1)) + print(K.int_shape(X),K.int_shape(XX)) + Xnorm = K.dot(tf.diag(1.0/XX),X) + Ynorm = K.dot(tf.diag(1.0/YY),Y) + xs = [] ys = [] pi = np.float32(3.14159265359) print("before reshape",K.int_shape(X)) - x = K.reshape(X,(-1,28,28)) - y = K.reshape(Y,(-1,28,28)) + x = K.reshape(Xnorm,(-1,28,28)) + y = K.reshape(Ynorm,(-1,28,28)) print("after reshape",K.int_shape(x)) output_row = (K.int_shape(x)[1]-kernel_size[0])>>1+1 output_col = (K.int_shape(x)[2]-kernel_size[1])>>1+1 @@ -220,6 +232,7 @@ def Convolution(X,Y,kernel_size=(4,4)): print("after patching", K.int_shape(x_aggregate),K.shape(x_aggregate)) XY = tf.einsum("ijk,ljk->ilj",x_aggregate,y_aggregate)/feature_dim #normalization is missing...crap this is never going to work. why isn't acos crashing? + #I should be able to figure this out...just some angles...hate thinking in tensors... xy = (K.sqrt(1-K.square(XY))+tf.multiply((pi-tf.acos(XY)),XY))/pi print("After einsum",K.int_shape(XY)) hidden = tf.reduce_sum(xy,2)/(output_row*output_col) diff --git a/run_expr.py b/run_expr.py index b12e070..4acf7c3 100644 --- a/run_expr.py +++ b/run_expr.py @@ -64,9 +64,6 @@ if args_dict['dataset'] == "mnist": num_classes,x_train,x_test,y_train,y_test,n,D,R,d = mnist.load() - x_train/=R - x_test/=R - R = 1 elif args_dict['dataset'] == "cifar10": num_classes,x_train,x_test,y_train,y_test,n,D,R,d = cifar.load() elif args_dict['dataset'] == "imdb": From 9b401f3a3dc1234a0bd5d0645d759849e025d03d Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Sat, 9 Dec 2017 15:35:41 +0100 Subject: [PATCH 76/77] new kernels --- kernels.py | 26 +++++++++++++------------- mnist.py | 3 ++- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/kernels.py b/kernels.py index 2078c42..61662dd 100644 --- a/kernels.py +++ b/kernels.py @@ -187,24 +187,24 @@ def ArcCosine(X,Y,s,bias=0.0): def Convolution(X,Y,kernel_size=(4,4)): - XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=False)) - if Y is X: - YY = XX - else: - YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=False)) - - # XX = K.reshape(XX, (K.shape(X)[0])) - # YY = K.reshape(YY, (K.shape(Y)[0], 1)) - print(K.int_shape(X),K.int_shape(XX)) - Xnorm = K.dot(tf.diag(1.0/XX),X) - Ynorm = K.dot(tf.diag(1.0/YY),Y) + # XX = K.sqrt(K.sum(K.square(X), axis = 1, keepdims=False)) + # if Y is X: + # YY = XX + # else: + # YY = K.sqrt(K.sum(K.square(Y), axis = 1, keepdims=False)) + + # # XX = K.reshape(XX, (K.shape(X)[0])) + # # YY = K.reshape(YY, (K.shape(Y)[0], 1)) + # print(K.int_shape(X),K.int_shape(XX)) + # Xnorm = K.dot(tf.diag(1.0/XX),X) + # Ynorm = K.dot(tf.diag(1.0/YY),Y) xs = [] ys = [] pi = np.float32(3.14159265359) print("before reshape",K.int_shape(X)) - x = K.reshape(Xnorm,(-1,28,28)) - y = K.reshape(Ynorm,(-1,28,28)) + x = K.reshape(X,(-1,28,28)) + y = K.reshape(Y,(-1,28,28)) print("after reshape",K.int_shape(x)) output_row = (K.int_shape(x)[1]-kernel_size[0])>>1+1 output_col = (K.int_shape(x)[2]-kernel_size[1])>>1+1 diff --git a/mnist.py b/mnist.py index 94719e4..3e9114d 100644 --- a/mnist.py +++ b/mnist.py @@ -45,6 +45,7 @@ def load(convolution=False): n = x_train.shape[0] # (n_sample, n_feature) D = prod(x_train.shape[1:]) d = np.int32(n / 2) * 2 # number of random features - + x_train = keras.utils.normalize(x_train) + x_test = keras.utils.normalize(x_test) print(x_train[0].shape) return num_classes,x_train,x_test,y_train,y_test,n,D,R,d From 45aa60f5b93cefb007b4f3d1f06e07d81edf367f Mon Sep 17 00:00:00 2001 From: Lukas Pfahler Date: Sat, 9 Dec 2017 16:16:07 +0100 Subject: [PATCH 77/77] new kernels --- mnist.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mnist.py b/mnist.py index 3e9114d..fc776b0 100644 --- a/mnist.py +++ b/mnist.py @@ -45,7 +45,7 @@ def load(convolution=False): n = x_train.shape[0] # (n_sample, n_feature) D = prod(x_train.shape[1:]) d = np.int32(n / 2) * 2 # number of random features - x_train = keras.utils.normalize(x_train) - x_test = keras.utils.normalize(x_test) + x_train = keras.utils.normalize(x_train,axis=1) + x_test = keras.utils.normalize(x_test,axis=1) print(x_train[0].shape) return num_classes,x_train,x_test,y_train,y_test,n,D,R,d