From b618a3cfe86d3c214b8ba446c9d8b780201fe387 Mon Sep 17 00:00:00 2001 From: karllab41 Date: Fri, 26 Aug 2016 15:49:39 -0700 Subject: [PATCH 1/2] Current commit to new branch jvspace --- .../imgtxt_algorithms/jvspace/JVSoptim.ipynb | 553 ++++++++++++++++++ .../jvspace/load_entire_dataset.py | 78 +++ attalos/imgtxt_algorithms/jvspace/models.py | 247 ++++++++ attalos/imgtxt_algorithms/jvspace/setops.py | 54 ++ 4 files changed, 932 insertions(+) create mode 100644 attalos/imgtxt_algorithms/jvspace/JVSoptim.ipynb create mode 100644 attalos/imgtxt_algorithms/jvspace/load_entire_dataset.py create mode 100644 attalos/imgtxt_algorithms/jvspace/models.py create mode 100644 attalos/imgtxt_algorithms/jvspace/setops.py diff --git a/attalos/imgtxt_algorithms/jvspace/JVSoptim.ipynb b/attalos/imgtxt_algorithms/jvspace/JVSoptim.ipynb new file mode 100644 index 0000000..622efdc --- /dev/null +++ b/attalos/imgtxt_algorithms/jvspace/JVSoptim.ipynb @@ -0,0 +1,553 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Core imports\n", + "import numpy as np\n", + "import scipy.sparse as sprs\n", + "import sys\n", + "import shutil\n", + "import os\n", + "import tarfile\n", + "import argparse\n", + "import gzip\n", + "from scipy.special import expit as sigmoid\n", + "\n", + "# Evaluation imports\n", + "from oct2py import octave\n", + "octave.addpath('../../evaluation/')\n", + "\n", + "# Python 2 - 3 imports\n", + "import six\n", + "\n", + "# Tensorflow imports\n", + "import tensorflow as tf\n", + "from tensorflow.contrib import learn as learn\n", + "from tensorflow.contrib import layers \n", + "import tflearn\n", + "\n", + "# Attalos specific imports\n", + "sys.path.append('/home/kni/local-kni/attalos')\n", + "from setops import replaceword, union, difference, intersection\n", + "from load_entire_dataset import load_entire_dataset, load_entire_dataset_di\n", + "import 
attalos.imgtxt_algorithms.util.readw2v as readw2v\n", + "from attalos.imgtxt_algorithms.util.readw2v import initVo, readvocab\n", + "from attalos.evaluation.evaluation import Evaluation\n", + "from models import imageWmodel, updateVoX, updateVoSum, get_batch, get_batch_image\n", + "from models import adaptWords, costYViVo\n", + "\n", + "# Display\n", + "from IPython.display import clear_output\n", + "import matplotlib.pylab as plt\n", + "%matplotlib inline " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Parameter Definitions\n", + "1. `epochs` = number of epochs\n", + "2. `bsize` = batch size\n", + "3. `updateVo` = None/updateVoSum/updateVoX <- types of update\n", + "4. `nsampims` = None/negativeims <-- sample from independent words / words from one image\n", + "5. `initWVVo` = True/False <-- initialize Vo with word vectors or randomly\n", + "6. `learnrate` = learning rate\n", + "7. `datadir` = path to data directory. Need to replace: ``" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "epochs = 5000\n", + "bsize = 1024\n", + "updateVo = 'updateVoX' # 'updateVoSum' # None/'updateVoSum/updateVoX'\n", + "nsampims = None #'negativeims'\n", + "initWVVo = True\n", + "learnrate=0.1\n", + "minlearnrate = 1.0e-6\n", + "wweight = 0.3\n", + "hidden_units=[2048, 1024,200]\n", + "datadir='/data/fs4/teams/attalos/features/'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Load in the *image* dataset\n", + "\n", + "#### Load datasets \n", + "The function `load_entire_dataset` will load an entire dataset in. 
Here,\n", + "- `dataset` can be `yfcc`, `iaprtc12`, or `espgame` \n", + "- `split` can be `train` or `test`\n", + "\n", + "#### The data that has been loaded in\n", + "- `x**`, image features\n", + " - `xTr`, training features\n", + " - `xTe`, testing features\n", + " - `xVa`, validation features\n", + "- `y**`, labels\n", + "- `d**`, dictionary of word labels\n", + "- `****list`, list of images originally being used\n", + "- `trHot` is used as a one hot encoding object so that the validation has the same hot encoding\n", + "\n", + "#### You need to fill in `datadir`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "xTr, yTr, dTr, trainlist, dirTr, trHot = load_entire_dataset_di('iaprtc12', datadir=datadir, split='train')\n", + "xVa, yVa, dVa, validlist, dirVa, _ = load_entire_dataset_di('iaprtc12', datadir=datadir, split='test', allhot=trHot)\n", + "xTe, yTe, dTe, _, testlist, dirTe = load_entire_dataset_di('espgame', datadir=datadir, split='test')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Read vocabulary from word2vec file" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "w2vfile = readw2v.ReadW2V('/local_data/kni/data/vectors-phrase.bin')\n", + "wordvecs = w2vfile.readlines(100000)\n", + "# Require rescale of word vectors to avoid NaNs\n", + "for word in wordvecs.keys():\n", + " wordvecs[word] *= 0.1\n", + "Wd, Id = readvocab('/local_data/kni/data/vectors-phrase.vocab')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "## Joint Vocabulary and Custom Set Operations\n", + "\n", + "Recall that the dictionary between training and testing is different. 
The set operations that I have implemented below \n", + "- `VoU` is the union of word vectors in both training and test set\n", + "- `VoD` is the set difference between test set and training set\n", + "\n", + "Additionally, you will notice that I have only used the first one hundred thousand words above. Apparently, there are some words the image corpus labels that aren't actually in the word vectors, which can be remedied by replacing your dictionary with equivalent words. The below are words that are replaced with those found in word2vec." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Replace words in the dictionary that aren't in word2vec space\n", + "replaceword(dTr, 'bedcover', 'bedding')\n", + "replaceword(dTr, 'tussock', 'turf')\n", + "replaceword(dTr, 'tee-shirt', 'shirts')\n", + "replaceword(dTr, 'table-cloth', 'tablecloth')\n", + "replaceword(dTr, 'cobblestone', 'stones')\n", + "\n", + "# Cannot use Python set operations as we require indices and sorting\n", + "dUnion, iUtr, iUte = union(dTr, dTe)\n", + "dDiff, iDte = difference(dTe, dTr)\n", + "dXsect, iXtr, iXte = intersection( dTr, dTe )\n", + "\n", + "VoU = initVo(wordvecs, dUnion)\n", + "VoD = initVo(wordvecs, dDiff)\n", + "\n", + "print '----------------------------------'\n", + "print 'Union:{}, Xsect:{}, dTe-dTr:{}'.format(len(dUnion),len(dXsect),len(dDiff))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Initialization" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Tensorflow Instantiation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "config = tf.ConfigProto()\n", + "config.gpu_options.allow_growth = True\n", + "# sess = tf.InteractiveSession(config=config)\n", + "graph = tf.Graph().as_default()\n", + "sess = tf.Session(config=config)" + ] + }, + { 
+ "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "# Initialize Vo to word vectors or randomly\n", + "Vo = initVo(wordvecs, dTr)\n", + " \n", + "# Use sum model or cross-entropy model\n", + "inputs,pvecs,nvecs,wvecs,wcorr,preds,imloss,wdloss,loss,opt,lrate = imageWmodel(hidden_units=hidden_units, \n", + " vec_size=Vo.shape[1])\n", + "\n", + "# Tensorflow Initialization\n", + "init_op = tf.initialize_all_variables()\n", + "saver = tf.train.Saver()\n", + "sess.run(init_op)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Neural Networks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Update $V_o$\n", + "\n", + "Both functions take in:\n", + "- The input batch vectors ($v_{in}$: `vin`)\n", + "- The positive vectors ($V_p$: `pVecs`)\n", + "- The negative vectors ($V_n$: `nVecs`)\n", + "- The indices in $V_o$ of the positive vectors ($V_p[i]$: `vpindex`)\n", + "- The indices in $V_o$ of the negative vectors ($V_n[i]$: `vnindex`)\n", + "- The output vectors $V_o$, `Vo`." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Begin Training" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "lossvals = []\n", + "losstrain = []\n", + "lossvalid = []\n", + "\n", + "# Consistent validation batches\n", + "valididx = np.random.choice(range(len(yVa)), size=1024, replace=False, p=None)\n", + "\n", + "\n", + "Cmat = sigmoid(Vo.dot(VoD.T))\n", + "for epoch in range(epochs):\n", + " \n", + " randidx = np.random.permutation(len(yTr))\n", + " yTrr = yTr[randidx]\n", + " xTrr = xTr[randidx]\n", + " \n", + " for b in range(0,len(yTr),bsize):\n", + " \n", + " # Get a batch\n", + " xBatch = xTrr[b:b+bsize]\n", + " pVecs, nVecs, vpindex, vnindex = get_batch( yTrr[b:b+bsize], Vo, [5,5] )\n", + " \n", + " # Run the image updates\n", + " _, lossval, vin = sess.run([opt,loss,preds], \n", + " feed_dict={inputs:xBatch, pvecs: pVecs, \n", + " nvecs: nVecs, lrate:learnrate,\n", + " wvecs: VoD, wcorr: Cmat[vpindex].transpose(1,0,2)})\n", + " \n", + " # Run the word updates\n", + " lossvals += [lossval]\n", + " if updateVo == 'updateVoX':\n", + " Vo = updateVoX(vin, pVecs, nVecs, vpindex, vnindex, Vo, learnrate=learnrate)\n", + " VoU = np.array(list(Vo)+list(VoD))\n", + " Cmat = sigmoid(VoD.dot(VoU.T))\n", + " VoD = adaptWords( VoD, np.array( list(Vo)+list(VoD) ), Cmat, wordlr=learnrate )\n", + " Cmat = sigmoid(Vo.dot(VoD.T))\n", + " \n", + " # Printout status\n", + " sys.stdout.write(\"\\rEpoch {}/{}: loss={}\".format(epoch, epochs, lossval))\n", + " \n", + " # Validation\n", + " pVecs, nVecs, vpindex, vnindex = get_batch( yVa[valididx], Vo, [5,5] )\n", + " vapred, valossval = sess.run([preds,imloss], feed_dict={inputs:xVa[valididx], pvecs: pVecs, nvecs: nVecs})\n", + " yHat = sigmoid(vapred.dot(Vo.T))\n", + " precision,recall,f1score = Evaluation( yVa[valididx], yHat, 5).evaluate()\n", + " outstring = '\\rEpoch: {}, LR: {}, Train/Val: {}/{}, P: {}, 
R: {}\\n'.format(epoch,learnrate,lossval,\n", + " lossval,precision,recall)\n", + " sys.stdout.write(outstring)\n", + " \n", + " # Keep track of training and validation loss\n", + " losstrain += [lossval]\n", + " lossvalid += [valossval]\n", + " \n", + " # Learning rate updating\n", + " if len(losstrain) and len(losstrain) % 150 == 0:\n", + " learnrate*=0.9\n", + " if learnrate < minlearnrate:\n", + " learnrate=minlearnrate\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Final Word Vector Tuning\n", + "\n", + "### Given the image vectors, tune the word vectors to match based on original word vector correlation\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Either run nonlinear or linear (below).\n", + "\n", + "#### Nonlinear optimization\n", + "\n", + "Nonlinear optimization has a cost function of:\n", + "\n", + "$$\\mathcal{L} = \\frac{1}{N} C_{i,o} \\log \\sigma ( V_i^T V_o ) + (1 - C_{i,o}) \\log \\sigma (V_i^T V_o )$$\n", + "\n", + "Here, $C_{i,o}$ is the *original* correlation between word *i* and word *o*. We may wish to adapt the nonlinearity that Kyle uses as the final layer for the sum of word vectors. \n", + "\n", + "Let $V_{oD}$ be the output vectors of the set difference between training and testing. Since we're in numpy, the updates to maximize are:\n", + "\n", + "$$\\frac{\\partial \\mathcal{L}^+}{ \\partial V_{oD} } = C \\left( 1 - \\sigma\\left( V_{oD} V_{oU} \\right) \\right) \\cdot V_{oU}$$ \n", + "\n", + "$$\\frac{\\partial \\mathcal{L}^-}{ \\partial V_{oD} } = (C - 1) \\left( 1 - \\sigma\\left( 1 - V_{oD} V_{oU} \\right) \\right) \\cdot V_{oU}$$\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Linear optimization\n", + "\n", + "Linear matrix optimization is done here:\n", + "\n", + "Let $V_{oD}$ be the set difference output vectors between training set and testing set. 
That is, the vectors that have *not* been updated in the image optimization. We know that the semantic concepts are correlated with the original correlation matrix $C_m$, and at minimum, an unseen word $v_i$ is correlated with an optimized word from the image training corpus $v_o$ with linear correlation $c_{i,o}$, the $(i, j)^{th}$ entry in $C_m$. \n", + "\n", + "Similarly, in the absence of any image data, we know that the vectors are also correlated with each other with that same correlation. If we only optimize the unseen word vectors, then with $V_{oU}$ being the union of the *original* vectors of image labels (before image optimization) and words not in the image label set, then:\n", + "\n", + "$$C_m = V_{oD} \\cdot V_{oU}$$\n", + "\n", + "With $\\hat{V}_{oU}$ being the union of the *updated* vectors of image labels (after image optimization) and words not in the image label set, the solution to the inverse problem is then:\n", + "\n", + "$$\\hat{V}_{oD} = C_m \\hat{V}_{oU} \\left( \\hat{V}_{oU}^T \\hat{V}_{oU} \\right)^{-1} $$\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "VoD = initVo(wordvecs, dDiff)\n", + "VoU = initVo(wordvecs, dUnion)\n", + "\n", + "# word parameter optimization\n", + "wordlr = 1.0e-4\n", + "nonlinear = False\n", + "\n", + "if nonlinear:\n", + " Cmat = sigmoid( VoD.dot(VoU.T) )\n", + " VoUnew = np.array( list(Vo)+list(VoD) )\n", + " # New full vectors. 
Assumes that Vo is optimized through the image space\n", + " for epoch in range(epochs):\n", + " VoD = adaptWords( VoD, VoUnew, Cmat, wordlr=wordlr)\n", + " sys.stdout.write('\\r{}, Word Adaptation Cost = {}'.format(epoch, costYViVo(Cmat, VoD, VoUnew)))\n", + " VoDnew = VoD\n", + "else:\n", + " # Linear optimization\n", + " Cmat = 0.3*VoD.dot(VoU.T)\n", + " VoUnew = np.array( list(Vo)+list(VoD) )\n", + " VoDnew = Cmat.dot(VoUnew).dot(np.linalg.inv(VoUnew.T.dot(VoUnew)))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Assign new $V_{oD}^{new}$ to optimized value for set difference $V_{oD}$.\n", + "\n", + "$V_{o Te}= V_{o Tr} ( D[ Tr\\cap Te ] ) \\cup V_{o Te} ( D[Te]-D[Tr] )$" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "newVo = np.zeros((len(dTe),200))\n", + "newVo[iXte] = Vo[iXtr]\n", + "newVo[iDte] = VoD\n", + "newVo[iDte] = wweight*VoDnew" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "def getsplit(splitname):\n", + " # Returns data, labels, output vectors, image list, directory of images, and dictionary\n", + " if splitname=='train':\n", + " return xTr, yTr, Vo, trainlist, dirTr, dTr\n", + " elif splitname=='valid':\n", + " return xVa, yVa, Vo, validlist, dirVal, dTr\n", + " else:\n", + " return xTe, yTe, newVo, testlist, dirTe, dTe" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "splitname='test'\n", + "xEv, yEv, VE, evallist, dirEv, dEv = getsplit(splitname)\n", + "\n", + "plt.plot(np.array(losstrain))\n", + "plt.plot(np.array(lossvalid),'r')\n", + "plt.xlabel('Epoch Number')\n", + "plt.ylabel('Loss Value')\n", + "plt.legend(['Training Loss', 'Validation Loss'])\n", + 
"plt.title('Epoch = {}'.format(epoch))\n", + "prediction = sess.run(preds, feed_dict={inputs:xEv})\n", + "yHat = sigmoid(prediction.dot(VE.T))\n", + "\n", + "evaluated = Evaluation(yEv, yHat, k=5)\n", + "evaluated.evaluate()\n", + "\n", + "from oct2py import octave\n", + "octave.addpath('../../evaluation/')\n", + "[precision, recall, f1score] = octave.evaluate(yEv.T, yHat.T, 5)\n", + "print \"P: {},R: {},F1: {}\".format(precision,recall,f1score)\n", + "\n", + "print 2*(precision*recall) / (precision+recall)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "modelname='save-name.model'\n", + "\n", + "save_path = saver.save(sess, modelname)\n", + "\n", + "print \"Saved model to {}\".format(save_path)\n", + "\n", + "modelinfo = modelname+'.info.npz'\n", + "np.savez( modelinfo, lossvals=lossvals, epoch=epoch, Vo=Vo)\n", + "\n", + "print \"Saved model information to {}\".format(modelinfo)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "sess.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 2", + "language": "python", + "name": "python2" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.12" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/attalos/imgtxt_algorithms/jvspace/load_entire_dataset.py b/attalos/imgtxt_algorithms/jvspace/load_entire_dataset.py new file mode 100644 index 0000000..6332c88 --- /dev/null +++ b/attalos/imgtxt_algorithms/jvspace/load_entire_dataset.py @@ -0,0 +1,78 @@ +import numpy as np +from attalos.dataset.dataset import Dataset +from 
import numpy as np

# TensorFlow / TFLearn are needed only by the graph-building functions below;
# guarding the import keeps the numpy helpers (sigmoid, get_batch, updateVoX,
# the cost functions) importable on machines without them.
try:
    import tensorflow as tf
    import tflearn
except ImportError:
    tf = None
    tflearn = None


## DATASET LOADING (load_entire_dataset.py)
############################################################
# dataset name -> (npz archive of precomputed features, image directory)
_NPZ_DATASETS = {
    'iaprtc12': ('/data/fs4/datasets/iaprtc-12/iaprtc12-inria.npz',
                 '/data/fs4/datasets/iaprtc-12/images/images/'),
    'espgame': ('/data/fs4/datasets/espgame/espgame-inria.npz',
                '/data/fs4/datasets/espgame/ESP-ImageSet/images/'),
}

# dataset name -> (file prefix, train date stamp, test date stamp, image directory)
_DI_DATASETS = {
    'iaprtc12': ('iaprtc', '20160816', '20160816',
                 '/data/fs4/datasets/iaprtc-12/images/images/'),
    'espgame': ('espgame', '20160816', '20160823',
                '/data/fs4/datasets/espgame/ESP-ImageSet/images/'),
}


def load_entire_dataset(dataset, split='test'):
    """Load one split of a dataset from its precomputed ``.npz`` archive.

    Args:
        dataset: 'iaprtc12' or 'espgame'.
        split: 'train' selects the ``xTr``/``yTr`` arrays; any other value
            selects ``xTe``/``yTe``.

    Returns:
        Tuple ``(x, y, imD, trainlist, testlist, imdir)`` read from the
        archive keys ``x*``, ``y*``, ``D``, ``trainlist`` and ``testlist``,
        plus the image directory for the dataset.

    Raises:
        NotImplementedError: for 'yfcc', whose branch never loaded any data
            (it used to fail with an unbound-local error at ``return``).
        ValueError: for any other unknown dataset name.
    """
    if dataset == 'yfcc':
        raise NotImplementedError("loading 'yfcc' is not implemented")
    if dataset not in _NPZ_DATASETS:
        raise ValueError("unknown dataset {!r}; expected 'iaprtc12' or 'espgame'".format(dataset))

    npzfile, imdir = _NPZ_DATASETS[dataset]
    imdata = np.load(npzfile)
    if split == 'train':
        x, y = imdata['xTr'], imdata['yTr']
    else:
        x, y = imdata['xTe'], imdata['yTe']
    return x, y, imdata['D'], imdata['trainlist'], imdata['testlist'], imdir


def load_entire_dataset_di(dataset, datadir='/data/fs4/teams/attalos/features/', split='test', allhot=None):
    """Load one split through the attalos ``Dataset`` / ``OneHot`` classes.

    Args:
        dataset: 'iaprtc12' or 'espgame'.
        datadir: directory prefix holding ``image/`` and ``text/``; it is
            concatenated as-is, so it must end with a path separator.
        split: 'train' selects the training files; any other value selects
            the test files.
        allhot: an existing ``OneHot`` encoder to reuse (e.g. the training
            one, so validation labels share its encoding). A new encoder is
            built from this split when it is None.

    Returns:
        Tuple ``(x, y, dTr, imlist, imdir, allhot)``: image features, the
        per-image encoded labels, the encoder's ``data_mapping`` entries in
        iteration order, the image ids, the image directory and the encoder.

    Raises:
        ValueError: if ``dataset`` is not a known name.
    """
    if dataset not in _DI_DATASETS:
        raise ValueError("unknown dataset {!r}; expected 'iaprtc12' or 'espgame'".format(dataset))

    # Imported here so the rest of this module does not require attalos.
    from attalos.dataset.dataset import Dataset
    from attalos.dataset.transformers.onehot import OneHot

    prefix, trainstamp, teststamp, imdir = _DI_DATASETS[dataset]
    if split == 'train':
        stem = '{}_train_{}'.format(prefix, trainstamp)
    else:
        stem = '{}_test_{}'.format(prefix, teststamp)
    imdata = datadir + 'image/' + stem + '_inception.hdf5'
    txdata = datadir + 'text/' + stem + '_text.json.gz'

    data = Dataset(imdata, txdata)

    # Image features and ids, in the dataset's own order
    x = np.array(data.image_feats)
    imlist = list(data.image_ids)

    # One hot encoding (identity test: an encoder object must never be
    # replaced just because it happens to be falsy)
    if allhot is None:
        allhot = OneHot([data])
    y = np.array([allhot.get_multiple(data.text_feats[imid]) for imid in data.image_ids])

    # Dictionary of the one hot encoding
    dTr = list(allhot.data_mapping)

    return x, y, dTr, imlist, imdir, allhot


## NUMPY rewrites (models.py)
############################################################
def sigmoid(x):
    """Return the logistic function of ``x`` with the argument clipped to [-500, 500].

    Accepts arrays, numpy scalars and plain Python numbers. The input is
    never modified (the previous version clipped the caller's array in place).
    """
    clipped = np.clip(x, -500.0, 500.0)
    return 1.0 / (1 + np.exp(-clipped))


## IMAGE COST FUNCTIONS
############################################################
# Negative samples and loss (3D tensor function)
def meanlogsig_3d(f, V):
    """Mean of log(sigmoid(sum(f*V, axis=2))) as a TensorFlow op."""
    return tf.reduce_mean(tf.log(tf.sigmoid(tf.reduce_sum(f*V, reduction_indices=2))))


def w2vloss(f, pVecs, nVecs):
    """Negative-sampling loss: -(meanlogsig_3d(f, pVecs) + meanlogsig_3d(-f, nVecs))."""
    tfpos = meanlogsig_3d(f, pVecs)
    tfneg = meanlogsig_3d(-f, nVecs)
    return -(tfpos + tfneg)


# Sum of word vectors (2D tensor function)
def meanlogsig_2d(f, V):
    """Mean of log(sigmoid(sum(f*V, axis=1))) as a TensorFlow op."""
    return tf.reduce_mean(tf.log(tf.sigmoid(tf.reduce_sum(f*V, reduction_indices=1))))


def w2vsumloss(f, pVec, nVec):
    """Same loss as w2vloss, for already-averaged 2D positive/negative vectors."""
    return -(meanlogsig_2d(f, pVec) + meanlogsig_2d(-f, nVec))


# Word-correlation loss.
#   numpy branch:      V is passed as rows of word vectors and transposed here.
#   TensorFlow branch: V is multiplied as given (imageWmodel passes it
#                      already transposed).
def wordloss(C, f, V):
    """Cross-entropy between target correlations ``C`` and sigmoid(f . V).

    Uses numpy when ``C`` is an ndarray and TensorFlow ops otherwise.
    """
    if isinstance(C, np.ndarray):
        probs = sigmoid(f.dot(V.T))
        dots = C*np.log(probs) + (1-C)*np.log(1-probs)
        return -dots.mean()
    logits = tf.matmul(f, V)
    dots = C*tf.log(tf.sigmoid(logits)) + (1-C)*tf.log(tf.sigmoid(-logits))
    return -tf.reduce_mean(dots)


## NEURAL NETWORK MODELS
############################################################
def _relu_stack(inputs, hidden_units):
    """Chain one TFLearn fully connected relu layer ('fc0', 'fc1', ...) per entry."""
    layer_i = inputs
    for i, hidden in enumerate(hidden_units):
        layer_i = tflearn.fully_connected(layer_i, hidden, activation='relu', name='fc'+str(i))
    return layer_i


# Cross-correlation image model
def imageXmodel(input_size=2048, vec_size=200, hidden_units=()):
    '''
    imageXmodel( input_size, vec_size, hidden_units )

    Build relu + batch-norm hidden layers and a linear output of width
    vec_size, trained with w2vloss by gradient descent.

    Returns (inputs, pVecs, nVecs, prediction, loss, optimizer, learning_rate).
    '''
    pVecs = tf.placeholder(tf.float32, shape=[None, None, vec_size], name='pvecs')
    nVecs = tf.placeholder(tf.float32, shape=[None, None, vec_size], name='nvecs')
    inputs = tf.placeholder(tf.float32, shape=[None, input_size], name='input')
    learning_rate = tf.placeholder(tf.float32, shape=[])

    # Iterate through the hidden units list and connect the graph
    layer_i = inputs
    for hidden in hidden_units:
        layer_i = tf.contrib.layers.relu(layer_i, hidden)
        # NOTE(review): batch norm is assumed to follow every hidden layer;
        # confirm against the original module's indentation.
        layer_i = tf.contrib.layers.batch_norm(layer_i)
    prediction = tf.contrib.layers.linear(layer_i, vec_size)

    # Loss function and optimizer to be used
    loss = w2vloss(prediction, pVecs, nVecs)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

    # Return actual variables
    return inputs, pVecs, nVecs, prediction, loss, optimizer, learning_rate


# Sum of word vectors image model
def imagesummodel(input_size=2048, vec_size=200, hidden_units=()):
    '''
    imagesummodel( input_size, vec_size, hidden_units )

    Same interface as imageXmodel, but the positive/negative vectors are
    averaged over their first axis and scored with w2vsumloss.

    Returns (inputs, pVecs, nVecs, prediction, loss, optimizer, learning_rate).
    '''
    pVecs = tf.placeholder(tf.float32, shape=[None, None, vec_size], name='pvecs')
    nVecs = tf.placeholder(tf.float32, shape=[None, None, vec_size], name='nvecs')
    inputs = tf.placeholder(tf.float32, shape=[None, input_size], name='input')

    meanP = tf.reduce_mean(pVecs, reduction_indices=0)
    meanN = tf.reduce_mean(nVecs, reduction_indices=0)

    learning_rate = tf.placeholder(tf.float32, shape=[])

    layer_i = _relu_stack(inputs, hidden_units)
    prediction = tflearn.fully_connected(layer_i, vec_size, activation='linear', name='output')

    # Loss function and optimizer to be used
    loss = w2vsumloss(prediction, meanP, meanN)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

    # Return actual variables
    return inputs, pVecs, nVecs, prediction, loss, optimizer, learning_rate


# Image model with an additional word-correlation loss
def imageWmodel(input_size=2048, vec_size=200, hidden_units=()):
    '''
    imageWmodel( input_size, vec_size, hidden_units )

    Relu hidden layers with a sigmoid output of width vec_size. The total
    loss is w2vloss (image term) plus wordloss against the fed word
    vectors and word correlations.

    Returns (inputs, pVecs, nVecs, wVecs, CorrW, prediction, imloss,
    wdloss, loss, optimizer, learning_rate).
    '''
    pVecs = tf.placeholder(tf.float32, shape=[None, None, vec_size], name='pvecs')
    nVecs = tf.placeholder(tf.float32, shape=[None, None, vec_size], name='nvecs')
    wVecs = tf.placeholder(tf.float32, shape=[None, vec_size], name='wvecs')
    CorrW = tf.placeholder(tf.float32, shape=[None, None, None], name='word_correlations')
    inputs = tf.placeholder(tf.float32, shape=[None, input_size], name='input')
    learning_rate = tf.placeholder(tf.float32, shape=[])

    layer_i = _relu_stack(inputs, hidden_units)
    prediction = tflearn.fully_connected(layer_i, vec_size, activation='sigmoid', name='output')

    # Loss function and optimizer to be used
    imloss = w2vloss(prediction, pVecs, nVecs)
    wdloss = wordloss(CorrW, prediction, tf.transpose(wVecs))
    loss = imloss+wdloss
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)

    # Return actual variables
    return inputs, pVecs, nVecs, wVecs, CorrW, prediction, imloss, wdloss, loss, optimizer, learning_rate


## UPDATE LAST LAYER (VO)
############################################################
# If using sum of word vectors, use updateVoSum
def updateVoSum(vin, pVecs, nVecs, vpindex, vnindex, Vo, learnrate=0.01):
    """Gradient step on ``Vo`` for the sum-of-word-vectors loss.

    ``pVecs``/``nVecs`` are averaged over axis 0; row ``i`` of the result
    is applied to the rows of ``Vo`` listed in ``vpindex[i]``/``vnindex[i]``.
    ``Vo`` is modified in place and returned.
    """
    pVecs = pVecs.mean(axis=0)
    nVecs = nVecs.mean(axis=0)
    vinp = ((1 - sigmoid((pVecs*vin).sum(axis=1)))*vin.T).T
    vinn = -(sigmoid((nVecs*vin).sum(axis=1))*vin.T).T
    for i, (vpi, vni) in enumerate(zip(vpindex, vnindex)):
        Vo[vpi] += learnrate*vinp[i]
        Vo[vni] += learnrate*vinn[i]
    return Vo


# If using X-correlation, use updateVoX
def updateVoX(vin, pVecs, nVecs, vpindex, vnindex, Vo, learnrate=0.01):
    """Gradient step on ``Vo`` for the cross-correlation loss.

    For sample ``i``, rows ``vpindex[i]`` move towards ``vin[i]`` and rows
    ``vnindex[i]`` away from it, scaled by the current sigmoid scores.
    ``pVecs``/``nVecs`` are accepted for signature parity with updateVoSum
    but not read. ``Vo`` is modified in place and returned.
    """
    for i, (vpi, vni) in enumerate(zip(vpindex, vnindex)):
        # An index repeated within vpi/vni is applied once (fancy-index +=).
        Vo[vpi] += learnrate*np.outer(1 - sigmoid(Vo[vpi].dot(vin[i])), vin[i])
        Vo[vni] -= learnrate*np.outer(sigmoid(Vo[vni].dot(vin[i])), vin[i])
    return Vo


## GET IMAGE BATCH
############################################################
def _sample_vectors(weights, Vo, nsamp):
    """Draw ``nsamp`` row indices of ``Vo`` per row of ``weights``.

    Each row of ``weights`` is normalised into sampling probabilities.
    Returns the gathered vectors with the sample axis first,
    ``(nsamp, len(weights), Vo.shape[1])``, and the list of index arrays.
    """
    vecs = np.zeros((len(weights), nsamp, Vo.shape[1]))
    picked = []
    for i, unisamp in enumerate(weights):
        unisamp = np.asarray(unisamp, dtype=float)
        total = unisamp.sum()
        if total <= 0:
            raise ValueError('row {} has no labels to sample from'.format(i))
        idx = np.random.choice(len(unisamp), size=nsamp, p=unisamp/total)
        vecs[i] = Vo[idx]
        picked.append(idx)
    return vecs.transpose(1, 0, 2), picked


# Get batch from randomly sampling from one hots
def get_batch(pBatch, Vo, numSamps=(5, 10)):
    """Sample positive and negative word vectors for a batch of label rows.

    Positives are drawn in proportion to ``pBatch`` and negatives in
    proportion to ``1 - pBatch``; ``numSamps`` is (positives, negatives)
    per row. The vector width is taken from ``Vo`` (it used to be fixed
    at 200).

    Returns ``(Vpbatch, Vnbatch, vpia, vnia)``: the sampled vectors with
    the sample axis first, and the per-row index arrays.

    Raises:
        ValueError: if a row has no positive (or no negative) weight.
    """
    nBatch = 1.0 - pBatch
    Vpbatch, vpia = _sample_vectors(pBatch, Vo, numSamps[0])
    Vnbatch, vnia = _sample_vectors(nBatch, Vo, numSamps[1])
    return Vpbatch, Vnbatch, vpia, vnia


# Get batch from randomly sample from one hots but single images
def get_batch_image(pBatch, nBatch, Vo, numSamps=(5, 10)):
    """Like get_batch, but with the negative weights ``nBatch`` supplied by the caller."""
    Vpbatch, vpia = _sample_vectors(pBatch, Vo, numSamps[0])
    Vnbatch, vnia = _sample_vectors(nBatch, Vo, numSamps[1])
    return Vpbatch, Vnbatch, vpia, vnia


## WORD OPTIMIZATION
############################################################
# word parameter optimization
wordlr = 1e-4


# Nonlinear optimization
def costYXcorr(Y, xcorrs):
    """Word adaptation cost for precomputed cross-correlations ``xcorrs``.

    NOTE(review): the negative term uses sigmoid(1 - x), not 1 - sigmoid(x)
    as wordloss does. adaptWords' update is the gradient of this exact
    form, so it is kept unchanged; confirm it is intended.
    """
    fullcost = Y*np.log(sigmoid(xcorrs)) + (1-Y)*np.log(sigmoid(1-xcorrs))
    return -fullcost.mean()


def costYViVo(Y, Vi, Vo):
    """Word adaptation cost for vectors ``Vi`` against ``Vo`` (see costYXcorr)."""
    return costYXcorr(Y, Vi.dot(Vo.T))
import numpy as np


## WORD OPTIMIZATION (models.py, continued)
############################################################
# New full vectors. Assumes that Vo is optimized through the image space
def adaptWords(VoD, VoUnew, Cmat, wordlr=1.0e-4):
    """Take one gradient step moving ``VoD`` towards the correlations ``Cmat``.

    ``sigmoid`` is the clipped logistic helper from the same module
    (models.py). Both updates use the cross-correlations computed before
    the first update is applied.

    Args:
        VoD: word vectors to adapt; updated in place.
        VoUnew: reference vectors the correlations are taken against.
        Cmat: target correlations, same shape as ``VoD.dot(VoUnew.T)``.
        wordlr: step size.

    Returns:
        ``VoD`` (the same array object that was passed in).
    """
    xCorrs = VoD.dot(VoUnew.T)
    VoD += (wordlr*Cmat*(1-sigmoid(xCorrs))).dot(VoUnew)
    VoD += (wordlr*(Cmat-1)*(1-sigmoid(1-xCorrs))).dot(VoUnew)
    return VoD


## ORDERED SET OPERATIONS (setops.py)
############################################################
def _first_positions(words):
    """Map each word to the index of its first occurrence in ``words``."""
    positions = {}
    for i, word in enumerate(words):
        positions.setdefault(word, i)
    return positions


def intersection(list1, list2):
    """Return the words of ``list1`` that also occur in ``list2``.

    Order follows ``list1``. Returns ``(words, list1index, list2index)``
    where the index lists give each word's first position in either input.
    """
    pos1 = _first_positions(list1)
    pos2 = _first_positions(list2)
    setintersection = []
    list1index = []
    list2index = []
    for word in list1:
        if word in pos2:
            setintersection.append(word)
            list1index.append(pos1[word])
            list2index.append(pos2[word])
    return setintersection, list1index, list2index


# Union will keep the order of the original training list
def union(list1, list2):
    """Return the distinct words of ``list1`` followed by the new ones of ``list2``.

    Returns ``(words, list1index, list2index)``; an index is -1 when the
    word does not occur in that input.
    """
    pos1 = _first_positions(list1)
    pos2 = _first_positions(list2)
    setunion = []
    list1index = []
    list2index = []
    seen = set()
    for word in list(list1) + list(list2):
        if word in seen:
            continue
        seen.add(word)
        setunion.append(word)
        list1index.append(pos1.get(word, -1))
        list2index.append(pos2.get(word, -1))
    return setunion, list1index, list2index


def difference(list1, list2):
    """Return the words of ``list1`` absent from ``list2``, with their first positions in ``list1``."""
    pos1 = _first_positions(list1)
    excluded = set(list2)
    setdiff = []
    list1index = []
    for word in list1:
        if word not in excluded:
            setdiff.append(word)
            list1index.append(pos1[word])
    return setdiff, list1index


def replaceword(dictionary, wordsearch, wordreplace):
    """Replace every ``wordsearch`` entry of ``dictionary`` with ``wordreplace`` in place.

    Nothing is changed when ``wordreplace`` is already present. Progress is
    printed; the reported index is the position actually replaced (it used
    to be whatever index the loop ended on).

    Returns:
        True if at least one entry was replaced, otherwise False.
    """
    found = False
    if wordreplace in dictionary:
        print("Warning: {} already in dictionary. Nothing changed".format(wordreplace))
        return found
    for i, word in enumerate(dictionary):
        if word == wordsearch:
            dictionary[i] = wordreplace
            found = True
            print('Found and replaced {} at index {} with {}'.format(wordsearch, i, wordreplace))
    if not found:
        print('Did not find {} in provided dictionary'.format(wordsearch))
    return found


## VOCABULARY UTILITIES (readw2v.py additions)
############################################################
class dictitem(object):
    """Vocabulary entry: position ``idx`` in the vocab file and occurrence ``count``."""

    def __init__(self, idx, count):
        self.idx = idx
        self.count = np.int64(count)

    def __repr__(self):
        return 'dictitem(idx={!r}, count={!r})'.format(self.idx, self.count)


# Read from a vocab file generated by word2vec
def readvocab(vocabname):
    """Read a vocab file with one ``word count`` pair per line.

    Blank lines are skipped. Returns ``(worddict, idxdict)``: a dict mapping
    each word to its ``dictitem`` and the list of words in file order, so
    that ``idxdict[worddict[w].idx] == w``.
    """
    idxdict = []
    worddict = dict()
    with open(vocabname, 'r') as vocabfile:
        for line in vocabfile.read().splitlines():
            fields = line.split()
            if not fields:
                continue
            word = fields[0]
            worddict[word] = dictitem(len(idxdict), fields[1])
            idxdict.append(word)
    return worddict, idxdict


# Initialize a matrix according to an ordered list: the dictionary
def initVo(wordvecs, dictionary):
    """Build a ``len(dictionary) x dim`` matrix of word vectors.

    Row ``i`` is ``wordvecs[dictionary[i]]`` when that word has a vector;
    otherwise it keeps its standard-normal random initialisation and a
    message is printed. ``dim`` is the length of the stored vectors (taken
    from any entry rather than requiring the key 'hello').

    Raises:
        ValueError: if ``wordvecs`` is empty.
    """
    if not wordvecs:
        raise ValueError('wordvecs is empty; cannot infer the vector size')
    vecsize = len(next(iter(wordvecs.values())))
    Vo = np.random.randn(len(dictionary), vecsize)
    for i, word in enumerate(dictionary):
        if word in wordvecs:
            Vo[i] = wordvecs[word]
        else:
            print("{} is not in the dictionary".format(word))
    return Vo