Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions lexdecomp/compmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def conv_layer(in_data):
pooled_outputs.append(pooled)

# concatenating feature maps
features = tf.concat(3, pooled_outputs)
features = tf.concat(pooled_outputs,axis = 3)
# features = [pooled_outputs[0], pooled_outputs[1], pooled_outputs[2]]
total_feature_maps = len(FILTER_SIZES) * NUM_FEATURE_MAPS
features_flat = tf.reshape(features, [-1, total_feature_maps])
# features_flat.shape = [batch, total_feature_maps]
Expand Down Expand Up @@ -88,7 +89,8 @@ def inference(questions, sentences, keep_prob):
"""
question_features = conv_layer(questions)
sentence_features = conv_layer(sentences)
features = tf.concat(1, [question_features, sentence_features])
features = tf.concat([question_features, sentence_features],axis=1)
# features = [1, question_features, sentence_features]
scores = hidden_layer(features, keep_prob)
return scores

Expand Down
Binary file added lexdecomp/compmodel.pyc
Binary file not shown.
39 changes: 24 additions & 15 deletions lexdecomp/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,15 @@ def run_training(training_data, dev_data, test_data, model_dir):
[None, EMBEDDING_SIZE, max_sentence, IN_CHANNELS])
labels = tf.placeholder(tf.float32, [None])
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# building the graph
print('HERE!-0')
logits = compmodel.inference(questions, sentences, keep_prob)
print('HERE!-1')
loss = compmodel.loss(logits, labels)
print('HERE!-2')
train_op = compmodel.training(loss)

print('HERE!')
saver = tf.train.Saver()
bestdev_model_file = Path(model_dir, 'best-dev_model.ckpt').as_posix()

Expand Down Expand Up @@ -154,27 +157,33 @@ def evaluate(session, dataset, data_label, model_label):


def main():
parser = argparse.ArgumentParser(
description='Trains the sentence composition model (CNN).')
parser.add_argument('training', help='training set (.hdf5)')
parser.add_argument('dev', help='dev set (.hdf5)')
parser.add_argument('test', help='test set (.hdf5)')
parser.add_argument('model_dir', help='directory to save models')
args = parser.parse_args()

# parser = argparse.ArgumentParser(
# description='Trains the sentence composition model (CNN).')
# parser.add_argument('training', help='training set (.hdf5)')
# parser.add_argument('dev', help='dev set (.hdf5)')
# parser.add_argument('test', help='test set (.hdf5)')
# parser.add_argument('model_dir', help='directory to save models')
# args = parser.parse_args()
# args.append('../train-filtered.hdf5')
# args.append('../dev-filtered.hdf5')
# args.append('../test-filtered.hdf5')
# args.append('../saved-model')
# checking model directory
model_dir = Path(args.model_dir)
# print ('Yes1')
model_dir = Path('../saved-model')
if not model_dir.exists():
model_dir.mkdir()

# data files
training_data = h5py.File(args.training)
dev_data = h5py.File(args.dev)
test_data = h5py.File(args.test)
# print ('Yes2')
training_data = h5py.File('../train-filtered.hdf5')
dev_data = h5py.File('../dev-filtered.hdf5')
test_data = h5py.File('../test-filtered.hdf5')

try:
# print ('Yes3')
run_training(training_data, dev_data, test_data,
args.model_dir)
'../saved-model')
finally:
training_data.close()
dev_data.close()
Expand Down
1 change: 1 addition & 0 deletions tools/text2numpy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import argparse
import re
import os
Expand Down
20 changes: 12 additions & 8 deletions tools/word2vec2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,16 @@ def memorymap(filename):
if end == -1:
break
wordbytes = mvec[pos:end]
word = wordbytes.decode('utf-8', errors='replace').strip()
# reading the corresponding vector
pos = end + 1
end = pos + byte_offset
vector = array('f', mvec[pos:end])
if vocabulary is not None and word not in vocabulary:
continue # skip word if not in vocabulary
print(word, ' '.join(map(str, vector)), file=fout)
try:
word = wordbytes.decode('utf-8', errors='replace').strip()
# reading the corresponding vector
pos = end + 1
end = pos + byte_offset
vector = array('f', mvec[pos:end])
if vocabulary is not None and word not in vocabulary:
continue # skip word if not in vocabulary
else:
print(word, ' '.join(map(str, vector)), file=fout)
except:
continue
print('finished')
7 changes: 7 additions & 0 deletions trec-qa/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""Load train-filtered.tsv and split each line into its tab-separated fields."""

# Use a context manager so the file handle is always closed (the original
# called open() and never closed it). Encoding is pinned to utf-8 to match
# the coding cookie above rather than relying on the platform default.
with open('train-filtered.tsv', encoding='utf-8') as f:
    lines = f.readlines()

# One list of column strings per input row.
# NOTE(review): like the original, the trailing newline is kept on the last
# field of each row — strip('\n') before split if callers need clean fields.
lines = [line.split('\t') for line in lines]