From 01de7bf2ae315872e4ae6b9ca2fae5e4a420afa6 Mon Sep 17 00:00:00 2001
From: Shivam Sharma <15UCC035@lnmiit.ac.in>
Date: Sat, 23 Dec 2017 19:03:57 +0530
Subject: [PATCH 1/2] Update tf.concat calls to values-first signature; hard-code train.py data paths

---
 lexdecomp/compmodel.py  |   8 +++++---
 lexdecomp/compmodel.pyc | Bin 0 -> 4026 bytes
 lexdecomp/train.py      |  39 ++++++++++++++++++++++++---------------
 tools/text2numpy.py     |   1 +
 tools/word2vec2text.py  |  20 ++++++++++++--------
 trec-qa/code.py         |   7 +++++++
 6 files changed, 49 insertions(+), 26 deletions(-)
 create mode 100644 lexdecomp/compmodel.pyc
 create mode 100644 trec-qa/code.py
diff --git a/lexdecomp/compmodel.py b/lexdecomp/compmodel.py
index 738120d..453a084 100644
--- a/lexdecomp/compmodel.py
+++ b/lexdecomp/compmodel.py
@@ -35,7 +35,7 @@ def conv_layer(in_data):
         embedding_size = in_data.shape[1]
         sequence_length = in_data.shape[2]
         in_channels = in_data.shape[3]
-    elif hasattr(in_data, 'get_shape'):  # a TensorFlow placeholder
+    if hasattr(in_data, 'get_shape'):  # a TensorFlow placeholder
         embedding_size = in_data.get_shape()[1].value
         sequence_length = in_data.get_shape()[2].value
         in_channels = in_data.get_shape()[3].value
@@ -58,7 +58,8 @@ def conv_layer(in_data):
         pooled_outputs.append(pooled)
 
     # concatenating feature maps
-    features = tf.concat(3, pooled_outputs)
+    features = tf.concat(pooled_outputs,axis = 3)
+#    features = [pooled_outputs[0], pooled_outputs[1], pooled_outputs[2]]
     total_feature_maps = len(FILTER_SIZES) * NUM_FEATURE_MAPS
     features_flat = tf.reshape(features, [-1, total_feature_maps])
     # features_flat.shape = [batch, total_feature_maps]
@@ -88,7 +89,8 @@ def inference(questions, sentences, keep_prob):
     """
     question_features = conv_layer(questions)
     sentence_features = conv_layer(sentences)
-    features = tf.concat(1, [question_features, sentence_features])
+    features = tf.concat([question_features, sentence_features],axis=1)
+#    features = [1, question_features, sentence_features]
     scores = hidden_layer(features, keep_prob)
     return scores
 
diff --git a/lexdecomp/compmodel.pyc b/lexdecomp/compmodel.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d32a0f11e15c1795f10b20302e82cc15fc231020
GIT binary patch
literal 4026
zcmaJ^-EJGl6`oyElt@uhtT?S)x9KK8Q5ul-;}i}e1Z`wVP6gO(u~ckSsj(h%hvX{z
zmz`PJD#)8EFVO-;(Z}drFMH7!=ySBaPQP=8OUZR3NjsXEJ#)^P^YfjvwSTR&p8w|u
zzZr<xPYwS+!=ojJP>55EjtE^5JL1$8VNHZ}5iW>PO`Kxh5T{r!if=%<Ai^b{G(_0s
z$)X5bJXsRqGEbT!Z1bc=JbRrL67|p#;!x>95%rb!%vgC^rADO#<qfhV&vay>EcKEs
zRPk1;bpisOdy)2{B#%`><e?|EC%uDicavt}jA7_SsrOu_BTpGm##>(J!Orf(d#x|W
z#^ic$ds`N-qw}q-7;VeG-rjw<^I&V|<DK14Ua%;LfUGiBSCXuXQ94uw>9`$dT5p>|
zMri~oxim%<DRc<4oV1G4)`?1WRy>ZgSDq<NRzxz^UPmi6UwR*2qj{<ZW0^(+?d|UD
zez*y149EbjM`aX;Y@g2ynUC*nwW4qF(~V$15xF#w`=WniJ)l_w&jOwUJQ~gv6Gu!i
zy21!Cs*7n&7)P98iLoXoa47rw(s3>wF|A*F3oo<x>7XM(5Zbp3G@mYrNnK19#E^VS
z67`QsY@wuqM<v{to@a4Mw~(<njAEE*%aD57grPbg{qYa<^VeS(K0KDW;#p`ea@8R^
z^A3jY7f*x7NBbvV_(#Fh{a^PC-PsJy68=4tFl#?nOlXQS9YA&%q*;;3m?@xyHZnD4
z5#+--ljfrbmiIDkuu_4Jey7YT)+*_%FoZ|#7Li10K#oaOtWEpc5muL&zEaU>Y&2i2
zA4#or2fKm?O(!UvZ5ZblhFFqe0xNnQOWndC+;!)D{I;C|Id%;XZL<^qj2*ZDPK2i}
zT~T~O^E39^ohf^l7Bz8a>Q-;+Sky%NygCS%;ovv?+7*{z<%&CS|LnNk0tec%Ozal;
z&=y#=g+V+t;Ck_UM~no8%Im)ixa{!7-vpuo7C<ymwAI9AP5jms(}tKX3Yh+ENt`uF
z_5?5ifIw7Le~3sR2(aVQb*D>A+^UW&i^-ChERyOJn&1G{7Q_Y)e}6isYKsXFhOq@N
zCOjdo-zCSXO~JS>&Q`=^nHW1?|4BS2EmsNW@CNdxxb7NV4(}0Nx2G$t)GaY-6Rb^E
z#0W}(ZdFWH5&z_9{Z9=0c^+RN`ZbUz8~)!U&|Y?~3SdND8ZVSa0(3n5?B%kM3HAfp
z+yfKfAuxpaXo@%Q%f2*&@uqk5^wH7b;c@r#pm%(Fw8^JmYV~zV*c8Mn9hvdVP4BoH
z9Q<;>+dX>HdtoIM%9KT_Z{lq-BjaF*EG`QbBr@0btj~bM!yI)?6E_|y6R>*vGpuIX
zdr7H{*H@m3fb9kD(k+u5hA9ep;o(lwy9R!FEC*_w#i1%VQMD-|U^&lo$r;Je@NECd
z@gdK?qMT(U>`)Fd<ydOqaKVcD1RYFyF5^<Mq9?$WqoT+PPUOeOPfm_}Hek=EeDmvc
zgzlE6oXwP{4?;vewq-hIJfRE^pb;+;`8vq6EM{t%=PC`Wg8(8^!$J7Mr~j;Aq1a9x
zlExb|WBHWo2seb#n?CdjE*P0e43I4=4T47dWOtwHs86uPDw4JgFV|dWi1Di^G~<9@
z6R28FtpAKs8#duvem5kbq6vnvG&dm|iDG1n<%KHlVx_k*h{ZK$1;6h&YtFm){hr&d
zH?ZDt)}6b~`|hgqfpgcrRa*zehC3kh6Q_9v`v(?)J%AFD8z_jXg_OJvKi#HERugBa
zNf?1VC}u9DrR`U)>e-wI-~(I$CCF?&Yw%eh4A(Q_Qy~@L15if$mInJk91tM+0dx!!
zz#@VGKpge~LqMD*=8R21ndJtQSt6AQzM4P^a-IGOhO5dxEaUj%K8%;5=%agJ3vO}7
zUX^(tN5s5l+92mrNDRMHDi87^>)Vu|pzu8!HfW##<mmRvtUg77Po>MJ8tUJ{z|j~M
zS&jhne@Gidlt`15vHw9uptGSt%y-B^{yiG*(<=&6s?rBf_Itgf9)}HsTac=<KmooM
zsc(c731mTfkRdmik)3k1@tQ6b*v*@wkMMUKp{+;*w%#Gunl3MD4d=GoM!ZpR-Ef+C
z-b3thuS7<m^gh6&X|QFMyoK1})`{j-PlPz6Agfo{kD7-i#}mZ?!lD5OlT-9hF<d$L
zt&Y~~pk+lbN3hQ>?vNV(CJj8JR?7bvLx*%^_yW3XYNj-=s%}|ZAKU^}MNomaHndw2
z&UdZnen!0Ud{?7FQ8jOOpJ8F!lv<muxaM%ClB#q2eS`&UhE@d4CFc%hE$wh((gf92
zaQeck%P=vl+3QeA(SN~kfJUjL-jTirsKNGzMV8D~qsXACOl>P1%Rc(CH_K+65y_rs
zW5vr06-Jy*{R?JczaH*aHFQ=GZOOI#_B{1EuQHGXc>hDqU9R;HC5^vBH>PBOtP+J+
zmk9y_pr4J{pB&p(FFR$m8G0*84Bd3s8+_qE0?(ItG$o~2Tz7tCyF@ZQZE^ULkLKp5
zNQI?c;aK~_@|Wn?(96IJ_-3m>5?~0;E*+PeJ~$4b2e^fssN-@f50Q*6YY|~)%S|d1
z#R)o!vpyOIZB)*I$sa}3ly?|iNJmB&7$=d%#Vp@tEDJ8rOwN$8AIjucxrq`~_JWa$
z2_ECSN!fCXk1cv>bA^n;0)Zj3Gl#TAQkiLx<u@4hiVp35j59XAop;?e)TQ^F4wqg^
zz90y*K@hN`Z9|6dh*vfYuHcnB2+m*1TK`iTNJYPk;Ytslz5Q80N`Gp<Ie$)>r>P=0
UoOXT9Y1Ep{=Dp^6v(;?>7hWBQt^fc4

literal 0
HcmV?d00001

diff --git a/lexdecomp/train.py b/lexdecomp/train.py
index a8f4a77..b379343 100644
--- a/lexdecomp/train.py
+++ b/lexdecomp/train.py
@@ -78,12 +78,15 @@ def run_training(training_data, dev_data, test_data, model_dir):
         [None, EMBEDDING_SIZE, max_sentence, IN_CHANNELS])
     labels = tf.placeholder(tf.float32, [None])
     keep_prob = tf.placeholder(tf.float32, name='keep_prob')
-
+    
     # building the graph
+    print('HERE!-0')
     logits = compmodel.inference(questions, sentences, keep_prob)
+    print('HERE!-1')
     loss = compmodel.loss(logits, labels)
+    print('HERE!-2')
     train_op = compmodel.training(loss)
-
+    print('HERE!')
     saver = tf.train.Saver()
     bestdev_model_file = Path(model_dir, 'best-dev_model.ckpt').as_posix()
 
@@ -154,27 +157,33 @@ def evaluate(session, dataset, data_label, model_label):
 
 
 def main():
-    parser = argparse.ArgumentParser(
-        description='Trains the sentence composition model (CNN).')
-    parser.add_argument('training', help='training set (.hdf5)')
-    parser.add_argument('dev', help='dev set (.hdf5)')
-    parser.add_argument('test', help='test set (.hdf5)')
-    parser.add_argument('model_dir', help='directory to save models')
-    args = parser.parse_args()
-
+#    parser = argparse.ArgumentParser(
+#        description='Trains the sentence composition model (CNN).')
+#    parser.add_argument('training', help='training set (.hdf5)')
+#    parser.add_argument('dev', help='dev set (.hdf5)')
+#    parser.add_argument('test', help='test set (.hdf5)')
+#    parser.add_argument('model_dir', help='directory to save models')
+#    args = parser.parse_args()
+#    args.append('../train-filtered.hdf5')
+#    args.append('../dev-filtered.hdf5')
+#    args.append('../test-filtered.hdf5')
+#    args.append('../saved-model')
     # checking model directory
-    model_dir = Path(args.model_dir)
+#    print ('Yes1')
+    model_dir = Path('../saved-model')
     if not model_dir.exists():
         model_dir.mkdir()
 
     # data files
-    training_data = h5py.File(args.training)
-    dev_data = h5py.File(args.dev)
-    test_data = h5py.File(args.test)
+#    print ('Yes2')
+    training_data = h5py.File('../train-filtered.hdf5')
+    dev_data = h5py.File('../dev-filtered.hdf5')
+    test_data = h5py.File('../test-filtered.hdf5')
 
     try:
+#        print ('Yes3')
         run_training(training_data, dev_data, test_data,
-                     args.model_dir)
+                     '../saved-model')
     finally:
         training_data.close()
         dev_data.close()
diff --git a/tools/text2numpy.py b/tools/text2numpy.py
index 9eb7f52..ef22767 100644
--- a/tools/text2numpy.py
+++ b/tools/text2numpy.py
@@ -1,3 +1,4 @@
+import sys
 import argparse
 import re
 import os
diff --git a/tools/word2vec2text.py b/tools/word2vec2text.py
index 6ebbb4a..5a0a31c 100644
--- a/tools/word2vec2text.py
+++ b/tools/word2vec2text.py
@@ -56,12 +56,16 @@ def memorymap(filename):
             if end == -1:
                 break
             wordbytes = mvec[pos:end]
-            word = wordbytes.decode('utf-8', errors='replace').strip()
-            # reading the corresponding vector
-            pos = end + 1
-            end = pos + byte_offset
-            vector = array('f', mvec[pos:end])
-            if vocabulary is not None and word not in vocabulary:
-                continue  # skip word if not in vocabulary
-            print(word, ' '.join(map(str, vector)), file=fout)
+            try:
+                word = wordbytes.decode('utf-8', errors='replace').strip()
+                # reading the corresponding vector
+                pos = end + 1
+                end = pos + byte_offset
+                vector = array('f', mvec[pos:end])
+                if vocabulary is not None and word not in vocabulary:
+                    continue  # skip word if not in vocabulary
+                else:
+                    print(word, ' '.join(map(str, vector)), file=fout)
+            except:
+                continue
         print('finished')
diff --git a/trec-qa/code.py b/trec-qa/code.py
new file mode 100644
index 0000000..fbbf933
--- /dev/null
+++ b/trec-qa/code.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+f = open('train-filtered.tsv')
+
+lines = f.readlines()
+
+lines = [line.split('\t') for line in lines]
\ No newline at end of file

From eb02f2541f4a3907bc7d690b64d89d995c6a2e65 Mon Sep 17 00:00:00 2001
From: Shivam Sharma <15ucc035@lnmiit.ac.in>
Date: Sat, 23 Dec 2017 19:42:48 +0530
Subject: [PATCH 2/2] Restore elif in conv_layer input-shape detection

---
 lexdecomp/compmodel.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lexdecomp/compmodel.py b/lexdecomp/compmodel.py
index 453a084..5bd7a44 100644
--- a/lexdecomp/compmodel.py
+++ b/lexdecomp/compmodel.py
@@ -35,7 +35,7 @@ def conv_layer(in_data):
         embedding_size = in_data.shape[1]
         sequence_length = in_data.shape[2]
         in_channels = in_data.shape[3]
-    if hasattr(in_data, 'get_shape'):  # a TensorFlow placeholder
+    elif hasattr(in_data, 'get_shape'):  # a TensorFlow placeholder
         embedding_size = in_data.get_shape()[1].value
         sequence_length = in_data.get_shape()[2].value
         in_channels = in_data.get_shape()[3].value