Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions lexdecomp/compmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ def conv_layer(in_data):
pooled_outputs.append(pooled)

# concatenating feature maps
features = tf.concat(3, pooled_outputs)
features = tf.concat(pooled_outputs,axis = 3)
# features = [pooled_outputs[0], pooled_outputs[1], pooled_outputs[2]]
total_feature_maps = len(FILTER_SIZES) * NUM_FEATURE_MAPS
features_flat = tf.reshape(features, [-1, total_feature_maps])
# features_flat.shape = [batch, total_feature_maps]
Expand Down Expand Up @@ -88,7 +89,8 @@ def inference(questions, sentences, keep_prob):
"""
question_features = conv_layer(questions)
sentence_features = conv_layer(sentences)
features = tf.concat(1, [question_features, sentence_features])
features = tf.concat([question_features, sentence_features],axis=1)
# features = [1, question_features, sentence_features]
scores = hidden_layer(features, keep_prob)
return scores

Expand Down
Binary file added lexdecomp/compmodel.pyc
Binary file not shown.
39 changes: 24 additions & 15 deletions lexdecomp/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,15 @@ def run_training(training_data, dev_data, test_data, model_dir):
[None, EMBEDDING_SIZE, max_sentence, IN_CHANNELS])
labels = tf.placeholder(tf.float32, [None])
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

# building the graph
print('HERE!-0')
logits = compmodel.inference(questions, sentences, keep_prob)
print('HERE!-1')
loss = compmodel.loss(logits, labels)
print('HERE!-2')
train_op = compmodel.training(loss)

print('HERE!')
saver = tf.train.Saver()
bestdev_model_file = Path(model_dir, 'best-dev_model.ckpt').as_posix()

Expand Down Expand Up @@ -154,27 +157,33 @@ def evaluate(session, dataset, data_label, model_label):


def main():
parser = argparse.ArgumentParser(
description='Trains the sentence composition model (CNN).')
parser.add_argument('training', help='training set (.hdf5)')
parser.add_argument('dev', help='dev set (.hdf5)')
parser.add_argument('test', help='test set (.hdf5)')
parser.add_argument('model_dir', help='directory to save models')
args = parser.parse_args()

# parser = argparse.ArgumentParser(
# description='Trains the sentence composition model (CNN).')
# parser.add_argument('training', help='training set (.hdf5)')
# parser.add_argument('dev', help='dev set (.hdf5)')
# parser.add_argument('test', help='test set (.hdf5)')
# parser.add_argument('model_dir', help='directory to save models')
# args = parser.parse_args()
# args.append('../train-filtered.hdf5')
# args.append('../dev-filtered.hdf5')
# args.append('../test-filtered.hdf5')
# args.append('../saved-model')
# checking model directory
model_dir = Path(args.model_dir)
# print ('Yes1')
model_dir = Path('../saved-model')
if not model_dir.exists():
model_dir.mkdir()

# data files
training_data = h5py.File(args.training)
dev_data = h5py.File(args.dev)
test_data = h5py.File(args.test)
# print ('Yes2')
training_data = h5py.File('../train-filtered.hdf5')
dev_data = h5py.File('../dev-filtered.hdf5')
test_data = h5py.File('../test-filtered.hdf5')

try:
# print ('Yes3')
run_training(training_data, dev_data, test_data,
args.model_dir)
'../saved-model')
finally:
training_data.close()
dev_data.close()
Expand Down
1 change: 1 addition & 0 deletions tools/text2numpy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import sys
import argparse
import re
import os
Expand Down
20 changes: 12 additions & 8 deletions tools/word2vec2text.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,16 @@ def memorymap(filename):
if end == -1:
break
wordbytes = mvec[pos:end]
word = wordbytes.decode('utf-8', errors='replace').strip()
# reading the corresponding vector
pos = end + 1
end = pos + byte_offset
vector = array('f', mvec[pos:end])
if vocabulary is not None and word not in vocabulary:
continue # skip word if not in vocabulary
print(word, ' '.join(map(str, vector)), file=fout)
try:
word = wordbytes.decode('utf-8', errors='replace').strip()
# reading the corresponding vector
pos = end + 1
end = pos + byte_offset
vector = array('f', mvec[pos:end])
if vocabulary is not None and word not in vocabulary:
continue # skip word if not in vocabulary
else:
print(word, ' '.join(map(str, vector)), file=fout)
except:
continue
print('finished')
7 changes: 7 additions & 0 deletions trec-qa/code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# -*- coding: utf-8 -*-
"""Load train-filtered.tsv and split each line into its tab-separated fields."""

# Use a context manager so the file handle is always closed (the original
# called open() and never closed it). Encoding is pinned to utf-8 to match
# the coding cookie above rather than relying on the platform default.
with open('train-filtered.tsv', encoding='utf-8') as f:
    lines = f.readlines()

# One list of column strings per input row.
# NOTE(review): like the original, the trailing newline is kept on the last
# field of each row — strip('\n') before split if callers need clean fields.
lines = [line.split('\t') for line in lines]