-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path__init__.py
More file actions
75 lines (63 loc) · 1.96 KB
/
__init__.py
File metadata and controls
75 lines (63 loc) · 1.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import subprocess, os, sys, threading, Queue
from collections import namedtuple
import output_parser
# The parser jar and the grammar model are looked up in the directory
# that contains this module.
includes_dir = os.path.dirname(os.path.abspath(__file__))
parser_path, model_path = (
    os.path.join(includes_dir, 'stanford-parser.jar'),
    os.path.join(includes_dir, 'englishPCFG.ser'),
)

# Shell command line for the parser. It asks for one sentence per input
# line and three output formats per sentence (wordsAndTags, penn,
# typedDependencies). The trailing "-" presumably tells the parser to
# read its input from stdin -- parse() feeds sentences through the pipe.
command = (
    'java -mx150m -cp "%s": '
    'edu.stanford.nlp.parser.lexparser.LexicalizedParser '
    '-sentences newline '
    '-outputFormat wordsAndTags,penn,typedDependencies '
    '"%s" -'
) % (parser_path, model_path)

# The subprocess is launched as a side effect of importing this module.
# bufsize=1 requests line buffering on the pipes, which is why callers
# can write a sentence without flushing explicitly.
handle = subprocess.Popen(
    command,
    shell=True,
    bufsize=1,
    stdin=subprocess.PIPE,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)
# Queue used to hand the results the chunk reader sees over to other
# threads: one list of three strings per parsed sentence.
results = Queue.Queue()


def chunk_reader(handle):
    """Group the parser's output into three-chunk results and queue them.

    The stream is read line by line; a whitespace-only line terminates
    the current chunk. Every third chunk completes a triplet, which is
    put on the module-level ``results`` queue as a list of three
    stripped strings.

    handle -- readable file-like object (the subprocess's stdout).

    Returns None once ``handle`` reaches end-of-file. A chunk or triplet
    that is still incomplete at that point is discarded.
    """
    chunk_lines = []
    triplet = []
    while True:
        line = handle.readline()
        if not line:
            # readline() yields an empty string only at EOF, i.e. the
            # stream has been closed. Without this exit every empty read
            # would be mistaken for a blank separator, so the loop would
            # spin forever and flood `results` with empty triplets.
            break
        if line.strip():
            # Collect the lines and join once per chunk instead of
            # growing a string with repeated concatenation.
            chunk_lines.append(line)
            continue
        # Blank separator: the collected lines form one complete chunk.
        triplet.append(''.join(chunk_lines).strip())
        chunk_lines = []
        if len(triplet) == 3:
            results.put(triplet)
            triplet = []
# Rolling log of the most recent lines read by ignore(). Draining the
# stream prevents the OS pipe buffer from filling up, and the retained
# tail preserves the errors in case something dies.
runlog = []


def ignore(handle):
    """Drain ``handle`` line by line, keeping the newest lines in ``runlog``.

    handle -- readable file-like object (the subprocess's stderr).

    The oldest entry is dropped whenever the log already holds more than
    1000 lines, so ``runlog`` never grows beyond 1001 entries.

    Returns None once ``handle`` reaches end-of-file.
    """
    while True:
        line = handle.readline()
        if not line:
            # An empty read means EOF. Without this exit the loop would
            # spin forever appending empty strings, which would push the
            # real error output out of the log before it can be reported.
            break
        if len(runlog) > 1000:
            runlog.pop(0)
        runlog.append(line)
# Start one background reader per output pipe of the subprocess:
# chunk_reader consumes stdout, ignore drains stderr. Both are daemon
# threads so they never keep the interpreter alive on exit.
for _target, _stream in ((chunk_reader, handle.stdout),
                         (ignore, handle.stderr)):
    t = threading.Thread(target=_target, args=(_stream,))
    t.daemon = True
    t.start()
def checker(h):
    """Wait for the process ``h`` to exit, then report it on stdout.

    h -- object exposing ``wait()`` and ``returncode`` (the Popen handle).

    Prints a notice, the return code and the stderr lines currently
    held in the module-level ``runlog``.
    """
    h.wait()
    print('The parser subprocess has quit!')
    # Label and value separated by one space, as a two-item print emits.
    print(' '.join(['Return code:', str(h.returncode)]))
    print('stderr:')
    print(''.join(runlog))


# Watch the subprocess from a daemon thread so that an unexpected exit
# is reported without blocking the importing program.
t = threading.Thread(target=checker, args=(handle,))
t.daemon = True
t.start()
# Structured result returned by parse() when parse_output is true.
Parse = namedtuple(
    'Parse',
    ['wordlist', 'tree', 'dependency_list'],
)


def parse(sentence, parse_output=True):
    """Send one sentence to the parser subprocess and return its result.

    sentence     -- text to parse; embedded newlines are replaced by
                    spaces because the parser reads one sentence per line.
    parse_output -- when true (the default) the raw triplet is passed to
                    output_parser.parse_triplet and wrapped in a Parse
                    tuple; when false the raw triplet taken from the
                    results queue is returned as is.

    Blocks until a result is available on the results queue.
    """
    one_line = sentence.replace('\n', ' ').strip()
    # No explicit flush: the pipe to the subprocess is line buffered.
    handle.stdin.write('%s\n' % one_line)
    raw_triplet = results.get()
    if parse_output:
        return Parse(*output_parser.parse_triplet(*raw_triplet))
    return raw_triplet