-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdocxreq.py
More file actions
168 lines (137 loc) · 5.15 KB
/
docxreq.py
File metadata and controls
168 lines (137 loc) · 5.15 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
"""
Extract requirements from a semi-structured MS Word (.docx) document
"""
try:
from xml.etree.cElementTree import XML
except ImportError:
from xml.etree.ElementTree import XML
import zipfile
import doorstop
import readline
import argparse
from doorstop.common import DoorstopError
# .docx parser utility functions
def get_xml_tree(docxpath):
    """Parse the main document part of a .docx file into an element tree.

    A .docx file is a zip archive; this reads its ``word/document.xml``
    member and parses the bytes with ``XML()``.

    Args:
        docxpath: Path or file-like object accepted by ``zipfile.ZipFile``.

    Returns:
        The root element parsed from ``word/document.xml``.

    Raises:
        KeyError: If the archive has no ``word/document.xml`` member.
    """
    # The context manager closes the archive even when read() raises.
    with zipfile.ZipFile(docxpath) as document:
        xml_content = document.read('word/document.xml')
    return XML(xml_content)
# Doorstop wrapper functions
def wrapdoorstop(func):
    """Decorate a Doorstop operation, returning its result unchanged.

    The wrapper forwards all positional and keyword arguments to ``func``
    and returns whatever ``func`` returns. Any exception raised by ``func``
    (including ``DoorstopError``) propagates to the caller as-is.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same name and docstring as ``func``.
    """
    # Local import keeps this block independent of the module import list.
    import functools

    # wraps() preserves __name__/__doc__ so decorated helpers stay identifiable.
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        return func(*args, **kwargs)
    return wrapper
@wrapdoorstop
def _create(tree, path, value, parent):
    """Create a document in ``tree`` and return it.

    ``path``, ``value`` and ``parent`` are forwarded as keyword arguments to
    ``tree.create_document``.
    """
    new_document = tree.create_document(
        path=path,
        value=value,
        parent=parent,
    )
    return new_document
@wrapdoorstop
def _find(tree, path, value, parent):
    """Return the document that ``tree.find_document`` finds for ``value``.

    ``path`` and ``parent`` are accepted but not used, so this function can
    be called with the same arguments as ``_create``.
    """
    existing_document = tree.find_document(value)
    return existing_document
# Processing functions: read the .docx and enter its contents into Doorstop
def _read_next(parg_iterator):
next_parg = next(parg_iterator)
return ''.join([x for x in next_parg.itertext()])
def _read_next_and_forward(parg_iterator):
    """Return the text of the next paragraph, then skip the one after it.

    Args:
        parg_iterator: Iterator of paragraph elements, as consumed by
            ``_read_next``.

    Returns:
        The text of the paragraph that was read (not the skipped one).

    Raises:
        StopIteration: If there is no paragraph left to read.
    """
    text = _read_next(parg_iterator)
    # Skip the following paragraph if there is one. Using a default avoids
    # raising StopIteration here, which would discard the text just read when
    # it came from the last paragraph.
    next(parg_iterator, None)
    return text
def process_document(repopath, tree, doctree, docfun=_create):
    """Synchronise Doorstop items with the requirements found in a .docx tree.

    The WordprocessingML paragraphs of ``doctree`` are read in order:

    * A paragraph whose text is ``REQ_TYPE`` opens a document section. The
      next paragraph is the prefix, one paragraph is skipped, and the
      following one is the parent. ``docfun`` is called to obtain the
      document stored under ``repopath/<prefix lower-cased>``.
    * A paragraph whose text is ``REQ_NUM`` starts one requirement. Five
      values are then read (number, links, text, rationale, note), each
      followed by one skipped paragraph. An item whose number is unknown to
      the document is added; otherwise the existing item is updated.

    When the paragraphs run out, or ``docfun`` fails, items of the current
    document whose numbers were not seen in its section are removed.

    Args:
        repopath: Root path of the requirement tree.
        tree: Doorstop tree object handed to ``docfun``.
        doctree: Root element of the parsed ``word/document.xml``.
        docfun: Callable ``(tree, path=, value=, parent=)`` returning the
            document to fill; defaults to ``_create``.

    Raises:
        ValueError: If ``REQ_NUM`` appears before any ``REQ_TYPE`` section,
            or a requirement number is not an integer.
    """
    word_namespace = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    # Element.getiterator() was removed in Python 3.9; iter() replaces it.
    paragraph = doctree.iter(word_namespace + 'p')
    doc = None
    input_reqs = []  # requirement numbers seen in the current section
    reqs = {}        # number -> uid of items already in the current document
    while True:
        try:
            text = _read_next(paragraph)
            if not text:
                continue
            if text == 'REQ_TYPE':
                prefix = _read_next_and_forward(paragraph)
                parent = _read_next(paragraph)
                treepath = '/'.join([repopath, prefix.lower()])
                try:
                    doc = docfun(tree,
                                 path=treepath,
                                 value=prefix,
                                 parent=parent)
                except Exception as exc:
                    print(str(exc))
                    break
                reqs = {x.number: x.uid for x in doc.items}
                # Seen-numbers are per document; numbers from an earlier
                # section must not mask stale items in this one.
                input_reqs = []
            elif text == 'REQ_NUM':
                if doc is None:
                    raise ValueError('REQ_NUM found before any REQ_TYPE section')
                for key in ['REQ_NUM', 'REQ_LINKS', 'REQ_TEXT', 'REQ_RATIO', 'REQ_NOTE']:
                    field = _read_next_and_forward(paragraph)
                    if key == 'REQ_NUM':
                        num = int(field)
                        if num not in reqs:
                            # create
                            item = doc.add_item(num)
                            print('new requirement')
                        else:
                            # update: use the uid recorded for this number
                            # rather than re-deriving it from a fixed format
                            item = doc.find_item(str(reqs[num]))
                            print('update requirement')
                        input_reqs.append(num)
                    elif key == 'REQ_LINKS':
                        item.links = []  # first remove all links
                        # Strip before filtering so whitespace-only entries
                        # (e.g. "A, ,B" or a trailing comma) are dropped.
                        stripped = [x.strip() for x in field.split(',')]
                        for link in [x for x in stripped if x]:
                            item.link(link)
                    elif key == 'REQ_TEXT':
                        item.text = field
                    elif key == 'REQ_RATIO':
                        item.set(name='rationale', value=field)
                    elif key == 'REQ_NOTE':
                        item.set(name='note', value=field)
        except StopIteration:
            break
    # delete items that exist in the document but were absent from the input
    for num in set(reqs) - set(input_reqs):
        doc.remove_item(str(reqs[num]))
        print('delete requirement')
if __name__ == '__main__':
    # Interactive entry point: build the requirement tree at the given path,
    # then loop over a small text menu until the user quits.
    PARSER = argparse.ArgumentParser(description='Parse .docx documents into Doorstop files')
    PARSER.add_argument('repopath', type=str, help='path to the requirement tree')
    ARGS = PARSER.parse_args()
    readline.parse_and_bind('tab: complete')
    readline.parse_and_bind('set editing-mode vi')
    REQTREE = doorstop.build(root=ARGS.repopath)
    print('On tree {}'.format(ARGS.repopath))
    print(REQTREE)
    while True:
        try:
            print('1. Add document')
            print('2. Update document')
            print('3. Analyze requirement tree')
            print('4. Quit')
            SEL = int(input('> '))
            if SEL in (1, 2):
                # Add creates the Doorstop document; update looks it up.
                DOCPATH = input('Document path: ')
                DOCTREE = get_xml_tree(DOCPATH)
                process_document(ARGS.repopath, REQTREE, DOCTREE,
                                 _create if SEL == 1 else _find)
            elif SEL == 3:
                for issue in REQTREE.issues:
                    print(issue)
            elif SEL == 4:
                break
        except (KeyboardInterrupt, EOFError):
            # EOFError means stdin was closed; input() would keep raising it,
            # so treating it like any other error would loop forever.
            break
        except Exception as exc:
            # Top-level boundary: report the problem and show the menu again.
            print(str(exc))