-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreadTable.py
More file actions
203 lines (189 loc) · 9.58 KB
/
readTable.py
File metadata and controls
203 lines (189 loc) · 9.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -*- coding: utf-8 -*-
from datetime import date
import xlrd
import openpyxl
# Below are dictionaries for data transform
HorizontalAlignment03Dict = {0: 'Default', 1: 'Left', 2: 'Center', 3: 'Right', 5: 'Justified', 4: 'Filled',
7: 'Distributed'}
HorizontalAlignment07Dict = {None: 'Default', 'general': 'Default', 'left': 'Left', 'center': 'Center',
'right': 'Right',
'justify': 'Justified', 'fill': 'Filled', 'distributed': 'Distributed'}
VerticalAlignment03Dict = {0: 'Top', 1: 'Middle', 2: 'Bottom', 3: 'Bottom', 4: 'Distributed'}
# VerticalAlignment03Dict = {0: 'Top', 1: 'Middle', 2: 'Bottom', 3: 'Justified', 4: 'Distributed'}
VerticalAlignment07Dict = {None: 'Bottom', 'top': 'Top', 'center': 'Middle', 'bottom': 'Bottom',
'distributed': 'Distributed'}
# VerticalAlignment07Dict = {'top': 'Top', 'center': 'Middle', 'bottom': 'Bottom', 'bottom': 'Justified','distributed': 'Distributed'}
Border03Dict = {0: 'No', 1: 'Solid', 2: 'Solid', 5: 'Solid', 13: 'Dotted', 4: 'Dotted', 8: 'Dashed', 3: 'Dashed',
13: 'Dashed', 9: 'DashDot', 10: 'DashDot', 11: 'DashDotDot', 12: 'DashDotDot', 6: 'DoubleThin'}
Border07Dict = {None: 'No', 'thin': 'Solid', 'dotted': 'Dotted', 'dashed': 'Dashed', 'dashDot': 'DashDot',
'dashDotDot': 'DashDotDot', 'double': 'DoubleThin'}
# read CSV data to produce my own data format, but no font, color or cell format supports for CSV file
def readCSV(path):
content = [] # represents one csv file, to store data of one or more sheets
with open(path, 'r') as f: # open file to read
sheetcontent = [] # represents one sheet, to store rows
for line in f:
line = line.replace(' ', '')
if line == '\n':
continue
rowcontent = [] # represents one row, to store cells
line = line.replace('\n', '')
celllist = line.split(',')
for cell in celllist:
cellcontent = {'value': cell, 'size': 10, 'bold': 0, 'italic': 0,
'underline': 0,
'fontname': 'Arial', 'valign': 'Bottom',
'halign': 'Default',
'tborder': 'No', 'bborder': 'No',
'lborder': 'No', 'rborder': 'No'} # store a cell's data and format
rowcontent.append(cellcontent)
#print(line)
sheetcontent.append(rowcontent)
content.append([sheetcontent, path, []])
#print(content)
return content
pass
# read xls( only for suites before office 2003) files' data value, font and cell format( alignment and border)
# to my own data format
def read03xls(path):
workbook = xlrd.open_workbook(path, formatting_info=True) # set 'True' for formatting_info
# to be able to access cell format
sheets = workbook.sheet_names() # read sheets' names to 'sheets'
content = [] # represents one csv file, to store data of one or more sheets
for sheetname in sheets: # read every single sheet
#print('sheet: ', sheetname)
worksheet = workbook.sheet_by_name(sheetname) # get sheet by sheetname
sheetcontent = [] # represents one sheet, to store rows
for i in range(0, worksheet.nrows): # get each row
# row = worksheet.row(i)
rowcontent = [] # represents one row, to store cells
for j in range(0, worksheet.ncols): # get each cell
cell = worksheet.cell(i, j) # get cell object
# print(cell.value, '\t', end='')
fmtindex = cell.xf_index
fmt = workbook.xf_list[fmtindex] # get cell format object by format index
fontindex = fmt.font_index
font = workbook.font_list[fontindex] # get cell font object
size = (font.height / 20)
bold = font.bold
italic = font.italic
underline = font.underlined
fontname = font.name
# color = font.colour_index # font color
# print(font.colour_index)
# bgx = fmt.background.pattern_colour_index # background color
align = fmt.alignment # get alignment object
valign = align.vert_align # vertical alignment
halign = align.hor_align # horizontal alignment
border = fmt.border
lborder = border.left_line_style # left border
rborder = border.right_line_style # right border
tborder = border.top_line_style # top border
bborder = border.bottom_line_style # bottom border
cvalue = cell.value # cell value
cvalue == str(cell.value)
if cvalue is None:
cvalue = ''
cellcontent = {'value': cvalue, 'size': size, 'bold': bold, 'italic': italic,
'underline': underline,
'fontname': 'Arial', 'valign': VerticalAlignment03Dict.get(valign),
'halign': HorizontalAlignment03Dict.get(halign),
'tborder': Border03Dict.get(tborder), 'bborder': Border03Dict.get(bborder),
'lborder': Border03Dict.get(lborder), 'rborder': Border03Dict.get(rborder)}
# store a cell's data and format
rowcontent.append(cellcontent)
sheetcontent.append(rowcontent)
#print()
# print(worksheet.merged_cells)
merge = [] # store sheet's merged cells
for m in worksheet.merged_cells:
pass
merge.append(((m[0], m[2]), (m[1] - 1, m[3] - 1)))
# print(merge)
content.append([sheetcontent, sheetname, merge])
test = fmt.border.top_line_style
# showDetail(test)
#print(content)
return content
# read xlsx( only for suites after office 2007)files' data value, font and cell format( alignment and border)
# to my own data format
def read07xlsx(path):
workbook = openpyxl.load_workbook(path)
sheetnames = workbook.sheetnames
content = [] # represents one csv file, to store data of one or more sheets
for sheetname in sheetnames: # read every single sheet
sheet = workbook[sheetname]
#print('sheet: ', sheetname)
sheetcontent = [] # represents one sheet, to store rows
ii = jj = 1
for row in sheet.rows: # get each row
rowcontent = [] # represents one row, to store cells
jj = 1
for cell in row: # get each cell
# print(cell.value, "\t", end="")
cellfont = cell.font # get font object
# print(ii, jj)
size = cellfont.sz
bold = cellfont.b
italic = cellfont.i
fontname = cellfont.name
underline = cellfont.underline
# color = cellfont.color.rgb # font color
# print(type(cellfont.color.rgb))
# bgx = cell.fill.fgColor.rgb # background color
align = cell.alignment # alignment object
valign = align.vertical # vertical alignment
halign = align.horizontal # horizontal alignment
border = cell.border
lborder = border.left.style # left border
rborder = border.right.style # right border
tborder = border.top.style # top border
bborder = border.bottom.style # bottom border
cvalue = cell.value # cell value
cvalue == str(cell.value)
if cvalue is None:
cvalue = ''
cellcontent = {'value': cvalue, 'size': size, 'bold': bold, 'italic': italic,
'underline': underline,
'fontname': 'Arial', 'valign': VerticalAlignment07Dict.get(valign),
'halign': HorizontalAlignment07Dict.get(halign),
'tborder': Border07Dict.get(tborder), 'bborder': Border07Dict.get(bborder),
'lborder': Border07Dict.get(lborder), 'rborder': Border07Dict.get(rborder)}
# store a cell's data and format
rowcontent.append(cellcontent)
jj += 1
sheetcontent.append(rowcontent)
# print()
ii += 1
# print('--------------------------------------------------')
# print(type(cell).__name__)
# print(cell.coordinate)
merge = [] # store sheet's merged cells
for m in sheet.merged_cells.ranges:
# print(m.__dict__)
merge.append(((m.min_row - 1, m.min_col - 1), (m.max_row - 1, m.max_col - 1)))
# print('--------------------------------------------------')
content.append([sheetcontent, sheetname, merge])
test = cell.border.top
# showDetail(test)
# print('\n',content)
return content
# function used for seeking methods or attributes
def showDetail(t):
print('------This is test part------')
print(str(t), ':', t)
print('-----------------------------')
print('dir():', dir(t))
print('-----------------------------')
print('type():', type(t))
print('-----------------------------')
try:
print('dump():', t.dump)
except Exception as e:
print('Exception:', e)
if __name__ == '__main__':
file_2003 = './2003.xls'
file_2007 = './2007.xlsx'
read03xls(file_2003)
read07xlsx(file_2007)
# readCSV('./cc.csv')