Skip to content

Commit 691bc15

Browse files
Merge pull request #9 from JiscDACT/CDSPT-13954-spike-memory-leak
Cdspt 13954 spike memory leak
2 parents f909be2 + 19febaf commit 691bc15

3 files changed

Lines changed: 20 additions & 12 deletions

File tree

mario/hyper_utils.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -347,15 +347,15 @@ def save_hyper_as_csv(hyper_file: str, file_path: str, **kwargs):
347347

348348
# Get column names
349349
column_names = ','.join(f'"{column}"' for column in columns)
350-
sql = f"SELECT {column_names} FROM \"{schema}\".\"{table}\" ORDER BY row_number"
351-
offset = 0
352350

353351
if options.use_pantab:
354352
# Use pantab to stream hyper to csv
355353
logging.info('Stream hyper to csv with pantab.')
356354

357355
mode = 'w'
358356
header = True
357+
sql = f"SELECT {column_names} FROM \"{schema}\".\"{table}\" ORDER BY row_number"
358+
offset = 0
359359

360360
while True:
361361
query = f"{sql} LIMIT {options.chunk_size} OFFSET {offset}"
@@ -375,22 +375,30 @@ def save_hyper_as_csv(hyper_file: str, file_path: str, **kwargs):
375375
with HyperProcess(Telemetry.DO_NOT_SEND_USAGE_DATA_TO_TABLEAU, 'test') as hyper:
376376
with Connection(endpoint=hyper.endpoint, database=temp_hyper) as connection:
377377

378+
# Get min and max value from row_number
379+
sql_range = f"SELECT MIN(row_number), MAX(row_number) FROM \"{schema}\".\"{table}\""
380+
value_range = connection.execute_query(sql_range)
381+
start_val, end_val = list(value_range)[0]
382+
logging.info(f"Get row_number range: [{str(start_val)}, {str(end_val)}]")
383+
384+
sql = f"SELECT {column_names} FROM \"{schema}\".\"{table}\""
385+
378386
with open_func(file_path, mode, newline='', encoding="utf-8") as f:
379387

380388
writer = csv.writer(f)
381389
# write header
382390
writer.writerow(columns)
383391

384-
while True:
385-
query = f"{sql} LIMIT {options.chunk_size} OFFSET {offset}"
392+
while start_val <= end_val:
393+
chunk_end = min(start_val+options.chunk_size-1, end_val)
394+
query = f"{sql} WHERE row_number BETWEEN {start_val} AND {chunk_end}"
395+
logging.info(f"Query between {start_val} and {chunk_end}")
396+
386397
result = connection.execute_query(query)
387398

388399
rows = list(result)
389-
if not rows:
390-
break
391-
392400
writer.writerows(rows)
393-
offset += options.chunk_size
401+
start_val += options.chunk_size
394402

395403

396404
def save_dataframe_as_hyper(df, file_path, **kwargs):

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
setup(
44
name='mario-pipeline-tools',
5-
version='0.59',
5+
version='0.60',
66
packages=['mario'],
77
url='https://github.com/JiscDACT/mario',
88
license='all rights reserved',

test/test_data_extractor.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ def test_hyper_to_csv():
553553
compress_using_gzip=False
554554
)
555555
assert extractor.get_total() == 10194
556-
assert extractor.get_total(measure='Sales') == 2326534.354299952
556+
assert round(extractor.get_total(measure='Sales'), 2) == 2326534.35
557557

558558
df = pd.read_csv(output_file)
559559
assert round(df['Sales'].sum(), 4) == 2326534.3543
@@ -585,7 +585,7 @@ def test_hyper_to_csv_without_copy_to_tmp():
585585
do_not_modify_source=False
586586
)
587587
assert extractor.get_total() == 10194
588-
assert extractor.get_total(measure='Sales') == 2326534.3542999607
588+
assert round(extractor.get_total(measure='Sales'), 2) == 2326534.35
589589

590590
df = pd.read_csv(output_file)
591591
assert round(df['Sales'].sum(), 4) == 2326534.3543
@@ -618,7 +618,7 @@ def test_hyper_to_csv_without_using_pantab():
618618
use_pantab=False
619619
)
620620
assert extractor.get_total() == 10194
621-
assert extractor.get_total(measure='Sales') == 2326534.3542999607
621+
assert round(extractor.get_total(measure='Sales'), 2) == 2326534.35
622622

623623
df = pd.read_csv(output_file)
624624
assert round(df['Sales'].sum(), 4) == 2326534.3543

0 commit comments

Comments
 (0)