|
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
| 7 | + |
def plot(file_formats, num_datasets, dimensions):
    """Plot benchmark timings for each file format as grouped bar charts.

    Produces two PNGs under datasets_test/data/plots/: one comparing dataset
    read / write times and one comparing dataset create / open times, with one
    bar per file format in each group and standard-deviation error bars.

    Args:
        file_formats: list of file-format names; each must have a matching CSV
            in datasets_test/data/ (see process_csv).
        num_datasets: number of datasets benchmarked (used in titles / paths).
        dimensions: number of elements per dataset (used in titles / paths).
    """
    # makedirs with exist_ok=True also creates missing parent directories and
    # avoids the race between the exists() check and mkdir() it replaces.
    os.makedirs('datasets_test/data/plots', exist_ok=True)
    create_time, write_time, open_time, read_time, error = process_csv(file_formats, num_datasets, dimensions)
    # error[i] holds [create, write, open, read] deviations for file_formats[i].
    _plot_pair(file_formats,
               ['Dataset Read Time', 'Dataset Write Time'],
               list(zip(read_time, write_time)),
               [(e[3], e[1]) for e in error],
               f'{num_datasets} Datasets {dimensions} Elements Dataset Read / Write Times',
               f'datasets_test/data/plots/{num_datasets}_{dimensions}_read_write.png')
    _plot_pair(file_formats,
               ['Dataset Create Time', 'Dataset Open Time'],
               list(zip(create_time, open_time)),
               [(e[0], e[2]) for e in error],
               f'{num_datasets} Datasets {dimensions} Elements Dataset Create / Open Times',
               f'datasets_test/data/plots/{num_datasets}_{dimensions}_create_open.png')


def _plot_pair(file_formats, plt_labels, heights, errors, title, out_path):
    """Render one grouped bar chart with error bars and save it to out_path.

    heights[i] and errors[i] are the (left, right) bar heights and y-error
    values for file_formats[i].
    """
    width = .25
    x = np.arange(len(plt_labels))
    offset = -width
    fig = plt.figure()
    plt.ylabel('Time (ms)')
    plt.title(title)
    plt.xticks(x, plt_labels)
    for fmt, pair, err in zip(file_formats, heights, errors):
        # Round to 5 decimal places so the bar labels display nicely.
        rounded = [round(v, 5) for v in pair]
        bars = plt.bar(x=x + offset, height=rounded, width=width,
                       label=fmt, edgecolor='black', yerr=list(err))
        plt.bar_label(bars, padding=3)
        offset += width
    plt.legend()
    plt.tight_layout()
    plt.savefig(out_path)
    # Close the figure instead of cla()/clf() so repeated calls do not
    # accumulate open figures (matplotlib warns after 20 by default).
    plt.close(fig)
| 62 | + |
| 63 | + |
def process_csv(file_formats, num_datasets, dimensions):
    """Compute per-format mean timings and deviations from the benchmark CSVs.

    For each file format, reads datasets_test/data/{format}_{num_datasets}_
    {dimensions}.csv, averages each timing column over the trial rows, and
    appends an 'Average' row to the CSV if one is not already present.

    Args:
        file_formats: list of file-format names; each CSV's first column is
            named after the format and labels the trial rows.
        num_datasets: number of datasets benchmarked (part of the CSV name).
        dimensions: number of elements per dataset (part of the CSV name).

    Returns:
        (create_times, write_times, open_times, read_times, errors) — the
        first four are lists of per-format mean times; errors is a list of
        [create, write, open, read] standard deviations per format.
    """
    total_dataset_create_time = []
    total_dataset_write_time = []
    total_dataset_open_time = []
    total_dataset_read_time = []
    error = []
    for file_format in file_formats:
        path = f'datasets_test/data/{file_format}_{num_datasets}_{dimensions}.csv'
        df = pd.read_csv(path)
        has_average = df.iloc[-1, 0] == 'Average'
        # Exclude a previously appended 'Average' row from the statistics so
        # they cover the raw trials only. (Including it would leave the mean
        # unchanged but bias the standard deviation low on every re-run.)
        trials = df.iloc[:-1, 1:] if has_average else df.iloc[:, 1:]
        dataset_create_time, dataset_write_time, dataset_open_time, dataset_read_time = trials.mean(axis=0)
        create_deviation, write_deviation, open_deviation, read_deviation = trials.std(axis=0)
        total_dataset_create_time.append(dataset_create_time)
        total_dataset_write_time.append(dataset_write_time)
        total_dataset_open_time.append(dataset_open_time)
        total_dataset_read_time.append(dataset_read_time)
        error.append([create_deviation, write_deviation, open_deviation, read_deviation])
        if has_average:
            # The last row of the CSV already holds the average times; don't
            # append a duplicate.
            continue
        average_values = pd.DataFrame({
            file_format: 'Average',
            'Dataset Creation Time': [dataset_create_time],
            'Dataset Write Time': [dataset_write_time],
            'Dataset Open Time': [dataset_open_time],
            'Dataset Read Time': [dataset_read_time]
        })
        df = pd.concat([df, average_values], ignore_index=True)
        df.to_csv(path, index=False)
    return total_dataset_create_time, total_dataset_write_time, total_dataset_open_time, total_dataset_read_time, error