Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e3ce2c8
Integrate spatial reuse with temporal reuse;
rdxpaper Dec 15, 2017
67beac9
Fixed some bugs
rdxpaper Dec 16, 2017
7add708
If the sampling period is set to 0, leave it be and the corresponding…
rdxpaper Dec 16, 2017
3ef77de
Added the function int read_event_counter(event_thread_t *current,uin…
rdxpaper Dec 16, 2017
7b930bb
Added the mechanism to read the perf counter
rdxpaper Dec 18, 2017
e386bc8
When reading the counter, currently disabled the scaling.
rdxpaper Dec 20, 2017
478d5d5
For the context of triggered point, use the precise PC instead of the…
rdxpaper Dec 30, 2017
f02834e
Fixed a typo
rdxpaper Dec 30, 2017
4731989
Merge branch 'master' of https://github.com/WitchTools/hpctoolkit
rdxpaper Jan 9, 2018
992e6fd
For reuse clients, the calling context becomes [root] reuse_point + j…
rdxpaper Jan 9, 2018
82dc2c5
Added weight field for PERF
rdxpaper Jan 10, 2018
204ab13
Added the metric LATENCY for reuse client
rdxpaper Jan 11, 2018
f4fac97
Fixed a bug
rdxpaper Jan 11, 2018
48b2d70
Set the correct period of the metric "LATENCY"
rdxpaper Jan 12, 2018
cc725a8
Minors
rdxpaper Jan 12, 2018
46afb96
Added data_src for each perf sample;
rdxpaper Jan 19, 2018
d882c50
To read the counter value, we need to scale the value by perf_scale()…
rdxpaper Feb 3, 2018
2cd6568
DEBUGGING MODE: Testing the reuse distance accuracy by "predicting th…
rdxpaper Feb 6, 2018
6778f89
DEBUGGING MODE: Only measure the temporal reuse; Check the reuse dist…
rdxpaper Feb 6, 2018
db21f22
Minors. Still debugging mode
rdxpaper Feb 7, 2018
28f475e
Support sampling both loads and stores
rdxpaper Mar 10, 2018
e88b432
Use another trace file to output the result from reuse client
rdxpaper Mar 10, 2018
8251845
Fixed a bug
rdxpaper Mar 15, 2018
430b1d2
Changed back to support reuse optimization
rdxpaper Mar 15, 2018
0300a71
Minors
rdxpaper Mar 15, 2018
b59b353
We will count the number of LOAD_ABOVE_LATENCY even if it is a cache hit
rdxpaper Mar 16, 2018
dd1c6de
Merge branch 'reuse-histo-2' into reuse-histo
rdxpaper Mar 16, 2018
532ea41
Merge branch 'reuse-histo' and use REUSE_HISTO macro
rdxpaper Mar 16, 2018
2e4acee
1). set the monitor length from 4 to 1
rdxpaper Mar 23, 2018
778f05c
NOT a workable version. (But a compilable version)
rdxpaper Mar 28, 2018
526df62
NOT A WORKABLE VERSION (but a compilable version)
rdxpaper Mar 29, 2018
c2c01cb
Workable version. To collect reuse histo, we can just read sampling e…
rdxpaper Mar 30, 2018
9892b2a
Implemented the reuse guide version
rdxpaper Mar 30, 2018
8801537
Fixed some bug
rdxpaper Mar 31, 2018
9d5f4d7
Fixed some bug;
rdxpaper Apr 16, 2018
44d793c
Fixed a bug of overflow
rdxpaper Apr 16, 2018
d473014
Merge remote-tracking branch 'upstream/master'
rdxpaper Apr 18, 2018
7594a8e
Merge remote-tracking branch 'upstream/new_reservoir' into new-reservoir
rdxpaper Apr 18, 2018
0f4821c
Fixed some conflicts
rdxpaper Apr 18, 2018
b154c5d
Added the reservoir sampling patch
rdxpaper Apr 24, 2018
2c3818c
Added online profiling (without tracing)
rdxpaper May 8, 2018
d8b2892
cleaned some code
rdxpaper May 9, 2018
6e218bc
Merge branch 'master' of https://github.com/WitchTools/hpctoolkit
rdxpaper May 9, 2018
2443797
Merge branch 'master' of https://github.com/WitchTools/hpctoolkit
rdxpaper May 10, 2018
0360f54
Fixed a bug of opening perf events more than needed.
rdxpaper May 22, 2018
d3bd412
Merge branch 'master' of https://github.com/WitchTools/hpctoolkit
rdxpaper May 22, 2018
f56e79a
Merge remote-tracking branch 'upstream/master'
rdxpaper Jul 17, 2018
9fcc35c
Added the missing "}" due to resolving the conflicts of previous merge
rdxpaper Jul 17, 2018
74ee8a9
Merge remote-tracking branch 'upstream/master'
rdxpaper Jul 18, 2018
2500cf1
Fixed some counter issues. HPCA submission version
rdxpaper Aug 11, 2018
e59904f
Cleaned some code
rdxpaper Aug 11, 2018
ef41f34
Start to add spatial reuse
rdxpaper Aug 11, 2018
df07a0f
Merge branch 'master' of https://github.com/jqswang/hpctoolkit
rdxpaper Aug 11, 2018
4dfee89
Added latency metrics of different cache levels and more configurable
rdxpaper Jun 13, 2019
77aea3a
Merge branch 'master' of https://github.com/WitchTools/hpctoolkit
rdxpaper Jun 15, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 49 additions & 31 deletions src/tool/hpcrun/hpcrun_stats.c
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,10 @@ static atomic_long num_falseWWIns = ATOMIC_VAR_INIT(0);
static atomic_long num_falseRWIns = ATOMIC_VAR_INIT(0);
static atomic_long num_falseWRIns = ATOMIC_VAR_INIT(0);

// Reuse-related counters. Each one is bumped through the matching
// hpcrun_stats_num_<name>_inc() routine in this file.

// Added to by hpcrun_stats_num_reuse_inc().
static atomic_long num_reuse = ATOMIC_VAR_INIT(0);
// Added to by hpcrun_stats_num_reuseSpatial_inc(); printed as "reuseSpatial".
static atomic_long num_reuseSpatial = ATOMIC_VAR_INIT(0);
// Added to by hpcrun_stats_num_reuseTemporal_inc(); printed as "reuseTemporal".
static atomic_long num_reuseTemporal = ATOMIC_VAR_INIT(0);
// Added to by hpcrun_stats_num_latency_inc(); printed as "latency".
static atomic_long num_latency = ATOMIC_VAR_INIT(0);
// Added to by hpcrun_stats_num_corrected_reuse_distance_inc();
// printed as "CORRECTED_REUSE_DISTANCE".
static atomic_long num_corrected_reuse_distance = ATOMIC_VAR_INIT(0);

static atomic_long num_unwind_intervals_total = ATOMIC_VAR_INIT(0);
static atomic_long num_unwind_intervals_suspicious = ATOMIC_VAR_INIT(0);
Expand Down Expand Up @@ -155,6 +157,11 @@ hpcrun_stats_reinit(void)
atomic_store_explicit(&num_trueWWIns, 0, memory_order_relaxed);
atomic_store_explicit(&num_trueRWIns, 0, memory_order_relaxed);
atomic_store_explicit(&num_trueWRIns, 0, memory_order_relaxed);

atomic_store_explicit(&num_reuseSpatial, 0, memory_order_relaxed);
atomic_store_explicit(&num_reuseTemporal, 0, memory_order_relaxed);
atomic_store_explicit(&num_latency, 0, memory_order_relaxed);
atomic_store_explicit(&num_corrected_reuse_distance, 0, memory_order_relaxed);
}


Expand Down Expand Up @@ -274,95 +281,107 @@ void
hpcrun_stats_num_insane_ip_inc(long val)
{
atomic_fetch_add_explicit(&num_insane_ip, val, memory_order_relaxed);
}
long
}


// Return the current value of the num_insane_ip counter
// (read with relaxed memory ordering).
long
hpcrun_stats_num_insane_ip(void)
{
  long count = atomic_load_explicit(&num_insane_ip, memory_order_relaxed);
  return count;
}


// Atomically add `val` to the num_writtenBytes counter
// (relaxed memory ordering). Reported as "writtenBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_writtenBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_writtenBytes, val, memory_order_relaxed);
}


// Atomically add `val` to the num_usedBytes counter
// (relaxed memory ordering). Reported as "usedBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_usedBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_usedBytes, val, memory_order_relaxed);
}

// Atomically add `val` to the num_deadBytes counter
// (relaxed memory ordering). Reported as "deadBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_deadBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_deadBytes, val, memory_order_relaxed);
}

// Atomically add `val` to the num_newBytes counter
// (relaxed memory ordering). Reported as "newBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_newBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_newBytes, val, memory_order_relaxed);
}

// Atomically add `val` to the num_oldAppxBytes counter
// (relaxed memory ordering). Reported as "oldAppxBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_oldAppxBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_oldAppxBytes, val, memory_order_relaxed);
}

// Atomically add `val` to the num_oldBytes counter
// (relaxed memory ordering). Reported as "oldBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_oldBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_oldBytes, val, memory_order_relaxed);
}

// Atomically add `val` to the num_loadedBytes counter
// (relaxed memory ordering). Reported as "loadedBytes" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_loadedBytes_inc(long val)
{
  atomic_fetch_add_explicit(&num_loadedBytes, val, memory_order_relaxed);
}

// Atomically add `val` to the num_accessedIns counter
// (relaxed memory ordering). Reported as "accessedIns" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_accessedIns_inc(long val)
{
  atomic_fetch_add_explicit(&num_accessedIns, val, memory_order_relaxed);
}

// Atomically add `val` to the num_reuseTemporal counter
// (relaxed memory ordering); the previous value is not needed.
void
hpcrun_stats_num_reuseTemporal_inc(long val)
{
  atomic_long* const counter = &num_reuseTemporal;
  (void) atomic_fetch_add_explicit(counter, val, memory_order_relaxed);
}

void
hpcrun_stats_num_reuse_inc(long val)
hpcrun_stats_num_reuseSpatial_inc(long val)
{
atomic_fetch_add_explicit(&num_reuse, val, memory_order_relaxed);
}
atomic_fetch_add_explicit(&num_reuseSpatial, val, memory_order_relaxed);
}

// Atomically add `val` to the num_latency counter
// (relaxed memory ordering). Reported as "latency" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_latency_inc(long val)
{
  atomic_fetch_add_explicit(&num_latency, val, memory_order_relaxed);
}

// Atomically add `val` to the num_corrected_reuse_distance counter
// (relaxed memory ordering); the previous value is not needed.
void
hpcrun_stats_num_corrected_reuse_distance_inc(long val)
{
  atomic_long* const counter = &num_corrected_reuse_distance;
  (void) atomic_fetch_add_explicit(counter, val, memory_order_relaxed);
}

// Atomically add `val` to the num_falseWWIns counter
// (relaxed memory ordering). Reported as "falseWWIns" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_falseWWIns_inc(long val)
{
  atomic_fetch_add_explicit(&num_falseWWIns, val, memory_order_relaxed);
}


// Atomically add `val` to the num_falseRWIns counter
// (relaxed memory ordering). Reported as "falseRWIns" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_falseRWIns_inc(long val)
{
  atomic_fetch_add_explicit(&num_falseRWIns, val, memory_order_relaxed);
}

// Atomically add `val` to the num_falseWRIns counter
// (relaxed memory ordering). Reported as "falseWRIns" in the
// WATCHPOINT STATS summary line.
void
hpcrun_stats_num_falseWRIns_inc(long val)
{
  atomic_fetch_add_explicit(&num_falseWRIns, val, memory_order_relaxed);
}

void
hpcrun_stats_num_trueWWIns_inc(long val)
Expand All @@ -384,7 +403,7 @@ hpcrun_stats_num_trueWRIns_inc(long val)


//-----------------------------
// samples total
// samples total
//-----------------------------

void
Expand All @@ -403,7 +422,7 @@ hpcrun_stats_num_samples_total(void)


//-----------------------------
// samples attempted
// samples attempted
//-----------------------------

void
Expand All @@ -422,7 +441,7 @@ hpcrun_stats_num_samples_attempted(void)


//-----------------------------
// samples blocked async
// samples blocked async
//-----------------------------

// The async blocks happen in the signal handlers, without getting to
Expand All @@ -444,7 +463,7 @@ hpcrun_stats_num_samples_blocked_async(void)


//-----------------------------
// samples blocked dlopen
// samples blocked dlopen
//-----------------------------

void
Expand Down Expand Up @@ -617,7 +636,6 @@ hpcrun_stats_num_samples_yielded(void)
//-----------------------------
// print summary
//-----------------------------

void
hpcrun_stats_print_summary(void)
{
Expand All @@ -637,9 +655,9 @@ hpcrun_stats_print_summary(void)
getrusage(RUSAGE_SELF, &rusage);

//AMSG("WATCHPOINT ANOMALIES: samples:%ld, SM_imprecise:%ld, WP_Set:%ld, WP_triggered:%ld, WP_SampleTriggering:%ld, WP_ImpreciseIP:%ld, WP_InsaneIP:%ld, WP_Off8Addr:%ld, WP_ImpreciseAddr:%ld, WP_Dropped:%ld", num_samples_total, num_samples_imprecise, num_watchpoints_set, num_watchpoints_triggered, num_sample_triggering_watchpoints, num_watchpoints_imprecise, num_insane_ip, num_watchpoints_imprecise_address_8_byte, num_watchpoints_imprecise_address, num_watchpoints_dropped);
AMSG("WATCHPOINT ANOMALIES: samples:%.2e, SM_imprecise:%.2e, WP_Set:%.2e, WP_triggered:%.2e, WP_SampleTriggering:%.2e, WP_ImpreciseIP:%.2e, WP_InsaneIP:%.2e, WP_Off8Addr:%.2e, WP_ImpreciseAddr:%.2e, WP_Dropped:%.2e", (double)atomic_load(&num_samples_total), (double)atomic_load(&num_samples_imprecise), (double)atomic_load(&num_watchpoints_set), (double)atomic_load(&num_watchpoints_triggered), (double)atomic_load(&num_sample_triggering_watchpoints), (double)atomic_load(&num_watchpoints_imprecise), (double)atomic_load(&num_insane_ip), (double)atomic_load(&num_watchpoints_imprecise_address_8_byte), (double)atomic_load(&num_watchpoints_imprecise_address), (double)atomic_load(&num_watchpoints_dropped));
AMSG("WATCHPOINT ANOMALIES: samples:%.2e, SM_imprecise:%.2e, WP_Set:%.2e, WP_triggered:%.2e, WP_SampleTriggering:%.2e, WP_ImpreciseIP:%.2e, WP_InsaneIP:%.2e, WP_Off8Addr:%.2e, WP_ImpreciseAddr:%.2e, WP_Dropped:%.2e, CORRECTED_REUSE_DISTANCE:%.2e", (double)atomic_load(&num_samples_total), (double)atomic_load(&num_samples_imprecise), (double)atomic_load(&num_watchpoints_set), (double)atomic_load(&num_watchpoints_triggered), (double)atomic_load(&num_sample_triggering_watchpoints), (double)atomic_load(&num_watchpoints_imprecise), (double)atomic_load(&num_insane_ip), (double)atomic_load(&num_watchpoints_imprecise_address_8_byte), (double)atomic_load(&num_watchpoints_imprecise_address), (double)atomic_load(&num_watchpoints_dropped), (double)atomic_load(&num_corrected_reuse_distance));

AMSG("WATCHPOINT STATS: writtenBytes:%ld, usedBytes:%ld, deadBytes:%ld, newBytes:%ld, oldBytes:%ld, oldAppxBytes:%ld, loadedBytes:%ld, accessedIns:%ld, falseWWIns:%ld, falseRWIns:%ld, falseWRIns:%ld, trueWWIns:%ld, trueRWIns:%ld, trueWRIns:%ld, RSS:%ld, reuse:%ld, latency:%ld", num_writtenBytes, num_usedBytes, num_deadBytes, num_newBytes, num_oldBytes, num_oldAppxBytes, num_loadedBytes, num_accessedIns, num_falseWWIns, num_falseRWIns, num_falseWRIns, num_trueWWIns, num_trueRWIns, num_trueWRIns, (size_t)(rusage.ru_maxrss), num_reuse, num_latency);
AMSG("WATCHPOINT STATS: writtenBytes:%ld, usedBytes:%ld, deadBytes:%ld, newBytes:%ld, oldBytes:%ld, oldAppxBytes:%ld, loadedBytes:%ld, accessedIns:%ld, falseWWIns:%ld, falseRWIns:%ld, falseWRIns:%ld, trueWWIns:%ld, trueRWIns:%ld, trueWRIns:%ld, RSS:%ld, reuseTemporal:%ld, reuseSpatial:%ldlatency:%ld", num_writtenBytes, num_usedBytes, num_deadBytes, num_newBytes, num_oldBytes, num_oldAppxBytes, num_loadedBytes, num_accessedIns, num_falseWWIns, num_falseRWIns, num_falseWRIns, num_trueWWIns, num_trueRWIns, num_trueWRIns, (size_t)(rusage.ru_maxrss), num_reuseTemporal, num_reuseSpatial, num_latency);

AMSG("SAMPLE ANOMALIES: blocks: %ld (async: %ld, dlopen: %ld), "
"errors: %ld (segv: %ld, soft: %ld)",
Expand Down
13 changes: 7 additions & 6 deletions src/tool/hpcrun/hpcrun_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
void hpcrun_stats_reinit(void);

//-----------------------------
// watchpoint
// watchpoint
//-----------------------------
void hpcrun_stats_num_samples_imprecise_inc(long val);
long hpcrun_stats_num_samples_imprecise(void);
Expand Down Expand Up @@ -85,36 +85,37 @@ void hpcrun_stats_num_deadBytes_inc(long val);
// Each routine below atomically adds `val` to the like-named num_<name>
// counter defined in hpcrun_stats.c.
void hpcrun_stats_num_newBytes_inc(long val);
void hpcrun_stats_num_oldBytes_inc(long val);
void hpcrun_stats_num_oldAppxBytes_inc(long val);
void hpcrun_stats_num_reuse_inc(long val);
void hpcrun_stats_num_reuseTemporal_inc(long val);
void hpcrun_stats_num_reuseSpatial_inc(long val);
void hpcrun_stats_num_loadedBytes_inc(long val);


//-----------------------------
// samples total
// samples total
//-----------------------------

void hpcrun_stats_num_samples_total_inc(void);
long hpcrun_stats_num_samples_total(void);


//-----------------------------
// samples attempted
// samples attempted
//-----------------------------

void hpcrun_stats_num_samples_attempted_inc(void);
long hpcrun_stats_num_samples_attempted(void);


//-----------------------------
// samples blocked async
// samples blocked async
//-----------------------------

void hpcrun_stats_num_samples_blocked_async_inc(void);
long hpcrun_stats_num_samples_blocked_async(void);


//-----------------------------
// samples blocked dlopen
// samples blocked dlopen
//-----------------------------

void hpcrun_stats_num_samples_blocked_dlopen_inc(void);
Expand Down
32 changes: 18 additions & 14 deletions src/tool/hpcrun/metrics.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ hpcrun_get_num_metrics()
id2metric[l->id] = &(l->val);
}
metric_proc_tbl = (metric_upd_proc_t**) hpcrun_malloc(n_metrics * sizeof(metric_upd_proc_t*));

for(metric_proc_map_t* l = proc_map; l; l = l->next) {
// for(metric_proc_map_t* l = proc_map; l; l = l->next) {
TMSG(METRICS_FINALIZE, "metric_proc[%d] = %p", l->id, l->proc);
Expand All @@ -233,15 +233,15 @@ hpcrun_get_num_metrics()

// Finalize metrics

// Finalize the metric tables by invoking hpcrun_get_num_metrics();
// its return value is not used here.
void hpcrun_finalize_metrics()
{
  hpcrun_get_num_metrics();
}

metric_desc_t*
hpcrun_id2metric(int metric_id)
{
hpcrun_get_num_metrics();
hpcrun_get_num_metrics();
if ((0 <= metric_id) && (metric_id < n_metrics)) {
return id2metric[metric_id];
}
Expand Down Expand Up @@ -307,7 +307,7 @@ hpcrun_new_metric_of_kind(kind_info_t* kind)
kind->idx++;

n_metrics++;

//
// No preallocation for metric_proc tbl
//
Expand All @@ -316,7 +316,7 @@ hpcrun_new_metric_of_kind(kind_info_t* kind)
m->id = metric_data->id;
m->proc = (metric_upd_proc_t*) NULL;
proc_map = m;

return metric_data->id;
}

Expand All @@ -326,7 +326,7 @@ hpcrun_new_metric(void)
return hpcrun_new_metric_of_kind(current_kind);
}

metric_desc_t*
metric_desc_t*
hpcrun_set_metric_info_w_fn(int metric_id, const char* name,
MetricFlags_ValFmt_t valFmt, size_t period,
metric_upd_proc_t upd_fn, metric_desc_properties_t prop)
Expand Down Expand Up @@ -377,7 +377,7 @@ hpcrun_set_metric_info_w_fn(int metric_id, const char* name,
}


metric_desc_t*
metric_desc_t*
hpcrun_set_metric_info_and_period(int metric_id, const char* name,
MetricFlags_ValFmt_t valFmt, size_t period, metric_desc_properties_t prop)
{
Expand Down Expand Up @@ -454,7 +454,7 @@ hpcrun_metric_std(int metric_id, metric_set_t* set,
switch (minfo->flags.fields.valFmt) {
case MetricFlags_ValFmt_Int:
if (operation == '+')
loc->i += val.i;
loc->i += val.i;
else if (operation == '=')
loc->i = val.i;
break;
Expand All @@ -479,13 +479,13 @@ hpcrun_metric_std_set(int metric_id, metric_set_t* set,
}

//
// Given two metrics, metric_id1 and metric_id2,
// bump up metric_id2 to reach metric_id1 and return
// Given two metrics, metric_id1 and metric_id2,
// bump up metric_id2 to reach metric_id1 and return
// the difference between them multiplied by the period.
//
int
hpcrun_get_weighted_metric_diff(int metric_id1, int metric_id2,
metric_set_t* set, cct_metric_data_t * diff,
hpcrun_get_weighted_metric_diff(int metric_id1, int metric_id2,
metric_set_t* set, cct_metric_data_t * diff,
cct_metric_data_t * diffWithPeriod)
{
metric_desc_t* minfo1 = hpcrun_id2metric(metric_id1);
Expand All @@ -509,8 +509,12 @@ hpcrun_get_weighted_metric_diff(int metric_id1, int metric_id2,
diff->i = (loc1->i - loc2->i);
break;
case MetricFlags_ValFmt_Real:
assert(loc1->r >= loc2->r);
diff->r = (loc1->r - loc2->r);
if (loc1->r < loc2->r){
diff->r = 0;
}
else {
diff->r = (loc1->r - loc2->r);
}
diffWithPeriod->r = (loc1->r - loc2->r) * minfo1->period;
break;
default:
Expand Down
Loading