Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,35 +1,45 @@
USE GRAPH Mule_Account_Detection

CREATE DISTRIBUTED QUERY average_money_received_by_one_hop_neighbors(
DATETIME min_createTime = to_datetime("2019-01-06"),
DATETIME max_createTime = to_datetime("2022-01-08") ) FOR GRAPH Mule_Account_Detection {
DATETIME min_createTime = to_datetime("2019-01-06"),
DATETIME max_createTime = to_datetime("2022-01-08")
) FOR GRAPH Mule_Account_Detection {

/*
* Feature:
average money, total money received by all 1 hop away neighbors from this account (considering transfer in as edges)
and the count of one hop accounts
*/

SumAccum<DOUBLE> @total_amount;
AvgAccum @avg_amount;
SetAccum<STRING> @accounts;


rlt = SELECT s
/*
Query Overview:
This is a feature engineering query: it calculates the average amount of money, total money received,
and the count of one-hop neighbors for each account within a specified time range. The result provides
insights into the transaction behavior of one-hop neighbors linked through incoming transfers.

Parameters:
- min_createTime, max_createTime: Filter transactions within this time range.

Outputs:
- Total_One_Hop_Amount: Total money received by all one-hop neighbors.
- Average_One_Hop_Amount: Average amount received by one-hop neighbors.
- Count_One_Hop_Accounts: Total count of unique one-hop accounts.
*/

SumAccum<DOUBLE> @total_amount;
AvgAccum @avg_amount;
SetAccum<STRING> @accounts;

rlt =
SELECT s
FROM Account:s -(Send_Transfer>)- Transfer_Transaction:ta -(Receive_Transfer>)- Account:d
-(<Receive_Transfer)- Transfer_Transaction:td
-(<Receive_Transfer)- Transfer_Transaction:td
WHERE ta.transfer_time >= min_createTime
AND ta.transfer_time < max_createTime
AND td.transfer_time >= min_createTime
AND td.transfer_time < max_createTime
ACCUM s.@total_amount += td.amount,
s.@avg_amount +=td.amount,
s.@accounts += d.id;

PRINT rlt[
rlt.@total_amount AS Total_One_Hop_Amount,
rlt.@avg_amount AS Average_One_Hop_Amount,
rlt.@accounts.size() AS Count_One_Hop_Accounts
];

}
AND ta.transfer_time < max_createTime
AND td.transfer_time >= min_createTime
AND td.transfer_time < max_createTime
ACCUM
s.@total_amount += td.amount,
s.@avg_amount += td.amount,
s.@accounts += d.id;

PRINT rlt[
rlt.@total_amount AS Total_One_Hop_Amount,
rlt.@avg_amount AS Average_One_Hop_Amount,
rlt.@accounts.size() AS Count_One_Hop_Accounts
];
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ CREATE OR REPLACE QUERY insights_get_binary_classification_ratios (
INT tg_false_negative,
INT tg_false_positive
) {
// This is a query to be installed to support the Insights Application's binary classification ratios.

TYPEDEF TUPLE <STRING Performance_Metrics, DOUBLE Percentage_Without_TG, DOUBLE Percentage_With_TG> Table_Entry;
ListAccum<Table_Entry> @@performance_metrics_stats;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ CREATE OR REPLACE QUERY insights_get_net_gain_numbers_and_percentages (
INT tg_false_positive,
INT tg_false_negative
) {
// This is a query to be installed to support the Insights Application's net gain numbers and percentages.
INT additional_fraud_blocked = tg_true_positive - no_tg_true_positive;
DOUBLE additional_fraud_blocked_pct = (100.0 * additional_fraud_blocked) / no_tg_true_positive;
STRING additional_fraud_blocked_pct_str = to_string(round(additional_fraud_blocked_pct, 3)) + " %";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ CREATE DISTRIBUTED QUERY insights_shortest_path(
Vertex<Account> ver
// 0448743965
) FOR GRAPH Mule_Account_Detection {

// This is an Insights query; it must be installed so that the Insights App Shortest Path can be built on top of it.
SetAccum<EDGE> @@fraud;
OrAccum @or_visted;
ListAccum<VERTEX> @path_list;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,52 +1,76 @@
USE GRAPH Mule_Account_Detection

CREATE DISTRIBUTED QUERY n_hop_fraud_count_device(
INT max_hops=3) FOR GRAPH Mule_Account_Detection {

/*
This query determines number of fraudulent accounts that can be reached in max_hops of device sharing
*/

SetAccum<STRING> @sources;
SumAccum<INT> @last_iter_source_size;
MinAccum<INT> @cnt;

mule = SELECT a
FROM Account:a
WHERE a.is_fraud == 1;

fraudster = SELECT p
FROM mule:a -(Party_Has_Account:e)- Party:p
WHERE a.is_fraud == 1
ACCUM p.@sources += p.id
POST-ACCUM p.@last_iter_source_size = p.@sources.size();
INT max_hops = 3
) FOR GRAPH Mule_Account_Detection {

/*
Query Overview:
This is a feature engineering query: starting from the parties that hold a fraudulent account, it propagates
their ids across device-sharing connections for up to max_hops iterations, then records for each account how
many distinct fraud-linked parties reached the party connected to that account. This can help assess fraud
risk by analyzing the spread of fraud across devices shared between parties.

Feature Storage Approaches:
- Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes.
- Approach 2: Output features to CSV files, which downstream ML models can read for feature access.

Note: This solution uses Approach 1 for direct feature updates on Account vertices.

Parameters:
- max_hops: Specifies the maximum number of hops for device-sharing traversal.

Outputs:
- fraud_device: Attribute on Account vertices holding the number of distinct fraud-linked parties that reached the account's party; only accounts with a count greater than 0 are updated.
*/

SetAccum<STRING> @sources;
SumAccum<INT> @last_iter_source_size;
MinAccum<INT> @cnt;

// Identify fraudulent accounts
mule =
SELECT a
FROM Account:a
WHERE a.is_fraud == 1;

// Seed the traversal: select the parties connected to a fraudulent account via Party_Has_Account; each starts with its own id in @sources
fraudster =
SELECT p
FROM mule:a -(Party_Has_Account:e)- Party:p
WHERE a.is_fraud == 1
ACCUM p.@sources += p.id
POST-ACCUM p.@last_iter_source_size = p.@sources.size();

start = fraudster;

WHILE start.size() > 0 LIMIT max_hops DO

dest = SELECT t
FROM start:s -(Has_Device:h1)- Device:d -(Has_Device:h2)- Party:t
WHERE t.id != s.id
ACCUM t.@sources += s.@sources;

start = SELECT t
FROM dest:t
WHERE t.@sources.size() > t.@last_iter_source_size
POST-ACCUM t.@last_iter_source_size = t.@sources.size();

END;

mule = SELECT a
FROM Account:a -(Party_Has_Account:e)- Party:p
ACCUM a.@cnt = p.@sources.size()
HAVING a.@cnt >0;

PRINT mule.size();
rlt = SELECT a
start = fraudster;

// Traverse up to max_hops through shared devices
WHILE start.size() > 0 LIMIT max_hops DO
dest =
SELECT t
FROM start:s -(Has_Device:h1)- Device:d -(Has_Device:h2)- Party:t
WHERE t.id != s.id
ACCUM t.@sources += s.@sources;

start =
SELECT t
FROM dest:t
WHERE t.@sources.size() > t.@last_iter_source_size
POST-ACCUM t.@last_iter_source_size = t.@sources.size();
END;

// For each account, set @cnt to the size of its connected party's @sources set (ids of the seed parties that reached it); keep accounts with @cnt > 0
mule =
SELECT a
FROM Account:a -(Party_Has_Account:e)- Party:p
ACCUM a.@cnt = p.@sources.size()
HAVING a.@cnt > 0;

PRINT mule.size();

rlt =
SELECT a
FROM mule:a
POST-ACCUM
a.setAttr("fraud_device", a.@cnt);
POST-ACCUM a.setAttr("fraud_device", a.@cnt);

PRINT "Finished successfully" AS status;

}
}
Loading