From 0ae1ab44b320b8dd43f0171726daf4706e42b468 Mon Sep 17 00:00:00 2001 From: Truman Wang Date: Wed, 6 Nov 2024 15:22:54 -0800 Subject: [PATCH 1/5] add S3-like output file example --- .../queries/community_transaction_average_amount.gsql | 3 ++- .../transaction_fraud/queries/community_transaction_count.gsql | 3 ++- .../queries/community_transaction_max_amount.gsql | 3 ++- .../queries/community_transaction_min_amount.gsql | 3 ++- .../queries/community_transaction_total_amount.gsql | 3 ++- .../queries/merchant_category_transaction_average_amount.gsql | 3 ++- .../queries/merchant_category_transaction_count.gsql | 3 ++- .../queries/merchant_category_transaction_maximum_amount.gsql | 3 ++- .../queries/merchant_category_transaction_min_amount.gsql | 3 ++- .../queries/merchant_category_transaction_total_amount.gsql | 3 ++- 10 files changed, 20 insertions(+), 10 deletions(-) diff --git a/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql index 338d53c3..1621138a 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_average_amount( - //STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", + //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + //STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, STRING community_type = "Merchant", //change to "Card" when run query on Card Community STRING result_attribute = "com_mer_txn_avg_amt", //change to "com_cd_txn_avg_amt" when run query on Card Community diff --git a/financial_crime/transaction_fraud/queries/community_transaction_count.gsql 
b/financial_crime/transaction_fraud/queries/community_transaction_count.gsql index a6532515..33a8f51a 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_count.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_count.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_count( - //STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", + //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + //STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, STRING community_type = "Merchant", //change to "Card" when run query on Card Community STRING result_attribute = "com_mer_txn_cnt", //change to "com_cd_txn_cnt" when run query on Card Community diff --git a/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql index f1e32ad9..9731849f 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_max_amount( - //STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", + //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + //STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, STRING community_type = "Merchant", //change to "Card" when run query on Card Community STRING result_attribute = "com_mer_txn_max_amt", //change to "com_cd_txn_max_amt" when run query on Card Community diff --git a/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql index bd7090ee..98520df6 100644 --- 
a/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_min_amount( - //STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", + //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + //STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, STRING community_type = "Merchant", //change to "Card" when run query on Card Community STRING result_attribute = "com_mer_txn_min_amt", //change to "com_cd_txn_min_amt" when run query on Card Community diff --git a/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql index f3f813c7..a3dbe194 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_total_amount( - //STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", + //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + //STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, STRING community_type = "Merchant", //change to "Card" when run query on Card Community STRING result_attribute = "com_mer_txn_total_amt", //change to "com_cd_txn_total_amt" when run query on Card Community diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql index 05a49aaf..9c7a8c4c 100644 --- 
a/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_average_amount( -//STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", +//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", +//STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, bool print_results=false ) FOR GRAPH Transaction_Fraud SYNTAX v3 { diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql index a82c9c95..7d1bae8a 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_count( -//STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", +//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", +//STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, bool print_results=false ) FOR GRAPH Transaction_Fraud SYNTAX v3 { diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql index 25c400db..9b87c89d 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY 
merchant_category_transaction_maximum_amount( -//STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", +//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", +//STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, bool print_results=false ) FOR GRAPH Transaction_Fraud SYNTAX v3 { diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql index 01c803d9..305daaf0 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_min_amount( -//STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", +//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", +//STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool save_csv=false, bool print_results=false ) FOR GRAPH Transaction_Fraud SYNTAX v3 { diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql index 130508d2..4b2a8774 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql @@ -1,6 +1,7 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_total_amount( -//STRING file_path = "/home/tigergraph/gsql_output/community_count_transaction.csv", +//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", +//STRING file_path = "s3://bucket-name/path/print_example_file.txt", //bool 
save_csv=false, bool print_results=false ) FOR GRAPH Transaction_Fraud SYNTAX v3 { From 626f40106e89397d55aa62f970e52c1f5a7dc514 Mon Sep 17 00:00:00 2001 From: Truman Wang Date: Thu, 7 Nov 2024 13:53:43 -0800 Subject: [PATCH 2/5] remove copilot related queries --- .../attributes_to_party_traversal.gsql | 81 ------------------- .../queries/party_full_address.gsql | 32 -------- .../queries/single_Party_PII.gsql | 72 ----------------- .../queries/single_card_lookup.gsql | 77 ------------------ .../queries/single_merchant_lookup.gsql | 39 --------- .../queries/single_transaction_lookup.gsql | 25 ------ 6 files changed, 326 deletions(-) delete mode 100644 financial_crime/transaction_fraud/queries/attributes_to_party_traversal.gsql delete mode 100644 financial_crime/transaction_fraud/queries/party_full_address.gsql delete mode 100644 financial_crime/transaction_fraud/queries/single_Party_PII.gsql delete mode 100644 financial_crime/transaction_fraud/queries/single_card_lookup.gsql delete mode 100644 financial_crime/transaction_fraud/queries/single_merchant_lookup.gsql delete mode 100644 financial_crime/transaction_fraud/queries/single_transaction_lookup.gsql diff --git a/financial_crime/transaction_fraud/queries/attributes_to_party_traversal.gsql b/financial_crime/transaction_fraud/queries/attributes_to_party_traversal.gsql deleted file mode 100644 index e3dcd7f8..00000000 --- a/financial_crime/transaction_fraud/queries/attributes_to_party_traversal.gsql +++ /dev/null @@ -1,81 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE DISTRIBUTED QUERY attributes_to_party_traversal( - STRING v_type, - STRING id) FOR GRAPH Transaction_Fraud SYNTAX V3{ - - /* - This query navigates from any given attribute of a party's identification information to locate all other personally - identifiable information (PII) associated with that party. 
- - * Parameters: - * - v_type: the vertex type of the given PII attribute - * - id: the primary id of the given PII vertex - */ - - SetAccum @@set; - SetAccum @@edge_type_set; - MinAccum @address, @full_name, @phone,@dob,@email,@ids,@device,@ips; - VERTEX v; - v = to_vertex(id,v_type); - - @@set += v; - start = @@set; - - rlt = SELECT t - FROM (s:start) -[]-> (t:Party); - - - @@edge_type_set = ( - "Has_Address", - "Has_Full_Name", - "Has_Phone", - "Has_DOB", - "Has_Email", - "Has_ID", - "Has_Device", - "Has_IP" - ); - - - - rlt = SELECT s - FROM (s:rlt) -[e:@@edge_type_set]-> (t) - ACCUM - Case e.type - WHEN "Has_Address" THEN - s.@address += t - WHEN "Has_Full_Name" THEN - s.@full_name += t - WHEN "Has_Phone" THEN - s.@phone += t - WHEN "Has_DOB" THEN - s.@dob += t - WHEN "Has_Email" THEN - s.@email += t - WHEN "Has_ID" THEN - s.@ids += t - WHEN "Has_Device" THEN - s.@device += t - WHEN "Has_IP" THEN - s.@ips += t - END; - PRINT rlt[ - rlt.@address AS Address, - rlt.@full_name AS Full_Name, - rlt.@phone AS Phone, - rlt.@dob AS DOB, - rlt.@email AS Email, - rlt.@ids AS ID, - rlt.@device AS Device, - rlt.@ips AS IP - ]; - - - - -} - -UPDATE DESCRIPTION OF QUERY attributes_to_party_traversal "Originating from a chosen attribute of party identification information, this query methodically searches the transaction fraud graph to retrieve all associated PII for the specified party. Its utility is paramount in fraud detection and prevention frameworks, enabling a comprehensive analysis of party identities." - -UPDATE DESCRIPTION OF QUERY_PARAM attributes_to_party_traversal.v_type "The vertex type of party's identification information Vertex type." -UPDATE DESCRIPTION OF QUERY_PARAM attributes_to_party_traversal.id "The id of the party's identification information Vertex." 
diff --git a/financial_crime/transaction_fraud/queries/party_full_address.gsql b/financial_crime/transaction_fraud/queries/party_full_address.gsql deleted file mode 100644 index 106165a4..00000000 --- a/financial_crime/transaction_fraud/queries/party_full_address.gsql +++ /dev/null @@ -1,32 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE QUERY party_full_address ( - VERTEX p -) FOR GRAPH Transaction_Fraud SYNTAX V3 { - - // This query retrieves the complete address for a party, encompassing the street address, city, state, and zipcode. - - MinAccum @@address, @@zipcode, @@state; - MinAccum @@city; - - start = {p}; - - rlt = SELECT a - FROM (s:start) -[:Has_Address]- (a:Address) -[:Located_In]- (c:City) - [:Located_In]- (t:State) - ACCUM - @@address += a, - @@city += c.city, - @@state += t - ; - - rlt = SELECT a - FROM (s:start) -[:Has_Address]- (a:Address) -[:Assigned_To]- (z:Zipcode) - ACCUM @@zipcode += z - ; - - PRINT @@address AS Street_Address, @@city AS City, @@state AS State, @@zipcode AS Zipcode; - -} - -UPDATE DESCRIPTION OF QUERY party_full_address "This query retrieves the complete address for a party, encompassing the street address, city, state, and zipcode. It's essential for verifying and analyzing party location data, facilitating accurate and efficient address validation." - -UPDATE DESCRIPTION OF QUERY_PARAM party_full_address.p "The Party Vertex of interest." diff --git a/financial_crime/transaction_fraud/queries/single_Party_PII.gsql b/financial_crime/transaction_fraud/queries/single_Party_PII.gsql deleted file mode 100644 index da5b9be3..00000000 --- a/financial_crime/transaction_fraud/queries/single_Party_PII.gsql +++ /dev/null @@ -1,72 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE QUERY single_Party_PII ( - VERTEX ver // sample party VERTEX: 213137721090747 -) FOR GRAPH Transaction_Fraud SYNTAX V3 { - - /** - - * Feature: - * This query returns the PII attributes of a single Party vertex. 
- - * Parameters: - * - Vertex ver specifies a single input Vertex - - * Output: - * the query prints the results directly to the console. - - */ - - SetAccum @@edge_type_set; - @@edge_type_set = ( - "Has_Address", - "Has_Full_Name", - "Has_Phone", - "Has_DOB", - "Has_Email", - "Has_ID", - "Has_Device", - "Has_IP" - ); - - MinAccum @address, @full_name, @phone,@dob,@email,@ids,@device,@ips; - - rlt = {ver}; - rlt = SELECT s - FROM (s:rlt) -[e:@@edge_type_set]- (t) - ACCUM - Case e.type - WHEN "Has_Address" THEN - s.@address += t - WHEN "Has_Full_Name" THEN - s.@full_name += t - WHEN "Has_Phone" THEN - s.@phone += t - WHEN "Has_DOB" THEN - s.@dob += t - WHEN "Has_Email" THEN - s.@email += t - WHEN "Has_ID" THEN - s.@ids += t - WHEN "Has_Device" THEN - s.@device += t - WHEN "Has_IP" THEN - s.@ips += t - END - ; - - PRINT rlt[ - rlt.@address AS Address, - rlt.@full_name AS Full_Name, - rlt.@phone AS Phone, - rlt.@dob AS DOB, - rlt.@email AS Email, - rlt.@ids AS ID, - rlt.@device AS Device, - rlt.@ips AS IP - ]; - -} - -UPDATE DESCRIPTION OF QUERY single_Party_PII "This query retrieves party identification information, including details like full name, date of birth, email address, and other personal identifiers. It is used for verifying the identity of individuals associated with transactions, which is crucial for enhancing security measures and preventing identity theft or fraud within financial operations." - -UPDATE DESCRIPTION OF QUERY_PARAM single_Party_PII.ver "The single Party vertex of interest." 
diff --git a/financial_crime/transaction_fraud/queries/single_card_lookup.gsql b/financial_crime/transaction_fraud/queries/single_card_lookup.gsql deleted file mode 100644 index 802463e7..00000000 --- a/financial_crime/transaction_fraud/queries/single_card_lookup.gsql +++ /dev/null @@ -1,77 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE QUERY single_card_lookup( - VERTEX ver // sample card vertex: 3577157368507054 -) FOR GRAPH Transaction_Fraud SYNTAX V3 { - - /** - - * Feature: - * This query returns the detailed attributes of a single Card vertex. - * - * Parameters: - * - Vertex ver specifies a single input Vertex - - * Output: - * the query prints the results directly to the console. - - */ - - SetAccum @@edge_type_set; - @@edge_type_set = ( - "Has_Address", - "Has_Full_Name", - "Has_Phone", - "Has_DOB", - "Has_Email", - "Has_ID", - "Has_Device", - "Has_IP" - ); - - MinAccum @address, @full_name, @phone,@dob,@email,@ids,@device,@ips; - - start = {ver}; - rlt = SELECT s - FROM (s:start)-[:Party_Has_Card]- (:Party) -[e:@@edge_type_set]- (t) - ACCUM - Case e.type - WHEN "Has_Address" THEN - s.@address += t - WHEN "Has_Full_Name" THEN - s.@full_name += t - WHEN "Has_Phone" THEN - s.@phone += t - WHEN "Has_DOB" THEN - s.@dob += t - WHEN "Has_Email" THEN - s.@email += t - WHEN "Has_ID" THEN - s.@ids += t - WHEN "Has_Device" THEN - s.@device += t - WHEN "Has_IP" THEN - s.@ips += t - END - ; - - PRINT rlt[ - rlt.card_number AS Card_Number, - rlt.@full_name AS Full_Name, - rlt.occupation AS Job, - rlt.@address AS Address, - rlt.@phone AS Phone, - rlt.@dob AS DOB, - rlt.@email AS Email, - rlt.@ids AS ID, - rlt.@device AS Device, - rlt.@ips AS IP, - rlt.pagerank AS Pagerank_Score, - rlt.c_id AS Community_ID, - rlt.c_size AS Comunity_Size - ]; - -} - -UPDATE DESCRIPTION OF QUERY single_card_lookup "This query retrieves the detailed attributes associated with a single Card vertex. 
It is used for a thorough examination of a specific card's details, aiding in fraud investigation and risk assessment by providing in-depth insight into the card's attributes and usage patterns." - -UPDATE DESCRIPTION OF QUERY_PARAM single_card_lookup.ver "The single Card vertex of interest." diff --git a/financial_crime/transaction_fraud/queries/single_merchant_lookup.gsql b/financial_crime/transaction_fraud/queries/single_merchant_lookup.gsql deleted file mode 100644 index e2baf7a3..00000000 --- a/financial_crime/transaction_fraud/queries/single_merchant_lookup.gsql +++ /dev/null @@ -1,39 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE QUERY single_merchant_lookup( - VERTEX ver // sample card vertex: fraud_Roberts-Beahan -) FOR GRAPH Transaction_Fraud SYNTAX V3 { - - /** - - * Feature: - * This query returns the detailed attributes of a single Merchant vertex. - - * Parameters: - * - Vertex ver specifies a single input Vertex - - * Output: - * the query prints the results directly to the console. - - */ - - MinAccum @category; - - start = {ver}; - rlt = SELECT s - FROM (s:start) -[e:Merchant_Assigned]- (t:Merchant_Category) - ACCUM s.@category += t - ; - - PRINT rlt[ - rlt.id AS Merchant_Name, - rlt.@category AS Merchant_Category, - rlt.pagerank AS Pagerank_Score, - rlt.c_id AS Community_ID, - rlt.c_size AS Comunity_Size - ]; - -} - -UPDATE DESCRIPTION OF QUERY single_merchant_lookup "This query retrieves the detailed attributes associated with a single Merchant vertex. It is used to analyze the merchant's profile and transaction behaviors, which can help in assessing risk levels and identifying patterns indicative of fraudulent activities or compromised merchant accounts." - -UPDATE DESCRIPTION OF QUERY_PARAM single_merchant_lookup.ver "The single Merchant vertex of interest." 
diff --git a/financial_crime/transaction_fraud/queries/single_transaction_lookup.gsql b/financial_crime/transaction_fraud/queries/single_transaction_lookup.gsql deleted file mode 100644 index eb6d385a..00000000 --- a/financial_crime/transaction_fraud/queries/single_transaction_lookup.gsql +++ /dev/null @@ -1,25 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE QUERY single_transaction_lookup( - VERTEX ver // sample Payment_Transaction vertex: 300e78435108972ad13c58a7e578f30f -) FOR GRAPH Transaction_Fraud SYNTAX V3 { - - /** - - * Feature: - * This query returns the detailed attributes of a single Payment_Transaction vertex. - - * Parameters: - * - Vertex ver specifies a single input Vertex - - * Output: - * the query prints the results directly to the console. - - */ - - rlt = {ver}; - PRINT rlt; - -} -UPDATE DESCRIPTION OF QUERY single_transaction_lookup "This query retrieves the comprehensive attributes of a single Payment_Transaction vertex, including all features such as transaction amount, date, graph algoarithm features and aggregated features required for downstream machine learning models. It is used to gather detailed information on individual transactions, facilitating the development and refinement of ML models aimed at detecting fraudulent activity by providing a rich dataset for analysis and model training." - -UPDATE DESCRIPTION OF QUERY_PARAM single_transaction_lookup.ver "The single Payment_Transaction vertex of interest." 
From 09e57f7042468b66d59b9265bf59f4cce9d9eb46 Mon Sep 17 00:00:00 2001 From: Truman Wang Date: Thu, 7 Nov 2024 13:56:45 -0800 Subject: [PATCH 3/5] remove copilot support queries --- .../queries/card_transactions_stats.gsql | 34 ------------------ .../merchant_category_transactions_stats.gsql | 35 ------------------ .../queries/merchant_transactions_stats.gsql | 36 ------------------- 3 files changed, 105 deletions(-) delete mode 100644 financial_crime/transaction_fraud/queries/card_transactions_stats.gsql delete mode 100644 financial_crime/transaction_fraud/queries/merchant_category_transactions_stats.gsql delete mode 100644 financial_crime/transaction_fraud/queries/merchant_transactions_stats.gsql diff --git a/financial_crime/transaction_fraud/queries/card_transactions_stats.gsql b/financial_crime/transaction_fraud/queries/card_transactions_stats.gsql deleted file mode 100644 index b315d8f0..00000000 --- a/financial_crime/transaction_fraud/queries/card_transactions_stats.gsql +++ /dev/null @@ -1,34 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE DISTRIBUTED QUERY card_transactions_stats(Vertex v) FOR GRAPH Transaction_Fraud SYNTAX v3 { - - /* - This query provides transaction statistics for a specified card, including the count of transactions, total transaction amount, - and the maximum, average, and minimum transaction amount. 
- */ - SumAccum @count; - SumAccum @amount; - MaxAccum @max; - MinAccum @min; - AvgAccum @avg; - start = {v}; - rlt = SELECT s - FROM (s:start) -[:Card_Send_Transaction]-> (t:Payment_Transaction) - ACCUM - s.@count += 1, - s.@amount += t.amount, - s.@max += t.amount, - s.@min += t.amount, - s.@avg += t.amount; - PRINT rlt[ - rlt.@count AS Transaction_Count, - rlt.@amount AS Total_Transaction_Amount, - rlt.@max AS Maximum_Transaction_Amount, - rlt.@min AS Minimum_Transaction_Amount, - rlt.@avg AS Average_Transaction_Amount - ]; - - -} -UPDATE DESCRIPTION OF QUERY card_transactions_stats "This query is designed to yield comprehensive statistics on transactions made using a specific card, encapsulating transaction count, total amount, and the maximum, average, and minimum transaction amounts. It serves as a pivotal tool for monitoring card usage, identifying spending patterns, and enhancing fraud detection efforts by pinpointing irregularities in transaction activities." - -UPDATE DESCRIPTION OF QUERY_PARAM card_transactions_stats.v "The single Card vertex of interest." diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transactions_stats.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transactions_stats.gsql deleted file mode 100644 index 2b9ce2e8..00000000 --- a/financial_crime/transaction_fraud/queries/merchant_category_transactions_stats.gsql +++ /dev/null @@ -1,35 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transactions_stats( - Vertex v) FOR GRAPH Transaction_Fraud SYNTAX v3 { - - /* - This query provides transaction statistics for a specified merchant category, including the count of transactions, total transaction amount, - and the maximum, average, and minimum transaction amount. 
- */ - SumAccum @count; - SumAccum @amount; - MaxAccum @max; - MinAccum @min; - AvgAccum @avg; - start = {v}; - rlt = SELECT s - FROM (s:start) -[:Merchant_Assigned]- (:Merchant) <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) - ACCUM - s.@count += 1, - s.@amount += t.amount, - s.@max += t.amount, - s.@min += t.amount, - s.@avg += t.amount; - PRINT rlt[ - rlt.@count AS Transaction_Count, - rlt.@amount AS Total_Transaction_Amount, - rlt.@max AS Maximum_Transaction_Amount, - rlt.@min AS Minimum_Transaction_Amount, - rlt.@avg AS Average_Transaction_Amount - ]; - - -} -UPDATE DESCRIPTION OF QUERY merchant_category_transactions_stats "This query furnishes detailed transaction statistics for a specific merchant category, covering aspects such as the count of transactions, total transaction amount, and the maximum, average, and minimum transaction amounts. It's crucial for analyzing market trends, assessing the financial health of merchant categories, and detecting anomalies that could indicate fraudulent activity or market shifts." - -UPDATE DESCRIPTION OF QUERY_PARAM merchant_category_transactions_stats.v "The single Merchant_Category vertex of interest." diff --git a/financial_crime/transaction_fraud/queries/merchant_transactions_stats.gsql b/financial_crime/transaction_fraud/queries/merchant_transactions_stats.gsql deleted file mode 100644 index ecda8140..00000000 --- a/financial_crime/transaction_fraud/queries/merchant_transactions_stats.gsql +++ /dev/null @@ -1,36 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE OR REPLACE DISTRIBUTED QUERY merchant_transactions_stats( - Vertex v) FOR GRAPH Transaction_Fraud SYNTAX v3 { - - /* - This query provides transaction statistics for a specified merchant, including the count of transactions, total transaction amount, - and the maximum, average, and minimum transaction amount. 
- */ - SumAccum @count; - SumAccum @amount; - MaxAccum @max; - MinAccum @min; - AvgAccum @avg; - start = {v}; - rlt = SELECT s - FROM (s:start) <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) - ACCUM - s.@count += 1, - s.@amount += t.amount, - s.@max += t.amount, - s.@min += t.amount, - s.@avg += t.amount; - PRINT rlt[ - rlt.@count AS Transaction_Count, - rlt.@amount AS Total_Transaction_Amount, - rlt.@max AS Maximum_Transaction_Amount, - rlt.@min AS Minimum_Transaction_Amount, - rlt.@avg AS Average_Transaction_Amount - ]; - - -} - -UPDATE DESCRIPTION OF QUERY merchant_transactions_stats "This query offers crucial insights into a merchant's transactional behavior by providing detailed statistics, including the count of transactions, total transaction amount, and the maximum, average, and minimum transaction amounts. It is instrumental for financial analysis, aiding in the detection of anomalies, fraud prevention, and the assessment of merchant performance." - -UPDATE DESCRIPTION OF QUERY_PARAM merchant_transactions_stats.v "The single Merchant vertex of interest." 
From 31207577c8d50861f608d8163e9dbc8d463f805f Mon Sep 17 00:00:00 2001 From: Truman Wang Date: Mon, 11 Nov 2024 19:23:42 -0800 Subject: [PATCH 4/5] add comments to explain each query --- .../transaction_fraud/queries/age.gsql | 30 ++- .../queries/all_shortest_path_length.gsql | 6 +- .../card_has_frequent_transactions.gsql | 7 +- .../queries/card_has_large_total_amount.gsql | 31 +-- ...d_merchant_max_amount_within_interval.gsql | 147 +++++------ ...rd_merchant_max_txn_count_in_interval.gsql | 135 +++++----- .../card_with_single_large_transaction.gsql | 11 +- .../transaction_fraud/queries/city_pop.gsql | 27 +- .../queries/community_size.gsql | 94 +++---- .../community_transaction_average_amount.gsql | 106 ++++---- .../queries/community_transaction_count.gsql | 98 ++++---- .../community_transaction_max_amount.gsql | 108 ++++---- .../community_transaction_min_amount.gsql | 108 ++++---- .../community_transaction_total_amount.gsql | 108 ++++---- .../transaction_fraud/queries/degrees.gsql | 35 +-- .../transaction_fraud/queries/gender.gsql | 28 ++- .../queries/insights_show_shortest_path.gsql | 4 +- .../queries/mer_shortest_path_length.gsql | 8 + .../queries/merchant_category.gsql | 32 ++- ...t_category_transaction_average_amount.gsql | 104 ++++---- .../merchant_category_transaction_count.gsql | 94 +++---- ...t_category_transaction_maximum_amount.gsql | 105 ++++---- ...chant_category_transaction_min_amount.gsql | 104 ++++---- ...ant_category_transaction_total_amount.gsql | 104 ++++---- .../merchant_has_frequent_transactions.gsql | 8 +- .../merchant_has_large_total_amount.gsql | 10 +- ...erchant_with_single_large_transaction.gsql | 8 +- .../queries/number_of_repeated_card.gsql | 88 +++---- .../transaction_fraud/queries/occupation.gsql | 28 ++- .../queries/search_any_vertex.gsql | 2 +- ...shortest_path_length_from_transaction.gsql | 53 ---- .../queries/tg_pagerank_wt_card.gsql | 232 ++++++++---------- .../queries/tg_pagerank_wt_merchant.gsql | 232 ++++++++---------- 
.../tg_pagerank_wt_merchant_insights.gsql | 3 + 34 files changed, 1158 insertions(+), 1140 deletions(-) delete mode 100644 financial_crime/transaction_fraud/queries/shortest_path_length_from_transaction.gsql diff --git a/financial_crime/transaction_fraud/queries/age.gsql b/financial_crime/transaction_fraud/queries/age.gsql index 8209372f..336d58b4 100644 --- a/financial_crime/transaction_fraud/queries/age.gsql +++ b/financial_crime/transaction_fraud/queries/age.gsql @@ -1,15 +1,23 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY age() FOR GRAPH Transaction_Fraud SYNTAX V3 { - - // Feature: This query propagate the age information of the card owner to Payment_Transaction vertex. + /* + Query Overview: + This is a feature engineering query: this query propagates the age information of the card owner + to the linked Payment_Transaction vertices and stores the feature as [attribute name: age] of + the linked Payment_Transaction vertices. - MinAccum @age; - SetAccum @@DOB; - - rlt = SELECT s - FROM (s:Payment_Transaction) -[]-> (:Card) -[]-> (t:Party) - ACCUM s.@age = year(now())-year(t.dob) - POST-ACCUM s.setAttr("age",s.@age) - ; + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. 
+ + */ -} \ No newline at end of file + MinAccum @age; + + rlt = SELECT s + FROM (s:Payment_Transaction) -[]-> (:Card) -[]-> (t:Party) + ACCUM s.@age = year(now()) - year(t.dob) + POST-ACCUM s.setAttr("age", s.@age); +} diff --git a/financial_crime/transaction_fraud/queries/all_shortest_path_length.gsql b/financial_crime/transaction_fraud/queries/all_shortest_path_length.gsql index e28fd6d7..4d040169 100644 --- a/financial_crime/transaction_fraud/queries/all_shortest_path_length.gsql +++ b/financial_crime/transaction_fraud/queries/all_shortest_path_length.gsql @@ -3,9 +3,9 @@ CREATE OR REPLACE DISTRIBUTED QUERY all_shortest_path_length( int max_hops=3) FOR GRAPH Transaction_Fraud SYNTAX v3 { /* - This query determines the shortest path length from each transaction to a fraudulent one, - assessing the proximity to fraud risk. Due to the complexity of tracing paths across - numerous transactions. + This query invokes a sub-query, mer_shortest_path_length, for each merchant. The sub-query calculates the shortest path length from the merchant to + a fraudulent transaction, providing a measure of proximity to fraud risk. Since each transaction is associated with only one merchant, the distance + from the merchant to its nearest fraudulent transaction is effectively the transaction itself, allowing for significant memory savings. */ rlt = {Merchant.*}; diff --git a/financial_crime/transaction_fraud/queries/card_has_frequent_transactions.gsql b/financial_crime/transaction_fraud/queries/card_has_frequent_transactions.gsql index 3039c563..393b0447 100644 --- a/financial_crime/transaction_fraud/queries/card_has_frequent_transactions.gsql +++ b/financial_crime/transaction_fraud/queries/card_has_frequent_transactions.gsql @@ -5,9 +5,11 @@ CREATE OR REPLACE DISTRIBUTED QUERY card_has_frequent_transactions( INT freq=3000) FOR GRAPH Transaction_Fraud SYNTAX v3 { /** - * Feature: + * This is an analytical query. 
* This query detects and returns all the cards that has more than k transactions - within specified time period + within specified time period for data analytics. + The global accumulator @@edges are also printed to indicate fraudulent transactions and + its linked card. This is used as an installed query for Insights Application. */ SumAccum @count; SetAccum @@edges; @@ -30,7 +32,6 @@ CREATE OR REPLACE DISTRIBUTED QUERY card_has_frequent_transactions( } UPDATE DESCRIPTION OF QUERY card_has_frequent_transactions "This query identifies and retrieves all card numbers that have conducted more than k transactions within a specified time period. It is used to detect potential fraud by pinpointing cards with unusually high transaction volumes, which may suggest stolen card details being exploited or other fraudulent behaviors." - UPDATE DESCRIPTION OF QUERY_PARAM card_has_frequent_transactions.min_createTime "The earliest time to look back in history. Defaults to 2019-01-06" UPDATE DESCRIPTION OF QUERY_PARAM card_has_frequent_transactions.max_createTime "The latest time to look back in history. Defaults to 2024-01-01" UPDATE DESCRIPTION OF QUERY_PARAM card_has_frequent_transactions.freq "The threshold count or frequency value for transactions . Defaults to 3000" \ No newline at end of file diff --git a/financial_crime/transaction_fraud/queries/card_has_large_total_amount.gsql b/financial_crime/transaction_fraud/queries/card_has_large_total_amount.gsql index 9570636f..7ffd55f3 100644 --- a/financial_crime/transaction_fraud/queries/card_has_large_total_amount.gsql +++ b/financial_crime/transaction_fraud/queries/card_has_large_total_amount.gsql @@ -5,32 +5,33 @@ CREATE OR REPLACE DISTRIBUTED QUERY card_has_large_total_amount( DOUBLE m=200000) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /** - * Feature: + /** + * This is an analytical query. 
* This query detects and returns all the cards whose total transaction amount is - over m within specified time period + over m within specified time period for data analytics. + The global accumulator @@edges are also printed to indicate fraudulent transactions and + its linked card. This is used as an installed query for Insights Application. */ - SumAccum @totalAmount; - - rlt = - SELECT s - FROM (s:Card) -[:Card_Send_Transaction]-> (t:Payment_Transaction) - WHERE t.transaction_time >= min_createTime + SumAccum @totalAmount; + SetAccum @@edges; + + rlt = SELECT s + FROM (s:Card) -[:Card_Send_Transaction]-> (t:Payment_Transaction) + WHERE t.transaction_time >= min_createTime AND t.transaction_time < max_createTime - ACCUM s.@totalAmount += t.amount - HAVING s.@totalAmount > m; + ACCUM s.@totalAmount += t.amount + HAVING s.@totalAmount > m; PRINT rlt[rlt.@totalAmount]; -SetAccum @@edges; + rlt = SELECT t FROM (s:rlt) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) - WHERE t.is_fraud == 1 - ACCUM @@edges += e; + WHERE t.is_fraud == 1 + ACCUM @@edges += e; PRINT @@edges; } UPDATE DESCRIPTION OF QUERY card_has_large_total_amount "This query identifies and retrieves all card numbers whose total transaction amount exceeds m dollars within a specified time period. It is used for identifying cards that may be involved in fraudulent activities by tracking the aggregate spending pattern, which, if unusually high, could indicate misuse or unauthorized transactions." - UPDATE DESCRIPTION OF QUERY_PARAM card_has_large_total_amount.min_createTime "The earliest time to look back in history. Defaults to 2020-01-01" UPDATE DESCRIPTION OF QUERY_PARAM card_has_large_total_amount.max_createTime "The latest time to look back in history. Defaults to 2024-01-01" UPDATE DESCRIPTION OF QUERY_PARAM card_has_large_total_amount.m "The threshold amount for total transaction values. 
Defaults to 200000" \ No newline at end of file diff --git a/financial_crime/transaction_fraud/queries/card_merchant_max_amount_within_interval.gsql b/financial_crime/transaction_fraud/queries/card_merchant_max_amount_within_interval.gsql index 55597397..3f4bf27d 100644 --- a/financial_crime/transaction_fraud/queries/card_merchant_max_amount_within_interval.gsql +++ b/financial_crime/transaction_fraud/queries/card_merchant_max_amount_within_interval.gsql @@ -1,81 +1,82 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY card_merchant_max_amount_within_interval( - INT time_interval = 5) FOR GRAPH Transaction_Fraud SYNTAX V3{ - /* - Find the max amount transacted to the same merchant within a given interval (int minutes) for each account - */ - - TYPEDEF TUPLE payment; - - // Stores a list of payments to each merchant from a given account. - MapAccum> @all_payments; - - MinAccum @merchant_id; - - // Heap Accum, ListAccum, MaxAccum are data structures used during calcluation - HeapAccum(1000, date ASC) @heap; - ListAccum @list; - MaxAccum @max = 0; - - // FILE f (path); - //f.println("id", "max_amount_in_interval"); - - - - // Store merchant id on each transaction vertex - Merchants = SELECT m - FROM (t:Payment_Transaction)-[e:Merchant_Receive_Transaction]->(m:Merchant) - ACCUM t.@merchant_id += m.id; - - - Start = SELECT s - FROM (s:Card)-[e:Card_Send_Transaction]->(t:Payment_Transaction) - ACCUM + INT time_interval = 5 +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query finds the maximum transaction amount to the same merchant + within a specified time interval (in minutes) for each account, and stores the feature as + [attribute name: max_txn_amt_interval] of the linked Payment_Transaction vertices. 
+ + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + */ + + TYPEDEF TUPLE payment; + + // Stores a list of payments to each merchant from a given account. + MapAccum> @all_payments; + MinAccum @merchant_id; + + // Heap Accum, ListAccum, MaxAccum are data structures used during calculation + HeapAccum(1000, date ASC) @heap; + ListAccum @list; + MaxAccum @max = 0; + + // Store merchant id on each transaction vertex + Merchants = SELECT m + FROM (t:Payment_Transaction) -[e:Merchant_Receive_Transaction]-> (m:Merchant) + ACCUM t.@merchant_id += m.id; + + Start = SELECT s + FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM // Collect all payments by merchant id s.@all_payments += (t.@merchant_id -> payment(t.transaction_time, t.amount)) - POST-ACCUM + POST-ACCUM FOREACH (key, list) IN s.@all_payments DO - // For each merchant, first load the payments into a heap to sort the payments - s.@heap.clear(), - s.@list.clear(), - FOREACH p IN list DO - s.@heap += p - END, - FOREACH p IN s.@heap DO - s.@list += p - END, - - // Go through the sorted dates and find the largest value within the given interval - datetime start_date = s.@heap.top().date, - int start_amount = s.@heap.pop().amount, - datetime date = start_date, - FLOAT amount = start_amount, - int start_index = 0, - FLOAT total_amount = 0, - int diff_minutes = 0, - WHILE diff_minutes > time_interval OR s.@heap.size() > 0 DO - IF diff_minutes > time_interval THEN - s.@max += total_amount, - total_amount = total_amount - start_amount, - start_index = start_index + 1, - start_date = s.@list.get(start_index).date, - 
start_amount = s.@list.get(start_index).amount - ELSE - total_amount = total_amount + amount, - date = s.@heap.top().date, - amount = s.@heap.pop().amount + // For each merchant, first load the payments into a heap to sort the payments + s.@heap.clear(), + s.@list.clear(), + FOREACH p IN list DO + s.@heap += p END, - s.@max += total_amount, - diff_minutes = datetime_diff(date, start_date) / 60 - END - END + FOREACH p IN s.@heap DO + s.@list += p + END, + + // Go through the sorted dates and find the largest value within the given interval + datetime start_date = s.@heap.top().date, + int start_amount = s.@heap.pop().amount, + datetime date = start_date, + FLOAT amount = start_amount, + int start_index = 0, + FLOAT total_amount = 0, + int diff_minutes = 0, + WHILE diff_minutes > time_interval OR s.@heap.size() > 0 DO + IF diff_minutes > time_interval THEN + s.@max += total_amount, + total_amount = total_amount - start_amount, + start_index = start_index + 1, + start_date = s.@list.get(start_index).date, + start_amount = s.@list.get(start_index).amount + ELSE + total_amount = total_amount + amount, + date = s.@heap.top().date, + amount = s.@heap.pop().amount + END, + s.@max += total_amount, + diff_minutes = datetime_diff(date, start_date) / 60 + END + END; - // f.println(s.id, s.@max) - ; - //set Payment_Transaction feature attributes + // Set Payment_Transaction feature attributes rlt = SELECT s - FROM (s:Card)-[e:Card_Send_Transaction]->(t:Payment_Transaction) - ACCUM t.@max = s.@max - Post-ACCUM t.setAttr("max_txn_amt_interval", t.@max); - -} \ No newline at end of file + FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM t.@max = s.@max + POST-ACCUM t.setAttr("max_txn_amt_interval", t.@max); +} diff --git a/financial_crime/transaction_fraud/queries/card_merchant_max_txn_count_in_interval.gsql b/financial_crime/transaction_fraud/queries/card_merchant_max_txn_count_in_interval.gsql index 8d45f88e..8ac8d51d 100644 --- 
a/financial_crime/transaction_fraud/queries/card_merchant_max_txn_count_in_interval.gsql +++ b/financial_crime/transaction_fraud/queries/card_merchant_max_txn_count_in_interval.gsql @@ -1,74 +1,77 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY card_merchant_max_txn_count_in_interval( - INT time_interval = 5) FOR GRAPH Transaction_Fraud SYNTAX V3{ - /* - Find the max number of transactions made to the same merchant within a given interval (int minutes) for each account - */ - - TYPEDEF TUPLE date_tuple; - - // Stores a list of transaction dates to each merchant, for a specific account. - MapAccum> @dates; - - MinAccum @merchant_id; + INT time_interval = 5 +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query finds the maximum number of transactions made to the same + merchant within a specified time interval (in minutes) for each account, and stores the feature as + [attribute name: max_txn_cnt_interval] of the linked Payment_Transaction vertices. - // Heap, list, and max accum are auxilary data structures - HeapAccum(1000, date ASC) @heap; - ListAccum @list; - MaxAccum @max = 1; - - // FILE f (path); -// f.println("id", "max_transactions_in_interval"); - - - // Store merchant id on each transaction vertex - Merchants = SELECT m - FROM (t:Payment_Transaction)-[e:Merchant_Receive_Transaction]->(m:Merchant) - ACCUM t.@merchant_id += m.id; - - - Start = SELECT s - FROM (s:Card)-[e:Card_Send_Transaction]->(t:Payment_Transaction) - ACCUM + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. 
+ + */ + + TYPEDEF TUPLE date_tuple; + + // Stores a list of transaction dates to each merchant, for a specific account. + MapAccum> @dates; + MinAccum @merchant_id; + + // Heap, list, and max accum are auxiliary data structures + HeapAccum(1000, date ASC) @heap; + ListAccum @list; + MaxAccum @max = 1; + + // Store merchant id on each transaction vertex + Merchants = SELECT m + FROM (t:Payment_Transaction) -[e:Merchant_Receive_Transaction]-> (m:Merchant) + ACCUM t.@merchant_id += m.id; + + Start = SELECT s + FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM // Collect all dates by merchant id s.@dates += (t.@merchant_id -> date_tuple(t.transaction_time)) - POST-ACCUM + POST-ACCUM FOREACH (key, list) IN s.@dates DO - // For each merchant, first load the dates into a heap to sort the payments - s.@heap.clear(), - s.@list.clear(), - FOREACH p IN list DO - s.@heap += p - END, - FOREACH p IN s.@heap DO - s.@list += p.date - END, - - datetime start_date = s.@heap.pop().date, - int start_index = 0, - datetime curr_date = start_date, - int occurrences = 0, - int diff_minutes = 0, - WHILE diff_minutes > time_interval OR s.@heap.size() > 0 DO - IF diff_minutes > time_interval THEN - start_index = start_index + 1, - curr_date = s.@list.get(start_index), - s.@max += occurrences, - occurrences = occurrences - 1 - ELSE - occurrences = occurrences + 1, - curr_date = s.@heap.pop().date + // For each merchant, first load the dates into a heap to sort the payments + s.@heap.clear(), + s.@list.clear(), + FOREACH p IN list DO + s.@heap += p + END, + FOREACH p IN s.@heap DO + s.@list += p.date END, - s.@max += occurrences, - diff_minutes = datetime_diff(curr_date, start_date) / 60 - END - END - // f.println(s.id, s.@max) - ; - - //set Payment_Transaction feature attributes + + datetime start_date = s.@heap.pop().date, + int start_index = 0, + datetime curr_date = start_date, + int occurrences = 0, + int diff_minutes = 0, + WHILE diff_minutes > time_interval OR 
s.@heap.size() > 0 DO + IF diff_minutes > time_interval THEN + start_index = start_index + 1, + curr_date = s.@list.get(start_index), + s.@max += occurrences, + occurrences = occurrences - 1 + ELSE + occurrences = occurrences + 1, + curr_date = s.@heap.pop().date + END, + s.@max += occurrences, + diff_minutes = datetime_diff(curr_date, start_date) / 60 + END + END; + + // Set Payment_Transaction feature attributes rlt = SELECT s - FROM (s:Card)-[e:Card_Send_Transaction]->(t:Payment_Transaction) - ACCUM t.@max = s.@max - Post-ACCUM t.setAttr("max_txn_cnt_interval", t.@max); -} \ No newline at end of file + FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM t.@max = s.@max + POST-ACCUM t.setAttr("max_txn_cnt_interval", t.@max); +} diff --git a/financial_crime/transaction_fraud/queries/card_with_single_large_transaction.gsql b/financial_crime/transaction_fraud/queries/card_with_single_large_transaction.gsql index e4d5dcfe..2736e0f3 100644 --- a/financial_crime/transaction_fraud/queries/card_with_single_large_transaction.gsql +++ b/financial_crime/transaction_fraud/queries/card_with_single_large_transaction.gsql @@ -3,12 +3,14 @@ CREATE OR REPLACE DISTRIBUTED QUERY card_with_single_large_transaction( DATETIME min_createTime = to_datetime("2020-01-01"), DATETIME max_createTime = to_datetime("2024-01-01"), DOUBLE m=10000) FOR GRAPH Transaction_Fraud SYNTAX v3 { - - /** - * Feature: + /** + * This is an analytical query. * This query detects and returns all the credit debit cards that has single transaction - with amount over m within specified time period + with amount over m within specified time period for data analytics. + The global accumulator @@edges are also printed to indicate fraudulent transactions and + its linked card. This is used as an installed query for Insights Application. 
*/ + SetAccum @transaction; rlt = @@ -24,7 +26,6 @@ CREATE OR REPLACE DISTRIBUTED QUERY card_with_single_large_transaction( } UPDATE DESCRIPTION OF QUERY card_with_single_large_transaction "This query identifies and retrieves all card numbers that have recorded a single transaction exceeding a specified amount m within a certain time frame. It is used for flagging potentially fraudulent activity by highlighting unusually large transactions that could indicate unauthorized use or testing of the card." - UPDATE DESCRIPTION OF QUERY_PARAM card_with_single_large_transaction.min_createTime "The earliest time to look back in history. Defaults to 2020-01-01" UPDATE DESCRIPTION OF QUERY_PARAM card_with_single_large_transaction.max_createTime "The latest time to look back in history. Defaults to 2024-01-01" UPDATE DESCRIPTION OF QUERY_PARAM card_with_single_large_transaction.m "The threshold amount for transaction values. Defaults to 10000" \ No newline at end of file diff --git a/financial_crime/transaction_fraud/queries/city_pop.gsql b/financial_crime/transaction_fraud/queries/city_pop.gsql index ecb06090..a7c0f4f8 100644 --- a/financial_crime/transaction_fraud/queries/city_pop.gsql +++ b/financial_crime/transaction_fraud/queries/city_pop.gsql @@ -1,14 +1,23 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY city_pop() FOR GRAPH Transaction_Fraud SYNTAX V3 { - - // Feature: This query propagate the city population information of the card owner to Payment_Transaction vertex. + /* + Query Overview: + This is a feature engineering query: this query propagates the city population information of the card owner + to the linked Payment_Transaction vertices and stores the feature as [attribute name: city_pop] of the + linked Payment_Transaction vertices. - MinAccum @city_pop; + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. 
+ - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. - rlt = SELECT s - FROM (s:Payment_Transaction) -[]- (:Card) -[]- (:Party) -[]- (:Address) -[]- (t:City) - ACCUM s.@city_pop = t.population - POST-ACCUM s.setAttr("city_pop",s.@city_pop) - ; + */ -} \ No newline at end of file + MinAccum @city_pop; + + rlt = SELECT s + FROM (s:Payment_Transaction) -[]- (:Card) -[]- (:Party) -[]- (:Address) -[]- (t:City) + ACCUM s.@city_pop = t.population + POST-ACCUM s.setAttr("city_pop", s.@city_pop); +} diff --git a/financial_crime/transaction_fraud/queries/community_size.gsql b/financial_crime/transaction_fraud/queries/community_size.gsql index 4b6e0360..9d46235c 100644 --- a/financial_crime/transaction_fraud/queries/community_size.gsql +++ b/financial_crime/transaction_fraud/queries/community_size.gsql @@ -1,43 +1,53 @@ USE GRAPH Transaction_Fraud -CREATE OR REPLACE QUERY community_size() FOR GRAPH Transaction_Fraud SYNTAX V3{ - - /** - * Description: The query counts the component size of each Community and update - * the result size attribute for each party. 
- */ - MapAccum> @@card_component_count,@@merchant_component_count; - MinAccum @c_size; - res = SELECT s - FROM (s:Card) - POST-ACCUM @@card_component_count += (s.c_id -> 1); - - res = SELECT s - FROM (s:Card) - POST-ACCUM - INT tmp1 = @@card_component_count.get(s.c_id), - s.@c_size = tmp1, - s.setAttr("c_size", tmp1); - - res = SELECT s - FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) - ACCUM t.@c_size += s.@c_size - POST-ACCUM t.setAttr("cd_com_size", t.@c_size); - - - - res = SELECT s - FROM (s:Merchant) - POST-ACCUM @@merchant_component_count += (s.c_id -> 1); - - res = SELECT s - FROM (s:Merchant) - POST-ACCUM - INT tmp2 = @@merchant_component_count.get(s.c_id), - s.@c_size = tmp2, - s.setAttr("c_size", tmp2); - - res = SELECT s - FROM (s:Merchant) -[e:Merchant_Receive_Transaction]-> (t:Payment_Transaction) - ACCUM t.@c_size += s.@c_size - POST-ACCUM t.setAttr("mer_com_size", t.@c_size); -} \ No newline at end of file +CREATE OR REPLACE QUERY community_size() FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: the query calculates the component size of each Card and Merchant Community, updating + the size attribute for each Card/merchant. The calculated component size is stored as [attribute name: c_size] for + individual Card/Merchant, and also propagated to [attribute names: cd_com_size and mer_com_size] for Payment_Transaction vertices. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. 
+ + */ + + MapAccum> @@card_component_count, @@merchant_component_count; + MinAccum @c_size; + + // Count component sizes for Card communities + res = SELECT s + FROM (s:Card) + POST-ACCUM @@card_component_count += (s.c_id -> 1); + + res = SELECT s + FROM (s:Card) + POST-ACCUM + INT tmp1 = @@card_component_count.get(s.c_id), + s.@c_size = tmp1, + s.setAttr("c_size", tmp1); + + res = SELECT s + FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM t.@c_size += s.@c_size + POST-ACCUM t.setAttr("cd_com_size", t.@c_size); + + // Count component sizes for Merchant communities + res = SELECT s + FROM (s:Merchant) + POST-ACCUM @@merchant_component_count += (s.c_id -> 1); + + res = SELECT s + FROM (s:Merchant) + POST-ACCUM + INT tmp2 = @@merchant_component_count.get(s.c_id), + s.@c_size = tmp2, + s.setAttr("c_size", tmp2); + + res = SELECT s + FROM (s:Merchant) -[e:Merchant_Receive_Transaction]-> (t:Payment_Transaction) + ACCUM t.@c_size += s.@c_size + POST-ACCUM t.setAttr("mer_com_size", t.@c_size); +} diff --git a/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql index 1621138a..0b668bf7 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_average_amount.gsql @@ -1,57 +1,61 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_average_amount( - //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", - //STRING file_path = "s3://bucket-name/path/print_example_file.txt", - //bool save_csv=false, - STRING community_type = "Merchant", //change to "Card" when run query on Card Community - STRING result_attribute = "com_mer_txn_avg_amt", //change to "com_cd_txn_avg_amt" when run query on Card Community - bool print_results=false + // STRING file_path = 
"/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + STRING community_type = "Merchant", // change to "Card" when running query on Card Community + STRING result_attribute = "com_mer_txn_avg_amt", // change to "com_cd_txn_avg_amt" when running on Card Community + bool print_results = false ) FOR GRAPH Transaction_Fraud SYNTAX V3 { - /** - * Feature: - * This query counts the average amount of transactions for community. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. + /* + Query Overview: + This is a feature engineering query: this query calculates the average transaction amount for each community + (e.g., Merchant or Card community) and stores the feature as [attribute name: result_attribute] on the + linked Payment_Transaction vertices. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - community_type: Specifies the community type (e.g., "Merchant" or "Card"). 
+ - result_attribute: Specifies the name of the attribute to store the result (e.g., "com_mer_txn_avg_amt" or "com_cd_txn_avg_amt"). + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. */ - - MinAccum @txn_avg_amount; - //FILE output_file(file_path); - AvgAccum @txn_amt; - MapAccum> @txn_map; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid,amount) IN c.@txn_map DO - c.@txn_amt += amount - END - ; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM t.@txn_avg_amount += c.@txn_amt - POST-ACCUM t.setAttr(result_attribute,t.@txn_avg_amount) - ; - - IF print_results == TRUE THEN - PRINT community[community.@txn_amt]; - END; - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ + MinAccum @txn_avg_amount; + // FILE output_file(file_path); + AvgAccum @txn_amt; + MapAccum> @txn_map; + + // Calculate the average transaction amount for each community + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; -} \ No newline at end of file + // Store the calculated average transaction amount as an attribute on Payment_Transaction vertices + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM t.@txn_avg_amount += c.@txn_amt + POST-ACCUM t.setAttr(result_attribute, t.@txn_avg_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT community[community.@txn_amt]; + END; + + /* + IF save_csv == 
TRUE THEN + PRINT community.@count.size() TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/community_transaction_count.gsql b/financial_crime/transaction_fraud/queries/community_transaction_count.gsql index 33a8f51a..9e98d993 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_count.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_count.gsql @@ -1,52 +1,56 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_count( - //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", - //STRING file_path = "s3://bucket-name/path/print_example_file.txt", - //bool save_csv=false, - STRING community_type = "Merchant", //change to "Card" when run query on Card Community - STRING result_attribute = "com_mer_txn_cnt", //change to "com_cd_txn_cnt" when run query on Card Community - bool print_results=false + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + STRING community_type = "Merchant", // change to "Card" when running query on Card Community + STRING result_attribute = "com_mer_txn_cnt", // change to "com_cd_txn_cnt" when running on Card Community + bool print_results = false ) FOR GRAPH Transaction_Fraud SYNTAX V3 { - /** - * Feature: - * This query counts the total number of transactions for community. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. 
- * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. - */ - - SetAccum @txn_set; - MaxAccum @txn_cnt; - //FILE output_file(file_path); - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM c.@txn_set += t - ; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM t.@txn_cnt += c.@txn_set.size() - POST-ACCUM t.setAttr(result_attribute,t.@txn_cnt) - ; - - IF print_results == TRUE THEN - PRINT community[community.@txn_set.size()]; - END; + /* + Query Overview: + This is a feature engineering query: this query counts the total number of transactions for each community + (e.g., Merchant or Card community) and stores the feature as [attribute name: result_attribute] on the + linked Payment_Transaction vertices. - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. -} \ No newline at end of file + Parameters: + - community_type: Specifies the community type (e.g., "Merchant" or "Card"). + - result_attribute: Specifies the name of the attribute to store the result (e.g., "com_mer_txn_cnt" or "com_cd_txn_cnt"). + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. 
+ */ + + SetAccum @txn_set; + MaxAccum @txn_cnt; + // FILE output_file(file_path); + + // Accumulate transactions for each community + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM c.@txn_set += t; + + // Store the transaction count as an attribute on Payment_Transaction vertices + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM t.@txn_cnt += c.@txn_set.size() + POST-ACCUM t.setAttr(result_attribute, t.@txn_cnt); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT community[community.@txn_set.size()]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community.@count.size() TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql index 9731849f..dd924278 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_max_amount.gsql @@ -1,57 +1,61 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_max_amount( - //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", - //STRING file_path = "s3://bucket-name/path/print_example_file.txt", - //bool save_csv=false, - STRING community_type = "Merchant", //change to "Card" when run query on Card Community - STRING result_attribute = "com_mer_txn_max_amt", //change to "com_cd_txn_max_amt" when run query on Card Community - bool print_results=false + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + STRING community_type = "Merchant", // change to "Card" when running query on Card Community + STRING 
result_attribute = "com_mer_txn_max_amt", // change to "com_cd_txn_max_amt" when running on Card Community + bool print_results = false ) FOR GRAPH Transaction_Fraud SYNTAX V3 { - /** - * Feature: - * This query counts the maximum amount of transactions for community. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. - */ - - MaxAccum @txn_max_amount; - //FILE output_file(file_path); - MaxAccum @txn_amt; - MapAccum> @txn_map; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid,amount) IN c.@txn_map DO - c.@txn_amt += amount - END - ; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM t.@txn_max_amount += c.@txn_amt - POST-ACCUM t.setAttr(result_attribute,t.@txn_max_amount) - ; - - IF print_results == TRUE THEN - PRINT community[community.@txn_amt]; - END; + /* + Query Overview: + This is a feature engineering query: this query calculates the maximum transaction amount for each community + (e.g., Merchant or Card community) and stores the feature as [attribute name: result_attribute] on the + linked Payment_Transaction vertices. 
- /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. -} \ No newline at end of file + Parameters: + - community_type: Specifies the community type (e.g., "Merchant" or "Card"). + - result_attribute: Specifies the name of the attribute to store the result (e.g., "com_mer_txn_max_amt" or "com_cd_txn_max_amt"). + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. + */ + + MaxAccum @txn_max_amount; + // FILE output_file(file_path); + MaxAccum @txn_amt; + MapAccum> @txn_map; + + // Accumulate the maximum transaction amount for each community + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the maximum transaction amount as an attribute on Payment_Transaction vertices + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM t.@txn_max_amount += c.@txn_amt + POST-ACCUM t.setAttr(result_attribute, t.@txn_max_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT community[community.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community[community.@txn_amt] TO_CSV output_file; + END; + */ +} diff --git
a/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql index 98520df6..9e8c78d5 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_min_amount.gsql @@ -1,57 +1,61 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_min_amount( - //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", - //STRING file_path = "s3://bucket-name/path/print_example_file.txt", - //bool save_csv=false, - STRING community_type = "Merchant", //change to "Card" when run query on Card Community - STRING result_attribute = "com_mer_txn_min_amt", //change to "com_cd_txn_min_amt" when run query on Card Community - bool print_results=false + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + STRING community_type = "Merchant", // change to "Card" when running query on Card Community + STRING result_attribute = "com_mer_txn_min_amt", // change to "com_cd_txn_min_amt" when running on Card Community + bool print_results = false ) FOR GRAPH Transaction_Fraud SYNTAX V3 { - /** - * Feature: - * This query counts the minimum amount of transactions for community. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. 
- * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. - */ - - MinAccum @txn_min_amount; - //FILE output_file(file_path); - MinAccum @txn_amt; - MapAccum> @txn_map; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid,amount) IN c.@txn_map DO - c.@txn_amt += amount - END - ; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM t.@txn_min_amount += c.@txn_amt - POST-ACCUM t.setAttr(result_attribute,t.@txn_min_amount) - ; - - IF print_results == TRUE THEN - PRINT community[community.@txn_amt]; - END; + /* + Query Overview: + This is a feature engineering query: this query calculates the minimum transaction amount for each community + (e.g., Merchant or Card community) and stores the feature as [attribute name: result_attribute] on the + linked Payment_Transaction vertices. - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. -} \ No newline at end of file + Parameters: + - community_type: Specifies the community type (e.g., "Merchant" or "Card"). + - result_attribute: Specifies the name of the attribute to store the result (e.g., "com_mer_txn_min_amt" or "com_cd_txn_min_amt"). 
+ - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. + */ + + MinAccum @txn_min_amount; + // FILE output_file(file_path); + MinAccum @txn_amt; + MapAccum> @txn_map; + + // Accumulate the minimum transaction amount for each community + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the minimum transaction amount as an attribute on Payment_Transaction vertices + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM t.@txn_min_amount += c.@txn_amt + POST-ACCUM t.setAttr(result_attribute, t.@txn_min_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT community[community.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community[community.@txn_amt] TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql b/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql index a3dbe194..4b3c8227 100644 --- a/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql +++ b/financial_crime/transaction_fraud/queries/community_transaction_total_amount.gsql @@ -1,57 +1,61 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY community_transaction_total_amount( - //STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", - //STRING file_path = "s3://bucket-name/path/print_example_file.txt", - //bool save_csv=false, - STRING community_type = "Merchant", //change to "Card" when run query on Card Community - STRING result_attribute = "com_mer_txn_total_amt", //change to
"com_cd_txn_total_amt" when run query on Card Community - bool print_results=false + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + STRING community_type = "Merchant", // change to "Card" when running query on Card Community + STRING result_attribute = "com_mer_txn_total_amt", // change to "com_cd_txn_total_amt" when running on Card Community + bool print_results = false ) FOR GRAPH Transaction_Fraud SYNTAX V3 { - /** - * Feature: - * This query counts the total amount of transactions for community. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. 
- */ - - MinAccum @txn_total_amount; - //FILE output_file(file_path); - SumAccum @txn_amt; - MapAccum> @txn_map; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid,amount) IN c.@txn_map DO - c.@txn_amt += amount - END - ; - - community = - SELECT c - FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) - ACCUM t.@txn_total_amount += c.@txn_amt - POST-ACCUM t.setAttr(result_attribute,t.@txn_total_amount) - ; - - IF print_results == TRUE THEN - PRINT community[community.@txn_amt]; - END; + /* + Query Overview: + This is a feature engineering query: this query calculates the total transaction amount for each community + (e.g., Merchant or Card community) and stores the feature as [attribute name: result_attribute] on the + linked Payment_Transaction vertices. - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. -} \ No newline at end of file + Parameters: + - community_type: Specifies the community type (e.g., "Merchant" or "Card"). + - result_attribute: Specifies the name of the attribute to store the result (e.g., "com_mer_txn_total_amt" or "com_cd_txn_total_amt"). + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. 
+ */ + + MinAccum @txn_total_amount; + // FILE output_file(file_path); + SumAccum @txn_amt; + MapAccum> @txn_map; + + // Accumulate the total transaction amount for each community + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the total transaction amount as an attribute on Payment_Transaction vertices + community = + SELECT c + FROM (c:Community) -[e1:Has_Community]- (:community_type) -[e2]- (t:Payment_Transaction) + ACCUM t.@txn_total_amount += c.@txn_amt + POST-ACCUM t.setAttr(result_attribute, t.@txn_total_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT community[community.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community[community.@txn_amt] TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/degrees.gsql b/financial_crime/transaction_fraud/queries/degrees.gsql index 736f90e7..2d786317 100644 --- a/financial_crime/transaction_fraud/queries/degrees.gsql +++ b/financial_crime/transaction_fraud/queries/degrees.gsql @@ -1,24 +1,31 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE QUERY degrees() FOR GRAPH Transaction_Fraud SYNTAX V3 { - - /** - * Description: The query calculates the merchant in-degree and card out_degree as features - of Payment_Transaction vertices. - * - */ + /* + Query Overview: + This is a feature engineering query: the query calculates the in-degree for Merchants (number of transactions received) + and the out-degree for Cards (number of transactions sent). These values are then stored as features on + Payment_Transaction vertices with attributes [indegree and outdegree].
+ + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + */ SumAccum @in_degree; SumAccum @out_degree; - MinAccum @in,@out; + MinAccum @in, @out; + // Calculate in-degree for Merchant vertices and out-degree for Card vertices res1 = SELECT p FROM (p:Merchant) -[e]- (q:Card) ACCUM p.@in_degree += 1, - q.@out_degree += 1 - ; - + q.@out_degree += 1; + + // Propagate in-degree and out-degree to Payment_Transaction vertices res2 = SELECT t FROM (m:Merchant) <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) <-[:Card_Send_Transaction]- (c:Card) @@ -27,9 +34,7 @@ CREATE OR REPLACE QUERY degrees() FOR GRAPH Transaction_Fraud SYNTAX V3 { t.@out = c.@out_degree POST-ACCUM t.setAttr("indegree", t.@in), - t.setAttr("outdegree",t.@out) - ; - - PRINT "Degrees computed Successfully" as Status; + t.setAttr("outdegree", t.@out); -} \ No newline at end of file + PRINT "Degrees computed Successfully" AS Status; +} diff --git a/financial_crime/transaction_fraud/queries/gender.gsql b/financial_crime/transaction_fraud/queries/gender.gsql index 870049be..ab32dd8d 100644 --- a/financial_crime/transaction_fraud/queries/gender.gsql +++ b/financial_crime/transaction_fraud/queries/gender.gsql @@ -1,14 +1,22 @@ USE GRAPH Transaction_Fraud -CREATE OR REPLACE DISTRIBUTED QUERY gender() FOR GRAPH Transaction_Fraud SYNTAX V3{ - - // Feature: This query propagate the gender information of the card owner to Payment_Transaction vertex. 
+CREATE OR REPLACE DISTRIBUTED QUERY gender() FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query propagates the gender information of the card owner + to the linked Payment_Transaction vertices and stores the feature as [attribute name: gender] on the + linked Payment_Transaction vertices. - MinAccum @gender; + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + */ - rlt = SELECT s - FROM (s:Payment_Transaction) -[]- (:Card) -[]- (t:Party) - ACCUM s.@gender = t.gender - POST-ACCUM s.setAttr("gender",s.@gender) - ; + MinAccum @gender; -} \ No newline at end of file + rlt = SELECT s + FROM (s:Payment_Transaction) -[]- (:Card) -[]- (t:Party) + ACCUM s.@gender = t.gender + POST-ACCUM s.setAttr("gender", s.@gender); +} diff --git a/financial_crime/transaction_fraud/queries/insights_show_shortest_path.gsql b/financial_crime/transaction_fraud/queries/insights_show_shortest_path.gsql index 0fc0f11c..ef8e79a6 100644 --- a/financial_crime/transaction_fraud/queries/insights_show_shortest_path.gsql +++ b/financial_crime/transaction_fraud/queries/insights_show_shortest_path.gsql @@ -1,6 +1,8 @@ CREATE OR REPLACE DISTRIBUTED QUERY insights_show_shortest_path( Vertex ver) FOR GRAPH Transaction_Fraud SYNTAX v3 { - + // This is an Insights query that supports the shortest-path app. Installing this query is recommended for better performance.
+ + SetAccum @@fraud; OrAccum @or_visted; ListAccum @path_list; diff --git a/financial_crime/transaction_fraud/queries/mer_shortest_path_length.gsql b/financial_crime/transaction_fraud/queries/mer_shortest_path_length.gsql index 0970f5ba..480dd6c4 100644 --- a/financial_crime/transaction_fraud/queries/mer_shortest_path_length.gsql +++ b/financial_crime/transaction_fraud/queries/mer_shortest_path_length.gsql @@ -3,7 +3,15 @@ CREATE QUERY mer_shortest_path_length ( int max_hops = 5, VERTEX ver ) FOR GRAPH Transaction_Fraud RETURNS(INT) SYNTAX V3 { + /* + This is a sub-query called by query all_shortest_path_length. The sub-query calculates the shortest path length from the merchant to + a fraudulent transaction, providing a measure of proximity to fraud risk. Since each transaction is associated with only one merchant, the distance + from the merchant to its nearest fraudulent transaction is effectively the transaction itself, allowing for significant memory savings. + This query stores the feature as [attribute name: shortest_path_length] on the linked Payment_Transaction vertices. 
+ */ + + OrAccum @or_visited; MinAccum @path_len; SetAccum @@target_vertices; diff --git a/financial_crime/transaction_fraud/queries/merchant_category.gsql b/financial_crime/transaction_fraud/queries/merchant_category.gsql index 0300450b..217f93c8 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category.gsql @@ -1,15 +1,21 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category(/* Parameters here */) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /* - This query assigns the merchant's category to the 'feature' attribute of linked Payment_Transaction - vertices, updating transaction data with merchant category information - */ - - MinAccum @mer_cat; - rlt = SELECT s - FROM (s:Merchant_Category) -[:Merchant_Assigned]- (m:Merchant) - <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) - ACCUM t.@mer_cat += s.category - POST-ACCUM t.setAttr("mer_cat", t.@mer_cat); - -} \ No newline at end of file + /* + Query Overview: + This is a feature engineering query: this query aggregates the feature [merchant category] for the linked Payment_Transaction vertices + and stores the feature as [attribute name: mer_cat] of the linked Payment_Transaction vertices. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. 
+ + */ + + MinAccum @mer_cat; + rlt = SELECT s + FROM (s:Merchant_Category) -[:Merchant_Assigned]- (m:Merchant) <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) + ACCUM t.@mer_cat += s.category + POST-ACCUM t.setAttr("mer_cat", t.@mer_cat); +} diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql index 9c7a8c4c..69d70b60 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_average_amount.gsql @@ -1,51 +1,57 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_average_amount( -//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", -//STRING file_path = "s3://bucket-name/path/print_example_file.txt", -//bool save_csv=false, - bool print_results=false -) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /** - * Feature: - * This query counts the average amount of transactions for each merchant category. - * Results are then updated to feature attribute of Payment_Transaction vertices. - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. 
- */ - - MinAccum @txn_avg_amount; - //FILE output_file(file_path); - AvgAccum @txn_amt; - MapAccum> @txn_map; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid, amount) in c.@txn_map DO - c.@txn_amt += amount - END; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM t.@txn_avg_amount += c.@txn_amt - POST-ACCUM t.setAttr("mer_cat_avg_amt", t.@txn_avg_amount); - - IF print_results == TRUE THEN - PRINT mer_cat[mer_cat.@txn_amt]; - END; - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ -} \ No newline at end of file + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + bool print_results = false +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query calculates the average transaction amount for each merchant + category and stores the result as a feature on Payment_Transaction vertices with the attribute + [mer_cat_avg_amt]. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. 
+ */ + + MinAccum @txn_avg_amount; + // FILE output_file(file_path); + AvgAccum @txn_amt; + MapAccum> @txn_map; + + // Calculate average transaction amount for each merchant category + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the average transaction amount as an attribute on Payment_Transaction vertices + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM t.@txn_avg_amount += c.@txn_amt + POST-ACCUM t.setAttr("mer_cat_avg_amt", t.@txn_avg_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT mer_cat[mer_cat.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT mer_cat[mer_cat.@txn_amt] TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql index 7d1bae8a..cfdd822a 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_count.gsql @@ -1,46 +1,52 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_count( -//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", -//STRING file_path = "s3://bucket-name/path/print_example_file.txt", -//bool save_csv=false, - bool print_results=false -) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /** - * Feature: - * This query counts total number of transactions for each merchant category. - * Results are then updated to feature attribute of Payment_Transaction vertices.
- * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. - */ - - SetAccum @txn_set; - MaxAccum @txn_cnt; - //FILE output_file(file_path); - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM c.@txn_set += t; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM t.@txn_cnt += c.@txn_set.size() - POST-ACCUM t.setAttr("mer_cat_cnt", t.@txn_cnt); - - IF print_results == TRUE THEN - PRINT mer_cat[mer_cat.@txn_set.size()]; - END; - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ -} \ No newline at end of file + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + bool print_results = false +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query counts the total number of transactions for each merchant + category and stores the result as a feature on Payment_Transaction vertices with the attribute + [mer_cat_cnt]. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. 
+ - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. + */ + + SetAccum @txn_set; + MaxAccum @txn_cnt; + // FILE output_file(file_path); + + // Accumulate transaction count for each merchant category + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM c.@txn_set += t; + + // Store the transaction count as an attribute on Payment_Transaction vertices + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM t.@txn_cnt += c.@txn_set.size() + POST-ACCUM t.setAttr("mer_cat_cnt", t.@txn_cnt); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT mer_cat[mer_cat.@txn_set.size()]; + END; + + /* + IF save_csv == TRUE THEN + PRINT mer_cat[mer_cat.@txn_set.size()] TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql index 9b87c89d..4273ef39 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_maximum_amount.gsql @@ -1,52 +1,57 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_maximum_amount( -//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", -//STRING file_path = "s3://bucket-name/path/print_example_file.txt", -//bool save_csv=false, -
bool print_results=false -) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /** - * Feature: - * This query counts the maximum amount of transactions for each merchant category. - * Results are then updated to feature attribute of Payment_Transaction vertices. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. - */ - - MaxAccum @txn_max_amount; - //FILE output_file(file_path); - MaxAccum @txn_amt; - MapAccum> @txn_map; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid, amount) in c.@txn_map DO - c.@txn_amt += amount - END; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM t.@txn_max_amount += c.@txn_amt - POST-ACCUM t.setAttr("mer_cat_max_amt", t.@txn_max_amount); - - IF print_results == TRUE THEN - PRINT mer_cat[mer_cat.@txn_amt]; - END; - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ -} \ No newline at end of file + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + bool print_results = false +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query calculates the maximum transaction amount for each merchant + category 
and stores the result as a feature on Payment_Transaction vertices with the attribute + [mer_cat_max_amt]. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. + */ + + MaxAccum @txn_max_amount; + // FILE output_file(file_path); + MaxAccum @txn_amt; + MapAccum> @txn_map; + + // Calculate the maximum transaction amount for each merchant category + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the maximum transaction amount as an attribute on Payment_Transaction vertices + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM t.@txn_max_amount += c.@txn_amt + POST-ACCUM t.setAttr("mer_cat_max_amt", t.@txn_max_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT mer_cat[mer_cat.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community.@count.size() TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql index 305daaf0..2b57911d 100644 --- 
a/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_min_amount.gsql @@ -1,51 +1,57 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_min_amount( -//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", -//STRING file_path = "s3://bucket-name/path/print_example_file.txt", -//bool save_csv=false, - bool print_results=false -) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /** - * Feature: - * This query counts the minimum amount of transactions for each merchant category. - * Results are then updated to feature attribute of Payment_Transaction vertices. - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. - * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. 
- */ - - MinAccum @txn_min_amount; - //FILE output_file(file_path); - MinAccum @txn_amt; - MapAccum> @txn_map; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid, amount) in c.@txn_map DO - c.@txn_amt += amount - END; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM t.@txn_min_amount += c.@txn_amt - POST-ACCUM t.setAttr("mer_cat_min_amt", t.@txn_min_amount); - - IF print_results == TRUE THEN - PRINT mer_cat[mer_cat.@txn_amt]; - END; - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ -} \ No newline at end of file + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + bool print_results = false +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query calculates the minimum transaction amount for each merchant + category and stores the result as a feature on Payment_Transaction vertices with the attribute + [mer_cat_min_amt]. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. 
+ */ + + MinAccum @txn_min_amount; + // FILE output_file(file_path); + MinAccum @txn_amt; + MapAccum> @txn_map; + + // Calculate the minimum transaction amount for each merchant category + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the minimum transaction amount as an attribute on Payment_Transaction vertices + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM t.@txn_min_amount += c.@txn_amt + POST-ACCUM t.setAttr("mer_cat_min_amt", t.@txn_min_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT mer_cat[mer_cat.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community.@count.size() TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql index 4b2a8774..422f95e9 100644 --- a/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_category_transaction_total_amount.gsql @@ -1,51 +1,57 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY merchant_category_transaction_total_amount( -//STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", -//STRING file_path = "s3://bucket-name/path/print_example_file.txt", -//bool save_csv=false, - bool print_results=false -) FOR GRAPH Transaction_Fraud SYNTAX v3 { - /** - * Feature: - * This query calculates the total transaction amount for merchant category. - * - * Parameters: - * - file_path: Specifies the path to the output CSV file where the results will be saved if `save_csv` is true. 
- * - save_csv: A boolean parameter that determines whether the results are saved to a CSV file at `file_path`. - * - print_results: A boolean parameter that controls whether the results are printed to the console. - * - * Output: - * Depending on the boolean flags provided: - * - If `save_csv` is true, the query outputs a CSV file at `file_path` - * - If `print_results` is true, the query prints the results directly to the console. - */ - - MinAccum @txn_total_amount; - //FILE output_file(file_path); - SumAccum @txn_amt; - MapAccum> @txn_map; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM c.@txn_map += (t.id -> t.amount) - POST-ACCUM - FOREACH (tid, amount) in c.@txn_map DO - c.@txn_amt += amount - END; - - mer_cat = - SELECT c - FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) - ACCUM t.@txn_total_amount += c.@txn_amt - POST-ACCUM t.setAttr("mer_cat_total_amt", t.@txn_total_amount); - - IF print_results == TRUE THEN - PRINT mer_cat[mer_cat.@txn_amt]; - END; - /* - IF save_csv == TRUE THEN - PRINT community.@count.size() TO_CSV output_file; - END; - */ -} \ No newline at end of file + // STRING file_path = "/home/tigergraph/gsql_output/print_example.csv", + // STRING file_path = "s3://bucket-name/path/print_example_file.txt", + // bool save_csv = false, + bool print_results = false +) FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query calculates the total transaction amount for each merchant + category and stores the result as a feature on Payment_Transaction vertices with the attribute + [mer_cat_total_amt]. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. 
+ - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - print_results: Boolean flag to control whether results are printed to the console. + - save_csv (commented out): When enabled, saves output to a CSV file at the specified file_path. + */ + + MinAccum @txn_total_amount; + // FILE output_file(file_path); + SumAccum @txn_amt; + MapAccum> @txn_map; + + // Calculate the total transaction amount for each merchant category + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM c.@txn_map += (t.id -> t.amount) + POST-ACCUM + FOREACH (tid, amount) IN c.@txn_map DO + c.@txn_amt += amount + END; + + // Store the total transaction amount as an attribute on Payment_Transaction vertices + mer_cat = + SELECT c + FROM (c:Merchant_Category) -[e1:Merchant_Assigned]- (:Merchant) -[]- (t:Payment_Transaction) + ACCUM t.@txn_total_amount += c.@txn_amt + POST-ACCUM t.setAttr("mer_cat_total_amt", t.@txn_total_amount); + + // Optionally print results if the print_results flag is true + IF print_results == TRUE THEN + PRINT mer_cat[mer_cat.@txn_amt]; + END; + + /* + IF save_csv == TRUE THEN + PRINT community.@count.size() TO_CSV output_file; + END; + */ +} diff --git a/financial_crime/transaction_fraud/queries/merchant_has_frequent_transactions.gsql b/financial_crime/transaction_fraud/queries/merchant_has_frequent_transactions.gsql index 67c3f9cc..64686bc9 100644 --- a/financial_crime/transaction_fraud/queries/merchant_has_frequent_transactions.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_has_frequent_transactions.gsql @@ -5,10 +5,13 @@ CREATE OR REPLACE DISTRIBUTED QUERY merchant_has_frequent_transactions( INT freq=3000) FOR GRAPH Transaction_Fraud SYNTAX v3 { /** - * Feature: + * This is an 
analytical query. * This query detects and returns all the merchants that has more than k transactions - within specified time period + within specified time period for data analytics. + The global accumulator @@edges is also printed to indicate fraudulent transactions and + their linked cards. This is used as an installed query for Insights Application. */ + SumAccum @count; SetAccum @@edges; rlt = @@ -30,7 +33,6 @@ CREATE OR REPLACE DISTRIBUTED QUERY merchant_has_frequent_transactions( } UPDATE DESCRIPTION OF QUERY merchant_has_frequent_transactions "This query identifies and retrieves all merchants that have processed more than k transactions within a specified time period. It is used to detect potential fraudulent or unusual activity by identifying merchants with an exceptionally high volume of transactions, which may indicate a compromised merchant account or other forms of transactional fraud." - UPDATE DESCRIPTION OF QUERY_PARAM merchant_has_frequent_transactions.min_createTime "The earliest time to look back in history. Defaults to 2020-01-01" UPDATE DESCRIPTION OF QUERY_PARAM merchant_has_frequent_transactions.max_createTime "The latest time to look back in history. Defaults to 2024-01-01" UPDATE DESCRIPTION OF QUERY_PARAM merchant_has_frequent_transactions.freq "The threshold amount for transaction values. Defaults to 3000" \ No newline at end of file diff --git a/financial_crime/transaction_fraud/queries/merchant_has_large_total_amount.gsql b/financial_crime/transaction_fraud/queries/merchant_has_large_total_amount.gsql index 63f2b607..06c88205 100644 --- a/financial_crime/transaction_fraud/queries/merchant_has_large_total_amount.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_has_large_total_amount.gsql @@ -6,10 +6,13 @@ CREATE OR REPLACE DISTRIBUTED QUERY merchant_has_large_total_amount( /** - * Feature: + * This is an analytical query.
* This query detects and returns all the cards whose total transaction amount is - over m within specified time period - */ + over m within specified time period for data analytics. + The global accumulator @@edges is also printed to indicate fraudulent transactions and + their linked cards. This is used as an installed query for Insights Application. + */ + SumAccum @totalAmount; rlt = @@ -30,7 +33,6 @@ SetAccum @@edges; } UPDATE DESCRIPTION OF QUERY merchant_has_large_total_amount "This query identifies and retrieves all merchants whose total transaction amount exceeds m dollars within a specified time period. It is used for detecting potential fraudulent or unusual business activity by highlighting merchants with exceptionally high total sales, which could indicate fraudulent transactions or money laundering activities." - UPDATE DESCRIPTION OF QUERY_PARAM merchant_has_large_total_amount.min_createTime "The earliest time to look back in history. Defaults to 2020-01-01" UPDATE DESCRIPTION OF QUERY_PARAM merchant_has_large_total_amount.max_createTime "The latest time to look back in history. Defaults to 2024-01-01" UPDATE DESCRIPTION OF QUERY_PARAM merchant_has_large_total_amount.m "The threshold amount for transaction values. Defaults to 200000" diff --git a/financial_crime/transaction_fraud/queries/merchant_with_single_large_transaction.gsql b/financial_crime/transaction_fraud/queries/merchant_with_single_large_transaction.gsql index 2e6b9353..44bf3888 100644 --- a/financial_crime/transaction_fraud/queries/merchant_with_single_large_transaction.gsql +++ b/financial_crime/transaction_fraud/queries/merchant_with_single_large_transaction.gsql @@ -5,10 +5,13 @@ CREATE OR REPLACE DISTRIBUTED QUERY merchant_with_single_large_transaction( DOUBLE m=10000) FOR GRAPH Transaction_Fraud SYNTAX v3 { /** - * Feature: + * This is an analytical query.
* This query detects and returns all the merchants that has single transaction - with amount over m within specified time period + with amount over m within specified time period for data analytics. + The global accumulator @@edges is also printed to indicate fraudulent transactions and + their linked cards. This is used as an installed query for Insights Application. */ + SetAccum @transaction; rlt = @@ -24,7 +27,6 @@ CREATE OR REPLACE DISTRIBUTED QUERY merchant_with_single_large_transaction( } UPDATE DESCRIPTION OF QUERY merchant_with_single_large_transaction "This query identifies and retrieves all merchants that have recorded a single transaction exceeding a specified amount m within a certain time frame. It is used to flag potential fraud or irregular activities by highlighting transactions that are unusually large for a given merchant, which could indicate suspicious or anomalous behavior." - UPDATE DESCRIPTION OF QUERY_PARAM merchant_with_single_large_transaction.min_createTime "The earliest time to look back in history. Defaults to 2020-01-01" UPDATE DESCRIPTION OF QUERY_PARAM merchant_with_single_large_transaction.max_createTime "The latest time to look back in history. Defaults to 2024-01-01" UPDATE DESCRIPTION OF QUERY_PARAM merchant_with_single_large_transaction.m "The threshold amount for transaction values.
Defaults to 10000" \ No newline at end of file diff --git a/financial_crime/transaction_fraud/queries/number_of_repeated_card.gsql b/financial_crime/transaction_fraud/queries/number_of_repeated_card.gsql index 64b52c36..6d0e5608 100644 --- a/financial_crime/transaction_fraud/queries/number_of_repeated_card.gsql +++ b/financial_crime/transaction_fraud/queries/number_of_repeated_card.gsql @@ -1,47 +1,49 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE DISTRIBUTED QUERY number_of_repeated_card() FOR GRAPH Transaction_Fraud SYNTAX V3 { - - /* - Find the number of repeated cards for each merchant - */ - - // Key = sender id, Value = number of transactions from sender - MapAccum @count; - MinAccum @sender_id; - SumAccum @repeated; - MinAccum @cnt; - - // FILE f (path); - //f.println("id", "max_transactions_in_interval"); - - - // Store sender id on transaction - Transactions = SELECT t - FROM (a:Card)-[e:Card_Send_Transaction]->(t:Payment_Transaction) - ACCUM t.@sender_id += a.card_number - ; - - // Store sender ids on merchant vertex and count number of repeated senders - Merchants = SELECT m - FROM (t:Payment_Transaction)-[e:Merchant_Receive_Transaction]->(m:Merchant) - ACCUM - m.@count += (t.@sender_id -> 1) - POST-ACCUM - FOREACH (key, value) IN m.@count DO + /* + Query Overview: + This is a feature engineering query: this query calculates the number of repeated cards (cards with two or more transactions) + for each merchant and stores the result as a feature on Payment_Transaction vertices with the attribute + [cnt_repeated_card]. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration.
+ */ + + // Key = sender id, Value = number of transactions from sender + MapAccum @count; + MinAccum @sender_id; + SumAccum @repeated; + MinAccum @cnt; + + // Store sender id on transaction + Transactions = + SELECT t + FROM (a:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM t.@sender_id += a.card_number; + + // Store sender ids on merchant vertex and count number of repeated senders + Merchants = + SELECT m + FROM (t:Payment_Transaction) -[e:Merchant_Receive_Transaction]-> (m:Merchant) + ACCUM + m.@count += (t.@sender_id -> 1) + POST-ACCUM + FOREACH (key, value) IN m.@count DO IF value >= 2 THEN - m.@repeated += 1 + m.@repeated += 1 END - END - //f.println(m.id, repeated) - ; - - // Set the Payment_Transaction feature attributes - rlt = SELECT s - FROM (s:Merchant) <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) - ACCUM t.@cnt = s.@repeated - Post-ACCUM t.setAttr("cnt_repeated_card", t.@cnt) - ; - - PRINT rlt; - -} \ No newline at end of file + END; + + // Set the Payment_Transaction feature attributes + rlt = + SELECT s + FROM (s:Merchant) <-[:Merchant_Receive_Transaction]- (t:Payment_Transaction) + ACCUM t.@cnt = s.@repeated + POST-ACCUM t.setAttr("cnt_repeated_card", t.@cnt); + + PRINT rlt; +} diff --git a/financial_crime/transaction_fraud/queries/occupation.gsql b/financial_crime/transaction_fraud/queries/occupation.gsql index 5e311eec..4675ac5e 100644 --- a/financial_crime/transaction_fraud/queries/occupation.gsql +++ b/financial_crime/transaction_fraud/queries/occupation.gsql @@ -1,14 +1,22 @@ USE GRAPH Transaction_Fraud -CREATE OR REPLACE DISTRIBUTED QUERY occupation() FOR GRAPH Transaction_Fraud SYNTAX V3{ - - // Feature: This query propagate the occupation of the card owner to Payment_Transaction vertex. 
+CREATE OR REPLACE DISTRIBUTED QUERY occupation() FOR GRAPH Transaction_Fraud SYNTAX V3 { + /* + Query Overview: + This is a feature engineering query: this query propagates the occupation information of the card owner + to the linked Payment_Transaction vertices and stores the feature as [attribute name: occupation] on the + linked Payment_Transaction vertices. - MinAccum @occupation; + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + */ - rlt = SELECT s - FROM (s:Payment_Transaction) -[]- (t:Card) - ACCUM s.@occupation = t.occupation - POST-ACCUM s.setAttr("occupation",s.@occupation) - ; + MinAccum @occupation; -} \ No newline at end of file + rlt = SELECT s + FROM (s:Payment_Transaction) -[]- (t:Card) + ACCUM s.@occupation = t.occupation + POST-ACCUM s.setAttr("occupation", s.@occupation); +} diff --git a/financial_crime/transaction_fraud/queries/search_any_vertex.gsql b/financial_crime/transaction_fraud/queries/search_any_vertex.gsql index 2ab23d70..7524c495 100644 --- a/financial_crime/transaction_fraud/queries/search_any_vertex.gsql +++ b/financial_crime/transaction_fraud/queries/search_any_vertex.gsql @@ -1,6 +1,6 @@ USE GRAPH Transaction_Fraud CREATE QUERY search_any_vertex(STRING keyword) FOR GRAPH Transaction_Fraud SYNTAX V3 { - + // This query allows user to search any vertex by its primary key STRING nameSearch = ""; STRING wildcard = "%"; nameSearch = wildcard + lower(keyword) + wildcard; diff --git a/financial_crime/transaction_fraud/queries/shortest_path_length_from_transaction.gsql b/financial_crime/transaction_fraud/queries/shortest_path_length_from_transaction.gsql 
deleted file mode 100644 index e6cce225..00000000 --- a/financial_crime/transaction_fraud/queries/shortest_path_length_from_transaction.gsql +++ /dev/null @@ -1,53 +0,0 @@ -USE GRAPH Transaction_Fraud -CREATE QUERY shortest_path_length_from_transaction ( - int max_hops = 5, - VERTEX ver -) FOR GRAPH Transaction_Fraud RETURNS(INT) SYNTAX V3 { - - /* - This query identifies the shortest path from any given transaction to a known fraudulent - transaction within the network. It measures the path length, considering the route - through either a Card or a Merchant and onto another transaction, as one unit of length. - It also a sub query that can be called by the all pair shortest path length query. - */ - - OrAccum @or_visited; - MinAccum @path_len; - SetAccum @@target_vertices; - MinAccum @@shortest_path_length; - - seed={ver}; - - start = SELECT s - FROM (s:seed) - POST-ACCUM - s.@path_len = 0, - s.@or_visited += TRUE, - IF s.is_fraud == 1 THEN - @@shortest_path_length += 0 - END - ; - - WHILE (start.size() > 0) LIMIT max_hops DO - start = SELECT tgt - FROM (s:start) -[e1:Merchant_Receive_Transaction|Card_Send_Transaction]- (c2) - -[e2:Merchant_Receive_Transaction|Card_Send_Transaction]- (tgt:Payment_Transaction) - WHERE tgt.@or_visited == FALSE - ACCUM - tgt.@path_len += s.@path_len + 1, - IF tgt.is_fraud == 1 THEN - @@shortest_path_length += s.@path_len + 1 - END - POST-ACCUM - tgt.@or_visited += TRUE - ; - END; - - rlt = SELECT s - FROM (s:seed) - POST-ACCUM s.setAttr("shortest_path_length", @@shortest_path_length) - ; - - RETURN @@shortest_path_length; - -} \ No newline at end of file diff --git a/financial_crime/transaction_fraud/queries/tg_pagerank_wt_card.gsql b/financial_crime/transaction_fraud/queries/tg_pagerank_wt_card.gsql index ace301ab..9ec7287e 100644 --- a/financial_crime/transaction_fraud/queries/tg_pagerank_wt_card.gsql +++ b/financial_crime/transaction_fraud/queries/tg_pagerank_wt_card.gsql @@ -1,146 +1,116 @@ USE GRAPH Transaction_Fraud CREATE OR 
REPLACE QUERY tg_pagerank_wt_card ( - STRING v_type = "Card", - STRING e_type = "Card_Card", - STRING weight_attribute = "weight", - FLOAT max_change=0.001, - INT maximum_iteration=25, - FLOAT damping=0.85, - INT top_k = 100, - BOOL print_results = FALSE, - STRING result_attribute = "pagerank", - STRING file_path = "", - BOOL display_edges = FALSE + STRING v_type = "Card", + STRING e_type = "Card_Card", + STRING weight_attribute = "weight", + FLOAT max_change = 0.001, + INT maximum_iteration = 25, + FLOAT damping = 0.85, + INT top_k = 100, + BOOL print_results = FALSE, + STRING result_attribute = "pagerank", + STRING file_path = "", + BOOL display_edges = FALSE ) SYNTAX V3 { /* - First Author: - First Commit Date: - - Recent Author: - Recent Commit Date: - - - Repository: - https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality - - Maturity: - Production - - Description: - Compute the pageRank score for each vertex in the GRAPH - In each iteration, compute a score for each vertex: - score = (1-damping) + damping*sum(received scores FROM its neighbors). - The pageRank algorithm stops when either of the following is true: - a) it reaches maximum_iteration iterations; - b) the max score change for any vertex compared to the last iteration <= max_change. - - This query supports only taking in a single edge for the time being (8/13/2020). 
- - Publications: - http://infolab.stanford.edu/~backrub/google.html - - TigerGraph Documentation: - https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/pagerank - - Parameters: - v_type: - vertex types to traverse - print_results: - If True, print JSON output - e_type: - edge types to traverse - result_attribute: - INT attribute to store results to - weight_attribute: - attribute for edge weights - maximum_iteration: - max #iterations - file_path: - file to write CSV output to - top_k: - #top scores to output - display_edges: - output edges for visualization - max_change: - max allowed change between iterations to achieve convergence - damping: - importance of traversal vs. random teleport + Query Overview: + This query computes the PageRank score for each vertex in the graph using the PageRank algorithm. + Each vertex score is calculated iteratively, accounting for incoming scores from neighboring vertices + based on the specified damping factor. The query terminates when either a specified maximum iteration + count is reached or the score changes between iterations fall below the `max_change` threshold. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. + + Parameters: + - v_type: The vertex type to compute PageRank for (default: "Card"). + - e_type: The edge type to traverse for the PageRank calculation. + - weight_attribute: The attribute used as edge weight. + - max_change: Maximum allowed change between iterations to achieve convergence. + - maximum_iteration: Maximum number of iterations for the PageRank calculation. + - damping: Damping factor for PageRank. 
+ - top_k: Number of top results to output. + - result_attribute: Attribute on vertices to store PageRank scores. + - print_results: Flag to control if results should be printed. + - file_path: Path for output CSV if saving results. + - display_edges: Flag to output edges for visualization. */ -TYPEDEF TUPLE Vertex_Score; -HeapAccum(top_k, score DESC) @@top_scores_heap; -SetAccum @@top_vertices; # vertices with top score -MaxAccum @@max_diff = 9999; # max score change in an iteration -SumAccum @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors -SumAccum @sum_score = 1; # initial score for every vertex is 1. -SetAccum @@edge_set; # list of all edges, if display is needed -SumAccum @sum_total_wt; -MinAccum @pagerank_score; + TYPEDEF TUPLE Vertex_Score; + HeapAccum(top_k, score DESC) @@top_scores_heap; + SetAccum @@top_vertices; // vertices with top scores + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @sum_recvd_score = 0; // sum of scores each vertex receives from neighbors + SumAccum @sum_score = 1; // initial score for every vertex is 1 + SetAccum @@edge_set; // list of all edges, if display is needed + SumAccum @sum_total_wt; + MinAccum @pagerank_score; -FILE f (file_path); + FILE f (file_path); -Start = {v_type}; + Start = {v_type}; - # Calculate the total weight for each vertex -Start = SELECT s + // Calculate the total weight for each vertex + Start = + SELECT s FROM (s:Start) -[e:e_type]- (t:v_type) - ACCUM s.@sum_total_wt += e.getAttr(weight_attribute, "FLOAT"); - -# PageRank iterations -# Start with all vertices of specified type(s) -WHILE @@max_diff > max_change LIMIT maximum_iteration DO - @@max_diff = 0; - V = SELECT s - FROM (s:Start) -[e:e_type]- (t:v_type) - ACCUM t.@sum_recvd_score += s.@sum_score * e.getAttr(weight_attribute, "FLOAT")/s.@sum_total_wt - POST-ACCUM s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score, - s.@sum_recvd_score = 0, - @@max_diff += abs(s.@sum_score - 
s.@sum_score'); -END; # END WHILE loop -# Output -IF file_path != "" THEN - f.println("Vertex_ID", "PageRank"); -END; + ACCUM s.@sum_total_wt += e.getAttr(weight_attribute, "FLOAT"); + + // PageRank iterations + WHILE @@max_diff > max_change LIMIT maximum_iteration DO + @@max_diff = 0; + V = SELECT s + FROM (s:Start) -[e:e_type]- (t:v_type) + ACCUM t.@sum_recvd_score += s.@sum_score * e.getAttr(weight_attribute, "FLOAT") / s.@sum_total_wt + POST-ACCUM + s.@sum_score = (1.0 - damping) + damping * s.@sum_recvd_score, + s.@sum_recvd_score = 0, + @@max_diff += abs(s.@sum_score - s.@sum_score'); + END; // End WHILE loop + + // Output results if file_path or print_results is specified + IF file_path != "" THEN + f.println("Vertex_ID", "PageRank"); + END; -V = SELECT s - FROM (s:Start) - POST-ACCUM - IF result_attribute != "" THEN - s.setAttr(result_attribute, s.@sum_score) - END, - - IF file_path != "" THEN - f.println(s, s.@sum_score) - END, - - IF print_results THEN - @@top_scores_heap += Vertex_Score(s, s.@sum_score) - END; - -IF print_results THEN - PRINT @@top_scores_heap; - IF display_edges THEN - - FOREACH vert IN @@top_scores_heap DO - @@top_vertices += vert.Vertex_ID; + V = SELECT s + FROM (s:Start) + POST-ACCUM + IF result_attribute != "" THEN + s.setAttr(result_attribute, s.@sum_score) + END, + IF file_path != "" THEN + f.println(s, s.@sum_score) + END, + IF print_results THEN + @@top_scores_heap += Vertex_Score(s, s.@sum_score) + END; + + IF print_results THEN + PRINT @@top_scores_heap; + IF display_edges THEN + FOREACH vert IN @@top_scores_heap DO + @@top_vertices += vert.Vertex_ID; + END; + + Top = {@@top_vertices}; + Top = SELECT s + FROM (s:Top) -[e:e_type]- (t:v_type) + WHERE @@top_vertices.contains(t) + ACCUM @@edge_set += e; + + PRINT @@edge_set; + PRINT Top; END; - - Top = {@@top_vertices}; - Top = SELECT s - FROM (s:Top) -[e:e_type]- (t:v_type) - WHERE @@top_vertices.contains(t) - ACCUM @@edge_set += e; - - PRINT @@edge_set; - PRINT Top; END; -END; - 
- // propagate the pagerank score to the result_attribute on Payment_Transaction vertex - res = SELECT s - FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) - ACCUM t.@pagerank_score += s.@sum_score - POST-ACCUM t.setAttr("cd_pagerank", t.@pagerank_score); -} \ No newline at end of file + // Propagate the PageRank score to the result_attribute on Payment_Transaction vertices + res = SELECT s + FROM (s:Card) -[e:Card_Send_Transaction]-> (t:Payment_Transaction) + ACCUM t.@pagerank_score += s.@sum_score + POST-ACCUM t.setAttr("cd_pagerank", t.@pagerank_score); +} diff --git a/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant.gsql b/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant.gsql index ad4289d3..d9865d2e 100644 --- a/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant.gsql +++ b/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant.gsql @@ -1,146 +1,116 @@ USE GRAPH Transaction_Fraud CREATE OR REPLACE QUERY tg_pagerank_wt_merchant ( - STRING v_type = "Merchant", - STRING e_type = "Merchant_Merchant", - STRING weight_attribute = "weight", - FLOAT max_change=0.001, - INT maximum_iteration=25, - FLOAT damping=0.85, - INT top_k = 100, - BOOL print_results = FALSE, - STRING result_attribute = "pagerank", - STRING file_path = "", - BOOL display_edges = FALSE + STRING v_type = "Merchant", + STRING e_type = "Merchant_Merchant", + STRING weight_attribute = "weight", + FLOAT max_change = 0.001, + INT maximum_iteration = 25, + FLOAT damping = 0.85, + INT top_k = 100, + BOOL print_results = FALSE, + STRING result_attribute = "pagerank", + STRING file_path = "", + BOOL display_edges = FALSE ) SYNTAX V3 { /* - First Author: - First Commit Date: - - Recent Author: - Recent Commit Date: - - - Repository: - https://github.com/tigergraph/gsql-graph-algorithms/tree/master/algorithms/Centrality - - Maturity: - Production - - Description: - Compute the pageRank score for each vertex in the GRAPH - In each 
iteration, compute a score for each vertex: - score = (1-damping) + damping*sum(received scores FROM its neighbors). - The pageRank algorithm stops when either of the following is true: - a) it reaches maximum_iteration iterations; - b) the max score change for any vertex compared to the last iteration <= max_change. - - This query supports only taking in a single edge for the time being (8/13/2020). - - Publications: - http://infolab.stanford.edu/~backrub/google.html - - TigerGraph Documentation: - https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/pagerank - - Parameters: - v_type: - vertex types to traverse - print_results: - If True, print JSON output - e_type: - edge types to traverse - result_attribute: - INT attribute to store results to - weight_attribute: - attribute for edge weights - maximum_iteration: - max #iterations - file_path: - file to write CSV output to - top_k: - #top scores to output - display_edges: - output edges for visualization - max_change: - max allowed change between iterations to achieve convergence - damping: - importance of traversal vs. random teleport + Query Overview: + This query computes the PageRank score for each merchant in the graph using the PageRank algorithm. + Each vertex score is calculated iteratively, considering incoming scores from neighboring vertices + weighted by the specified edge attribute. The algorithm stops when either the maximum iteration count + is reached or the score changes between iterations fall below the `max_change` threshold. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Payment_Transaction vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution kit uses the first approach, storing features as attributes on the Payment_Transaction vertex for easier demonstration. 
+ + Parameters: + - v_type: The vertex type for PageRank computation (default: "Merchant"). + - e_type: The edge type to traverse for PageRank calculation. + - weight_attribute: The attribute used as edge weight. + - max_change: Maximum allowed change between iterations to achieve convergence. + - maximum_iteration: Maximum number of iterations for the PageRank calculation. + - damping: Damping factor for PageRank. + - top_k: Number of top results to output. + - result_attribute: Attribute on vertices to store PageRank scores. + - print_results: Flag to control if results should be printed. + - file_path: Path for output CSV if saving results. + - display_edges: Flag to output edges for visualization. */ -TYPEDEF TUPLE Vertex_Score; -HeapAccum(top_k, score DESC) @@top_scores_heap; -SetAccum @@top_vertices; # vertices with top score -MaxAccum @@max_diff = 9999; # max score change in an iteration -SumAccum @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors -SumAccum @sum_score = 1; # initial score for every vertex is 1. 
-SetAccum @@edge_set; # list of all edges, if display is needed -SumAccum @sum_total_wt; -MinAccum @pagerank_score; + TYPEDEF TUPLE Vertex_Score; + HeapAccum(top_k, score DESC) @@top_scores_heap; + SetAccum @@top_vertices; // vertices with top scores + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @sum_recvd_score = 0; // sum of scores each vertex receives from neighbors + SumAccum @sum_score = 1; // initial score for every vertex is 1 + SetAccum @@edge_set; // list of all edges, if display is needed + SumAccum @sum_total_wt; + MinAccum @pagerank_score; -FILE f (file_path); + FILE f (file_path); -Start = {v_type}; + Start = {v_type}; - # Calculate the total weight for each vertex -Start = SELECT s + // Calculate the total weight for each vertex + Start = + SELECT s FROM (s:Start) -[e:e_type]- (t:v_type) - ACCUM s.@sum_total_wt += e.getAttr(weight_attribute, "FLOAT"); - -# PageRank iterations -# Start with all vertices of specified type(s) -WHILE @@max_diff > max_change LIMIT maximum_iteration DO - @@max_diff = 0; - V = SELECT s - FROM (s:Start) -[e:e_type]- (t:v_type) - ACCUM t.@sum_recvd_score += s.@sum_score * e.getAttr(weight_attribute, "FLOAT")/s.@sum_total_wt - POST-ACCUM s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score, - s.@sum_recvd_score = 0, - @@max_diff += abs(s.@sum_score - s.@sum_score'); -END; # END WHILE loop -# Output -IF file_path != "" THEN - f.println("Vertex_ID", "PageRank"); -END; + ACCUM s.@sum_total_wt += e.getAttr(weight_attribute, "FLOAT"); + + // PageRank iterations + WHILE @@max_diff > max_change LIMIT maximum_iteration DO + @@max_diff = 0; + V = SELECT s + FROM (s:Start) -[e:e_type]- (t:v_type) + ACCUM t.@sum_recvd_score += s.@sum_score * e.getAttr(weight_attribute, "FLOAT") / s.@sum_total_wt + POST-ACCUM + s.@sum_score = (1.0 - damping) + damping * s.@sum_recvd_score, + s.@sum_recvd_score = 0, + @@max_diff += abs(s.@sum_score - s.@sum_score'); + END; // End WHILE loop + + // Output results if 
file_path or print_results is specified + IF file_path != "" THEN + f.println("Vertex_ID", "PageRank"); + END; -V = SELECT s - FROM (s:Start) - POST-ACCUM - IF result_attribute != "" THEN - s.setAttr(result_attribute, s.@sum_score) - END, - - IF file_path != "" THEN - f.println(s, s.@sum_score) - END, - - IF print_results THEN - @@top_scores_heap += Vertex_Score(s, s.@sum_score) - END; - -IF print_results THEN - PRINT @@top_scores_heap; - IF display_edges THEN - - FOREACH vert IN @@top_scores_heap DO - @@top_vertices += vert.Vertex_ID; + V = SELECT s + FROM (s:Start) + POST-ACCUM + IF result_attribute != "" THEN + s.setAttr(result_attribute, s.@sum_score) + END, + IF file_path != "" THEN + f.println(s, s.@sum_score) + END, + IF print_results THEN + @@top_scores_heap += Vertex_Score(s, s.@sum_score) + END; + + IF print_results THEN + PRINT @@top_scores_heap; + IF display_edges THEN + FOREACH vert IN @@top_scores_heap DO + @@top_vertices += vert.Vertex_ID; + END; + + Top = {@@top_vertices}; + Top = SELECT s + FROM (s:Top) -[e:e_type]- (t:v_type) + WHERE @@top_vertices.contains(t) + ACCUM @@edge_set += e; + + PRINT @@edge_set; + PRINT Top; END; - - Top = {@@top_vertices}; - Top = SELECT s - FROM (s:Top) -[e:e_type]- (t:v_type) - WHERE @@top_vertices.contains(t) - ACCUM @@edge_set += e; - - PRINT @@edge_set; - PRINT Top; END; -END; - - // propagate the pagerank score to the result_attribute on Payment_Transaction vertex - res = SELECT s - FROM (s:Merchant) <-[e:Merchant_Receive_Transaction]- (t:Payment_Transaction) - ACCUM t.@pagerank_score += s.@sum_score - POST-ACCUM t.setAttr("mer_pagerank", t.@pagerank_score); -} \ No newline at end of file + // Propagate the PageRank score to the result_attribute on Payment_Transaction vertices + res = SELECT s + FROM (s:Merchant) <-[e:Merchant_Receive_Transaction]- (t:Payment_Transaction) + ACCUM t.@pagerank_score += s.@sum_score + POST-ACCUM t.setAttr("mer_pagerank", t.@pagerank_score); +} diff --git 
a/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant_insights.gsql b/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant_insights.gsql index 61695fb3..e7f6166c 100644 --- a/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant_insights.gsql +++ b/financial_crime/transaction_fraud/queries/tg_pagerank_wt_merchant_insights.gsql @@ -4,6 +4,9 @@ CREATE OR REPLACE QUERY tg_pagerank_wt_merchant_insights ( STRING e_type = "Merchant_Merchant", INT top_k = 5 ) SYNTAX v3 { + +// This is an Insights query that supports the merchant PageRank app. It should be installed to provide better performance. + TYPEDEF TUPLE Vertex_Score; HeapAccum(top_k, score DESC) @@top_scores_heap; SetAccum @@top_vertices; # vertices with top score From 62d2f7ea575f41484ac9f72bed60776ef0c1250c Mon Sep 17 00:00:00 2001 From: Truman Wang Date: Tue, 12 Nov 2024 14:52:00 -0800 Subject: [PATCH 5/5] mule account add comments --- .../attributes_to_party_traversal.gsql | 82 ------- ...e_money_received_by_one_hop_neighbors.gsql | 68 +++--- ...ghts_get_binary_classification_ratios.gsql | 2 + ..._get_net_gain_numbers_and_percentages.gsql | 1 + .../queries/insights_shortest_path.gsql | 2 +- .../queries/n_hop_fraud_count_device.gsql | 116 ++++++---- .../queries/n_hop_fraud_count_ip.gsql | 116 ++++++---- .../n_hop_number_of_total_mule_account.gsql | 100 +++++---- .../number_of_mule_accounts_on_same_IP.gsql | 56 +++-- ...umber_of_mule_accounts_on_same_device.gsql | 56 +++-- .../queries/party_full_address.gsql | 30 --- .../ratio_of_mule_account_transfer.gsql | 61 ++--- .../queries/single_Party_PII.gsql | 67 ------ .../queries/tg_pagerank_wt_account.gsql | 212 +++++++++--------- .../tg_shortest_path_length_account.gsql | 80 ++++--- 15 files changed, 503 insertions(+), 546 deletions(-) delete mode 100644 financial_crime/mule_account_detection/queries/attributes_to_party_traversal.gsql delete mode 100644
financial_crime/mule_account_detection/queries/party_full_address.gsql delete mode 100644 financial_crime/mule_account_detection/queries/single_Party_PII.gsql diff --git a/financial_crime/mule_account_detection/queries/attributes_to_party_traversal.gsql b/financial_crime/mule_account_detection/queries/attributes_to_party_traversal.gsql deleted file mode 100644 index 8b645b09..00000000 --- a/financial_crime/mule_account_detection/queries/attributes_to_party_traversal.gsql +++ /dev/null @@ -1,82 +0,0 @@ -USE GRAPH Mule_Account_Detection - -CREATE DISTRIBUTED QUERY attributes_to_party_traversal( - STRING v_type, - STRING id) FOR GRAPH Mule_Account_Detection { - - /* - This query navigates from any given attribute of a party's identification information to locate all other personally - identifiable information (PII) associated with that party. - - * Parameters: - * - v_type: the vertex type of the given PII attribute - * - id: the primary id of the given PII vertex - */ - - SetAccum @@set; - SetAccum @@edge_type_set; - MinAccum @address, @full_name, @phone,@dob,@email,@ids,@device,@ips; - VERTEX v; - v = to_vertex(id,v_type); - - @@set += v; - start = @@set; - - rlt = SELECT t - FROM start:s -()- Party:t; - - - @@edge_type_set = ( - "Has_Address", - "Has_Full_Name", - "Has_Phone", - "Has_DOB", - "Has_Email", - "Has_ID", - "Has_Device", - "Has_IP" - ); - - - - rlt = SELECT s - FROM rlt:s -(@@edge_type_set:e)- :t - ACCUM - Case e.type - WHEN "Has_Address" THEN - s.@address += t - WHEN "Has_Full_Name" THEN - s.@full_name += t - WHEN "Has_Phone" THEN - s.@phone += t - WHEN "Has_DOB" THEN - s.@dob += t - WHEN "Has_Email" THEN - s.@email += t - WHEN "Has_ID" THEN - s.@ids += t - WHEN "Has_Device" THEN - s.@device += t - WHEN "Has_IP" THEN - s.@ips += t - END; - PRINT rlt[ - rlt.@address AS Address, - rlt.@full_name AS Full_Name, - rlt.@phone AS Phone, - rlt.@dob AS DOB, - rlt.@email AS Email, - rlt.@ids AS ID, - rlt.@device AS Device, - rlt.@ips AS IP - ]; - - - - -} - 
-UPDATE DESCRIPTION OF QUERY attributes_to_party_traversal "Originating from a chosen attribute of party identification information, this query methodically searches the transaction fraud graph to retrieve all associated PII for the specified party. Its utility is paramount in fraud detection and prevention frameworks, enabling a comprehensive analysis of party identities." - -UPDATE DESCRIPTION OF QUERY_PARAM attributes_to_party_traversal.v_type "The vertex type of party's identification information Vertex type." -UPDATE DESCRIPTION OF QUERY_PARAM attributes_to_party_traversal.id "The id of the party's identification information Vertex." diff --git a/financial_crime/mule_account_detection/queries/average_money_received_by_one_hop_neighbors.gsql b/financial_crime/mule_account_detection/queries/average_money_received_by_one_hop_neighbors.gsql index 976a7cf1..e59f17c1 100644 --- a/financial_crime/mule_account_detection/queries/average_money_received_by_one_hop_neighbors.gsql +++ b/financial_crime/mule_account_detection/queries/average_money_received_by_one_hop_neighbors.gsql @@ -1,35 +1,45 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY average_money_received_by_one_hop_neighbors( -DATETIME min_createTime = to_datetime("2019-01-06"), -DATETIME max_createTime = to_datetime("2022-01-08") ) FOR GRAPH Mule_Account_Detection { + DATETIME min_createTime = to_datetime("2019-01-06"), + DATETIME max_createTime = to_datetime("2022-01-08") +) FOR GRAPH Mule_Account_Detection { - /* - * Feature: - average money, total money received by all 1 hop away neighbors from this account (considering transfer in as edges) - and the count of one hop accounts - */ - - SumAccum @total_amount; - AvgAccum @avg_amount; - SetAccum @accounts; - - - rlt = SELECT s + /* + Query Overview: + This is a feature engineering query: it calculates the average amount of money, total money received, + and the count of one-hop neighbors for each account within a specified time range. 
The result provides + insights into the transaction behavior of one-hop neighbors linked through incoming transfers. + + Parameters: + - min_createTime, max_createTime: Filter transactions within this time range. + + Outputs: + - Total_One_Hop_Amount: Total money received by all one-hop neighbors. + - Average_One_Hop_Amount: Average amount received by one-hop neighbors. + - Count_One_Hop_Accounts: Total count of unique one-hop accounts. + */ + + SumAccum @total_amount; + AvgAccum @avg_amount; + SetAccum @accounts; + + rlt = + SELECT s FROM Account:s -(Send_Transfer>)- Transfer_Transaction:ta -(Receive_Transfer>)- Account:d - -(= min_createTime - AND ta.transfer_time < max_createTime - AND td.transfer_time >= min_createTime - AND td.transfer_time < max_createTime - ACCUM s.@total_amount += td.amount, - s.@avg_amount +=td.amount, - s.@accounts += d.id; - - PRINT rlt[ - rlt.@total_amount AS Total_One_Hop_Amount, - rlt.@avg_amount AS Average_One_Hop_Amount, - rlt.@accounts.size() AS Count_One_Hop_Accounts - ]; - -} \ No newline at end of file + AND ta.transfer_time < max_createTime + AND td.transfer_time >= min_createTime + AND td.transfer_time < max_createTime + ACCUM + s.@total_amount += td.amount, + s.@avg_amount += td.amount, + s.@accounts += d.id; + + PRINT rlt[ + rlt.@total_amount AS Total_One_Hop_Amount, + rlt.@avg_amount AS Average_One_Hop_Amount, + rlt.@accounts.size() AS Count_One_Hop_Accounts + ]; +} diff --git a/financial_crime/mule_account_detection/queries/insights_get_binary_classification_ratios.gsql b/financial_crime/mule_account_detection/queries/insights_get_binary_classification_ratios.gsql index e44614ca..ca1a3d34 100644 --- a/financial_crime/mule_account_detection/queries/insights_get_binary_classification_ratios.gsql +++ b/financial_crime/mule_account_detection/queries/insights_get_binary_classification_ratios.gsql @@ -10,6 +10,8 @@ CREATE OR REPLACE QUERY insights_get_binary_classification_ratios ( INT tg_false_negative, INT tg_false_positive ) 
{ + // This is a query to be installed to support Insights Application binary classification ratios + TYPEDEF TUPLE Table_Entry; ListAccum @@performance_metrics_stats; diff --git a/financial_crime/mule_account_detection/queries/insights_get_net_gain_numbers_and_percentages.gsql b/financial_crime/mule_account_detection/queries/insights_get_net_gain_numbers_and_percentages.gsql index cbb088d4..056e6ffd 100644 --- a/financial_crime/mule_account_detection/queries/insights_get_net_gain_numbers_and_percentages.gsql +++ b/financial_crime/mule_account_detection/queries/insights_get_net_gain_numbers_and_percentages.gsql @@ -8,6 +8,7 @@ CREATE OR REPLACE QUERY insights_get_net_gain_numbers_and_percentages ( INT tg_false_positive, INT tg_false_negative ) { + // This is a query to be installed to support Insights Application get net gain numbers and percentages INT additional_fraud_blocked = tg_true_positive - no_tg_true_positive; DOUBLE additional_fraud_blocked_pct = (100.0 * additional_fraud_blocked) / no_tg_true_positive; STRING additional_fraud_blocked_pct_str = to_string(round(additional_fraud_blocked_pct, 3)) + " %"; diff --git a/financial_crime/mule_account_detection/queries/insights_shortest_path.gsql b/financial_crime/mule_account_detection/queries/insights_shortest_path.gsql index 7ba5b8fa..d10635ef 100644 --- a/financial_crime/mule_account_detection/queries/insights_shortest_path.gsql +++ b/financial_crime/mule_account_detection/queries/insights_shortest_path.gsql @@ -2,7 +2,7 @@ CREATE DISTRIBUTED QUERY insights_shortest_path( Vertex ver // 0448743965 ) FOR GRAPH Mule_Account_Detection { - + //This is an insights query to be installed so that Insights App Shortest Path can be built upon.
SetAccum @@fraud; OrAccum @or_visted; ListAccum @path_list; diff --git a/financial_crime/mule_account_detection/queries/n_hop_fraud_count_device.gsql b/financial_crime/mule_account_detection/queries/n_hop_fraud_count_device.gsql index 39983709..8ee0b534 100644 --- a/financial_crime/mule_account_detection/queries/n_hop_fraud_count_device.gsql +++ b/financial_crime/mule_account_detection/queries/n_hop_fraud_count_device.gsql @@ -1,52 +1,76 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY n_hop_fraud_count_device( -INT max_hops=3) FOR GRAPH Mule_Account_Detection { - -/* - This query determines number of fraudulent accounts that can be reached in max_hops of device sharing - */ - - SetAccum @sources; - SumAccum @last_iter_source_size; - MinAccum @cnt; - - mule = SELECT a - FROM Account:a - WHERE a.is_fraud == 1; - - fraudster = SELECT p - FROM mule:a -(Party_Has_Account:e)- Party:p - WHERE a.is_fraud == 1 - ACCUM p.@sources += p.id - POST-ACCUM p.@last_iter_source_size = p.@sources.size(); + INT max_hops = 3 +) FOR GRAPH Mule_Account_Detection { + + /* + Query Overview: + This is a feature engineering query: it calculates the number of fraudulent accounts reachable within a + specified number of hops (max_hops) through device-sharing connections. This information can help assess + fraud risk by analyzing the spread of fraud across devices shared between accounts. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Parameters: + - max_hops: Specifies the maximum number of hops for device-sharing traversal. + + Outputs: + - fraud_device: Attribute on Account vertices representing the count of reachable fraudulent accounts. 
+ */ + + SetAccum @sources; + SumAccum @last_iter_source_size; + MinAccum @cnt; + + // Identify fraudulent accounts + mule = + SELECT a + FROM Account:a + WHERE a.is_fraud == 1; + + // Seed the traversal with the parties (fraudsters) that own the fraudulent accounts + fraudster = + SELECT p + FROM mule:a -(Party_Has_Account:e)- Party:p + WHERE a.is_fraud == 1 + ACCUM p.@sources += p.id + POST-ACCUM p.@last_iter_source_size = p.@sources.size(); - start = fraudster; - - WHILE start.size() > 0 LIMIT max_hops DO - - dest = SELECT t - FROM start:s -(Has_Device:h1)- Device:d -(Has_Device:h2)- Party:t - WHERE t.id != s.id - ACCUM t.@sources += s.@sources; - - start = SELECT t - FROM dest:t - WHERE t.@sources.size() > t.@last_iter_source_size - POST-ACCUM t.@last_iter_source_size = t.@sources.size(); - - END; - - mule = SELECT a - FROM Account:a -(Party_Has_Account:e)- Party:p - ACCUM a.@cnt = p.@sources.size() - HAVING a.@cnt >0; - - PRINT mule.size(); - rlt = SELECT a + start = fraudster; + + // Traverse up to max_hops through shared devices + WHILE start.size() > 0 LIMIT max_hops DO + dest = + SELECT t + FROM start:s -(Has_Device:h1)- Device:d -(Has_Device:h2)- Party:t + WHERE t.id != s.id + ACCUM t.@sources += s.@sources; + + start = + SELECT t + FROM dest:t + WHERE t.@sources.size() > t.@last_iter_source_size + POST-ACCUM t.@last_iter_source_size = t.@sources.size(); + END; + + // Count and assign reachable fraudulent accounts for each account + mule = + SELECT a + FROM Account:a -(Party_Has_Account:e)- Party:p + ACCUM a.@cnt = p.@sources.size() + HAVING a.@cnt > 0; + + PRINT mule.size(); + + rlt = + SELECT a FROM mule:a - POST-ACCUM - a.setAttr("fraud_device", a.@cnt); + POST-ACCUM a.setAttr("fraud_device", a.@cnt); + PRINT "Finished successfully" AS status; - -} \ No newline at end of file +} diff --git a/financial_crime/mule_account_detection/queries/n_hop_fraud_count_ip.gsql b/financial_crime/mule_account_detection/queries/n_hop_fraud_count_ip.gsql index
44ff5cc7..84fbad5f 100644 --- a/financial_crime/mule_account_detection/queries/n_hop_fraud_count_ip.gsql +++ b/financial_crime/mule_account_detection/queries/n_hop_fraud_count_ip.gsql @@ -1,52 +1,76 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY n_hop_fraud_count_ip( -INT max_hops=3) FOR GRAPH Mule_Account_Detection { - -/* - This query determines number of fraudulent accounts that can be reached in max_hops of ip sharing - */ - - SetAccum @sources; - SumAccum @last_iter_source_size; - MinAccum @cnt; - - mule = SELECT a - FROM Account:a - WHERE a.is_fraud == 1; - - fraudster = SELECT p - FROM mule:a -(Party_Has_Account:e)- Party:p - WHERE a.is_fraud == 1 - ACCUM p.@sources += p.id - POST-ACCUM p.@last_iter_source_size = p.@sources.size(); + INT max_hops = 3 +) FOR GRAPH Mule_Account_Detection { + + /* + Query Overview: + This is a feature engineering query: it calculates the number of fraudulent accounts reachable within a + specified number of hops (max_hops) through IP-sharing connections. This information helps assess the spread + of fraud across accounts linked by shared IP addresses. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Parameters: + - max_hops: Specifies the maximum number of hops for IP-sharing traversal. + + Outputs: + - fraud_ip: Attribute on Account vertices representing the count of reachable fraudulent accounts. 
+ */ + + SetAccum @sources; + SumAccum @last_iter_source_size; + MinAccum @cnt; + + // Identify fraudulent accounts + mule = + SELECT a + FROM Account:a + WHERE a.is_fraud == 1; + + // Seed the traversal with the parties (fraudsters) that own the fraudulent accounts + fraudster = + SELECT p + FROM mule:a -(Party_Has_Account:e)- Party:p + WHERE a.is_fraud == 1 + ACCUM p.@sources += p.id + POST-ACCUM p.@last_iter_source_size = p.@sources.size(); - start = fraudster; - - WHILE start.size() > 0 LIMIT max_hops DO - - dest = SELECT t - FROM start:s -(Has_IP:h1)- IP:ip -(Has_IP:h2)- Party:t - WHERE t.id != s.id - ACCUM t.@sources += s.@sources; - - start = SELECT t - FROM dest:t - WHERE t.@sources.size() > t.@last_iter_source_size - POST-ACCUM t.@last_iter_source_size = t.@sources.size(); - - END; - - mule = SELECT a - FROM Account:a -(Party_Has_Account:e)- Party:p - ACCUM a.@cnt = p.@sources.size() - HAVING a.@cnt >0; - - PRINT mule.size(); - rlt = SELECT a + start = fraudster; + + // Traverse up to max_hops through shared IPs + WHILE start.size() > 0 LIMIT max_hops DO + dest = + SELECT t + FROM start:s -(Has_IP:h1)- IP:ip -(Has_IP:h2)- Party:t + WHERE t.id != s.id + ACCUM t.@sources += s.@sources; + + start = + SELECT t + FROM dest:t + WHERE t.@sources.size() > t.@last_iter_source_size + POST-ACCUM t.@last_iter_source_size = t.@sources.size(); + END; + + // Count and assign reachable fraudulent accounts for each account + mule = + SELECT a + FROM Account:a -(Party_Has_Account:e)- Party:p + ACCUM a.@cnt = p.@sources.size() + HAVING a.@cnt > 0; + + PRINT mule.size(); + + rlt = + SELECT a FROM mule:a - POST-ACCUM - a.setAttr("fraud_ip", a.@cnt); + POST-ACCUM a.setAttr("fraud_ip", a.@cnt); + PRINT "Finished successfully" AS status; - -} \ No newline at end of file +} diff --git a/financial_crime/mule_account_detection/queries/n_hop_number_of_total_mule_account.gsql b/financial_crime/mule_account_detection/queries/n_hop_number_of_total_mule_account.gsql index 0b40e813..97e007e7
100644 --- a/financial_crime/mule_account_detection/queries/n_hop_number_of_total_mule_account.gsql +++ b/financial_crime/mule_account_detection/queries/n_hop_number_of_total_mule_account.gsql @@ -1,45 +1,67 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY n_hop_number_of_total_mule_account( -INT max_hops=1) FOR GRAPH Mule_Account_Detection { - /* - * Feature: - for each account, count the number of distinct mule accounts within max_hops hops - - */ - SetAccum @sources; - SumAccum @last_iter_source_size; - //MinAccum @cnt; - - mule = SELECT a - FROM Account:a - WHERE a.is_fraud == 1 - ACCUM a.@sources += a.id - POST-ACCUM a.@last_iter_source_size = a.@sources.size(); - - start = mule; - - WHILE start.size() > 0 LIMIT max_hops DO - - dest = SELECT a2 - FROM start:s -(Account_Account:e)- Account:a2 - ACCUM a2.@sources += s.@sources; - - start = select d FROM dest:d - WHERE d.@sources.size() > d.@last_iter_source_size - POST-ACCUM d.@last_iter_source_size = d.@sources.size(); - - END; - - mule = SELECT a - FROM Account:a - HAVING a.@sources.size() >0; + INT max_hops = 1 +) FOR GRAPH Mule_Account_Detection { + + /* + Query Overview: + This is a feature engineering query: it calculates the number of distinct mule (fraudulent) accounts + within a specified number of hops (max_hops) from each account. The result provides an indication of + potential exposure to mule accounts within a given network radius. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Parameters: + - max_hops: Specifies the maximum number of hops for the mule account traversal. 
+ + Outputs: + - mule_cnt: Attribute on Account vertices representing the count of distinct mule accounts within max_hops. + */ + + SetAccum @sources; + SumAccum @last_iter_source_size; + + // Initialize mule accounts (fraudulent accounts) + mule = + SELECT a + FROM Account:a + WHERE a.is_fraud == 1 + ACCUM a.@sources += a.id + POST-ACCUM a.@last_iter_source_size = a.@sources.size(); - PRINT mule.size(); - rlt = SELECT a + start = mule; + + // Traverse up to max_hops for mule account connections + WHILE start.size() > 0 LIMIT max_hops DO + dest = + SELECT a2 + FROM start:s -(Account_Account:e)- Account:a2 + ACCUM a2.@sources += s.@sources; + + start = + SELECT d + FROM dest:d + WHERE d.@sources.size() > d.@last_iter_source_size + POST-ACCUM d.@last_iter_source_size = d.@sources.size(); + END; + + // Filter accounts with at least one mule account connection within max_hops + mule = + SELECT a + FROM Account:a + HAVING a.@sources.size() > 0; + + PRINT mule.size(); + + rlt = + SELECT a FROM mule:a - POST-ACCUM - a.setAttr("mule_cnt", a.@sources.size()); + POST-ACCUM a.setAttr("mule_cnt", a.@sources.size()); + PRINT "Finished successfully" AS status; - -} \ No newline at end of file +} diff --git a/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_IP.gsql b/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_IP.gsql index 5c21cd70..a90dcf2e 100644 --- a/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_IP.gsql +++ b/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_IP.gsql @@ -1,33 +1,49 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY number_of_mule_accounts_on_same_IP( - ) FOR GRAPH Mule_Account_Detection { +) FOR GRAPH Mule_Account_Detection { - /* - * Feature: - number of distinct mule_accounts using the same IP. 
- */ - - SetAccum @frauds; + /* + Query Overview: + This is a feature engineering query: it calculates the number of distinct mule (fraudulent) accounts + that share the same IP address with other accounts. The result provides insights into potential IP-sharing + risks among mule accounts. + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Outputs: + - ip_collision: Attribute on Account vertices representing the count of distinct mule accounts + using the same IP address. + */ + + SetAccum @frauds; + + // Identify fraudulent accounts + fraud = + SELECT s + FROM Account:s + WHERE s.is_fraud == 1; - fraud = SELECT s - FROM Account:s WHERE s.is_fraud ==1; - - rlt = SELECT a2 + // Count distinct mule accounts sharing the same IP + rlt = + SELECT a2 FROM fraud:a1 -(Party_Has_Account:e)- Party:pa - -(Has_IP:e1)- IP:ip-(Has_IP)- Party:pt + -(Has_IP:e1)- IP:ip -(Has_IP)- Party:pt -(Party_Has_Account:e2)- Account:a2 WHERE a1 != a2 ACCUM a2.@frauds += a1 - HAVING a2.@frauds.size() >0 ; - - PRINT rlt.size(); + HAVING a2.@frauds.size() > 0; + + PRINT rlt.size(); - rlt = SELECT a + rlt = + SELECT a FROM rlt:a - POST-ACCUM - a.setAttr("ip_collision", a.@frauds.size()); + POST-ACCUM a.setAttr("ip_collision", a.@frauds.size()); + PRINT "Finished successfully" AS status; - -} \ No newline at end of file +} diff --git a/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_device.gsql b/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_device.gsql index bb013c15..3a5b56f9 100644 --- a/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_device.gsql +++ 
b/financial_crime/mule_account_detection/queries/number_of_mule_accounts_on_same_device.gsql @@ -1,33 +1,49 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY number_of_mule_accounts_on_same_device( - ) FOR GRAPH Mule_Account_Detection { +) FOR GRAPH Mule_Account_Detection { - /* - * Feature: - number of distinct mule_accounts using the same device. - */ - - SetAccum @frauds; + /* + Query Overview: + This is a feature engineering query: it calculates the number of distinct mule (fraudulent) accounts + that share the same device with other accounts. The result provides insights into potential device-sharing + risks among mule accounts. + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Outputs: + - device_collision: Attribute on Account vertices representing the count of distinct mule accounts + using the same device. 
+ */ + + SetAccum @frauds; + + // Identify fraudulent accounts + fraud = + SELECT s + FROM Account:s + WHERE s.is_fraud == 1; - fraud = SELECT s - FROM Account:s WHERE s.is_fraud ==1; - - rlt = SELECT a2 + // Count distinct mule accounts sharing the same device + rlt = + SELECT a2 FROM fraud:a1 -(Party_Has_Account:e)- Party:pa - -(Has_Device:e1)- Device:d-(Has_Device)- Party:pt + -(Has_Device:e1)- Device:d -(Has_Device)- Party:pt -(Party_Has_Account:e2)- Account:a2 WHERE a1 != a2 ACCUM a2.@frauds += a1 - HAVING a2.@frauds.size() >0 ; - - PRINT rlt.size(); + HAVING a2.@frauds.size() > 0; + + PRINT rlt.size(); - rlt = SELECT a + rlt = + SELECT a FROM rlt:a - POST-ACCUM - a.setAttr("device_collision", a.@frauds.size()); + POST-ACCUM a.setAttr("device_collision", a.@frauds.size()); + PRINT "Finished successfully" AS status; - -} \ No newline at end of file +} diff --git a/financial_crime/mule_account_detection/queries/party_full_address.gsql b/financial_crime/mule_account_detection/queries/party_full_address.gsql deleted file mode 100644 index 95a72a35..00000000 --- a/financial_crime/mule_account_detection/queries/party_full_address.gsql +++ /dev/null @@ -1,30 +0,0 @@ -USE GRAPH Mule_Account_Detection - -CREATE DISTRIBUTED QUERY party_full_address( - Vertex p) FOR GRAPH Mule_Account_Detection { - - // This query retrieves the complete address for a party, encompassing the street address, city, state, and zipcode. 
- start = {p}; - MinAccum @@address, @@zipcode, @@state; - MinAccum @@city; - - - rlt = SELECT a - FROM start:s -(Has_Address)- Address:a -(Located_In)- City:c - (Located_In)- State:t - ACCUM - @@address += a, - @@city += c.city, - @@state += t; - rlt = SELECT a - FROM start:s -(Has_Address)- Address:a -(Assigned_To)- Zipcode:z - ACCUM @@zipcode += z; - - PRINT @@address AS Street_Address, @@city AS City, @@state AS State, @@zipcode AS Zipcode; - - - -} - -UPDATE DESCRIPTION OF QUERY party_full_address "This query retrieves the complete address for a party, encompassing the street address, city, state, and zipcode. It's essential for verifying and analyzing party location data, facilitating accurate and efficient address validation." - -UPDATE DESCRIPTION OF QUERY_PARAM party_full_address.p "The Party Vertex of interest." \ No newline at end of file diff --git a/financial_crime/mule_account_detection/queries/ratio_of_mule_account_transfer.gsql b/financial_crime/mule_account_detection/queries/ratio_of_mule_account_transfer.gsql index 4a36ea23..46c99f18 100644 --- a/financial_crime/mule_account_detection/queries/ratio_of_mule_account_transfer.gsql +++ b/financial_crime/mule_account_detection/queries/ratio_of_mule_account_transfer.gsql @@ -3,32 +3,43 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY ratio_of_mule_account_transfer( ) FOR GRAPH Mule_Account_Detection { - /* - * Feature: - ratio of mule account transfers over all transfers - */ - -SumAccum @mule_out_amount, @total_out_amount,@mule_in_amount, @total_in_amount; - + /* + Query Overview: + This is a feature engineering query: it calculates the ratio of mule account transfers over total + transfers for each account, both incoming and outgoing. This feature provides insights into the extent + of transactions involving mule accounts. 
- mule = SELECT s - FROM Account:s -(Send_Transfer>)- Transfer_Transaction:ta -(Receive_Transfer>)- Account:d - // -(0 ; - - PRINT mule.size(); + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Outputs: + - trans_in_mule_ratio: Attribute on Account vertices representing the ratio of incoming transfers from mule accounts. + - trans_out_mule_ratio: Attribute on Account vertices representing the ratio of outgoing transfers to mule accounts. + */ + + SumAccum @mule_out_amount, @total_out_amount, @mule_in_amount, @total_in_amount; + + mule = + SELECT s + FROM Account:s -(Send_Transfer>)- Transfer_Transaction:ta -(Receive_Transfer>)- Account:d + ACCUM + s.@total_out_amount += ta.amount, + d.@total_in_amount += ta.amount, + IF d.is_fraud == 1 THEN s.@mule_out_amount += ta.amount END, + IF s.is_fraud == 1 THEN d.@mule_in_amount += ta.amount END + HAVING s.@mule_in_amount + s.@mule_out_amount > 0; + + PRINT mule.size(); - rlt = SELECT s + rlt = + SELECT s FROM Account:s POST-ACCUM - s.setAttr("trans_in_mule_ratio",s.@mule_in_amount/(s.@total_in_amount+0.01)), - s.setAttr("trans_out_mule_ratio",s.@mule_out_amount/(s.@total_out_amount+0.01)); - - PRINT "Finished successfully" AS status; - - -} \ No newline at end of file + s.setAttr("trans_in_mule_ratio", s.@mule_in_amount / (s.@total_in_amount + 0.01)), + s.setAttr("trans_out_mule_ratio", s.@mule_out_amount / (s.@total_out_amount + 0.01)); + + PRINT "Finished successfully" AS status; +} diff --git a/financial_crime/mule_account_detection/queries/single_Party_PII.gsql b/financial_crime/mule_account_detection/queries/single_Party_PII.gsql deleted file mode 100644 index 66892fe5..00000000 --- 
a/financial_crime/mule_account_detection/queries/single_Party_PII.gsql +++ /dev/null @@ -1,67 +0,0 @@ -USE GRAPH Mule_Account_Detection - -CREATE DISTRIBUTED QUERY single_Party_PII( - Vertex ver ) FOR GRAPH Mule_Account_Detection { - -/** - * Feature: - * This query returns the PII attributes of a single Party vertex. - * - * Parameters: - * - Vertex ver specifies a single input Vertex - - * Output: - * the query prints the results directly to the console. - - */ - SetAccum @@edge_type_set; - @@edge_type_set = ( - "Has_Address", - "Has_Full_Name", - "Has_Phone", - "Has_DOB", - "Has_Email", - "Has_ID", - "Has_Device", - "Has_IP" - ); - MinAccum @address, @full_name, @phone,@dob,@email,@ids,@device,@ips; - - rlt = {ver}; - rlt = SELECT s - FROM rlt:s -(@@edge_type_set:e)- :t - ACCUM - Case e.type - WHEN "Has_Address" THEN - s.@address += t - WHEN "Has_Full_Name" THEN - s.@full_name += t - WHEN "Has_Phone" THEN - s.@phone += t - WHEN "Has_DOB" THEN - s.@dob += t - WHEN "Has_Email" THEN - s.@email += t - WHEN "Has_ID" THEN - s.@ids += t - WHEN "Has_Device" THEN - s.@device += t - WHEN "Has_IP" THEN - s.@ips += t - END; - PRINT rlt[ - rlt.@address AS Address, - rlt.@full_name AS Full_Name, - rlt.@phone AS Phone, - rlt.@dob AS DOB, - rlt.@email AS Email, - rlt.@ids AS ID, - rlt.@device AS Device, - rlt.@ips AS IP - ]; - -} - -UPDATE DESCRIPTION OF QUERY single_Party_PII "This query retrieves party identification information, including details like full name, date of birth, email address, and other personal identifiers. It is used for verifying the identity of individuals associated with transactions, which is crucial for enhancing security measures and preventing identity theft or fraud within financial operations." - -UPDATE DESCRIPTION OF QUERY_PARAM single_Party_PII.ver "The single Party vertex of interest." 
diff --git a/financial_crime/mule_account_detection/queries/tg_pagerank_wt_account.gsql b/financial_crime/mule_account_detection/queries/tg_pagerank_wt_account.gsql index d8176327..e3f215cd 100644 --- a/financial_crime/mule_account_detection/queries/tg_pagerank_wt_account.gsql +++ b/financial_crime/mule_account_detection/queries/tg_pagerank_wt_account.gsql @@ -1,126 +1,118 @@ USE GRAPH Mule_Account_Detection CREATE QUERY tg_pagerank_wt_account ( - STRING v_type = "Account", - STRING e_type = "Account_Account", - STRING weight_attribute = "weight", - FLOAT max_change=0.001, - INT maximum_iteration=25, - FLOAT damping=0.85, - INT top_k = 100, - BOOL print_results = FALSE, - STRING result_attribute = "pagerank", - STRING file_path = "", - BOOL display_edges = FALSE) SYNTAX V1 { + STRING v_type = "Account", + STRING e_type = "Account_Account", + STRING weight_attribute = "weight", + FLOAT max_change = 0.001, + INT maximum_iteration = 25, + FLOAT damping = 0.85, + INT top_k = 100, + BOOL print_results = FALSE, + STRING result_attribute = "pagerank", + STRING file_path = "", + BOOL display_edges = FALSE +) SYNTAX V1 { /* - Description: - Compute the pageRank score for each vertex in the GRAPH - In each iteration, compute a score for each vertex: - score = (1-damping) + damping*sum(received scores FROM its neighbors). - The pageRank algorithm stops when either of the following is true: - a) it reaches maximum_iteration iterations; - b) the max score change for any vertex compared to the last iteration <= max_change. + Query Overview: + This is a feature engineering query: it calculates the PageRank score for each account vertex in the + graph using the PageRank algorithm, taking into account weighted edges between accounts. The PageRank + algorithm iterates until the maximum number of iterations is reached or until score changes fall below + a specified threshold, providing an indication of account centrality. 
- This query supports only taking in a single edge for the time being (8/13/2020). + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. - TigerGraph Documentation: - https://docs.tigergraph.com/graph-ml/current/centrality-algorithms/pagerank + Parameters: + - v_type: Vertex type to compute PageRank for (default: "Account"). + - e_type: Edge type to traverse for the PageRank calculation. + - weight_attribute: Attribute used as edge weight. + - max_change: Maximum allowed change between iterations to achieve convergence. + - maximum_iteration: Maximum number of iterations for the PageRank calculation. + - damping: Damping factor for PageRank. + - top_k: Number of top results to output. + - result_attribute: Attribute on vertices to store PageRank scores. + - print_results: Flag to control if results should be printed. + - file_path: Path for output CSV if saving results. + - display_edges: Flag to output edges for visualization. - Parameters: - v_type: - vertex types to traverse - print_results: - If True, print JSON output - e_type: - edge types to traverse - result_attribute: - INT attribute to store results to - weight_attribute: - attribute for edge weights - maximum_iteration: - max #iterations - file_path: - file to write CSV output to - top_k: - #top scores to output - display_edges: - output edges for visualization - max_change: - max allowed change between iterations to achieve convergence - damping: - importance of traversal vs. random teleport + Outputs: + - pagerank: Attribute on Account vertices representing the computed PageRank score. 
*/ -TYPEDEF TUPLE Vertex_Score; -HeapAccum(top_k, score DESC) @@top_scores_heap; -SetAccum @@top_vertices; # vertices with top score -MaxAccum @@max_diff = 9999; # max score change in an iteration -SumAccum @sum_recvd_score = 0; # sum of scores each vertex receives FROM neighbors -SumAccum @sum_score = 1; # initial score for every vertex is 1. -SetAccum @@edge_set; # list of all edges, if display is needed -SumAccum @sum_total_wt; -//MinAccum @pagerank_score; -FILE f (file_path); + TYPEDEF TUPLE Vertex_Score; + HeapAccum(top_k, score DESC) @@top_scores_heap; + SetAccum @@top_vertices; // vertices with top scores + MaxAccum @@max_diff = 9999; // max score change in an iteration + SumAccum @sum_recvd_score = 0; // sum of scores each vertex receives from neighbors + SumAccum @sum_score = 1; // initial score for every vertex is 1 + SetAccum @@edge_set; // list of all edges, if display is needed + SumAccum @sum_total_wt; + FILE f (file_path); + + Start = {v_type}; -Start = {v_type}; - # Calculate the total weight for each vertex -Start = SELECT s + // Calculate the total weight for each vertex + Start = + SELECT s FROM Start:s -(e_type:e) - v_type:t - ACCUM s.@sum_total_wt += e.getAttr(weight_attribute, "FLOAT"); - -# PageRank iterations -# Start with all vertices of specified type(s) -WHILE @@max_diff > max_change LIMIT maximum_iteration DO - @@max_diff = 0; - V = SELECT s - FROM Start:s -(e_type:e)- v_type:t - ACCUM t.@sum_recvd_score += s.@sum_score * e.getAttr(weight_attribute, "FLOAT")/s.@sum_total_wt - POST-ACCUM s.@sum_score = (1.0-damping) + damping * s.@sum_recvd_score, - s.@sum_recvd_score = 0, - @@max_diff += abs(s.@sum_score - s.@sum_score'); -END; # END WHILE loop -# Output -IF file_path != "" THEN - f.println("Vertex_ID", "PageRank"); -END; + ACCUM s.@sum_total_wt += e.getAttr(weight_attribute, "FLOAT"); -V = SELECT s - FROM Start:s - POST-ACCUM - IF result_attribute != "" THEN - s.setAttr(result_attribute, s.@sum_score) - END, - - IF file_path != "" THEN 
- f.println(s, s.@sum_score) - END, - - IF print_results THEN - @@top_scores_heap += Vertex_Score(s, s.@sum_score) - END; - -IF print_results THEN - PRINT @@top_scores_heap; - IF display_edges THEN - - FOREACH vert IN @@top_scores_heap DO - @@top_vertices += vert.Vertex_ID; + // PageRank iterations + WHILE @@max_diff > max_change LIMIT maximum_iteration DO + @@max_diff = 0; + V = SELECT s + FROM Start:s -(e_type:e)- v_type:t + ACCUM t.@sum_recvd_score += s.@sum_score * e.getAttr(weight_attribute, "FLOAT") / s.@sum_total_wt + POST-ACCUM + s.@sum_score = (1.0 - damping) + damping * s.@sum_recvd_score, + s.@sum_recvd_score = 0, + @@max_diff += abs(s.@sum_score - s.@sum_score'); + END; // END WHILE loop + + // Output results if file_path or print_results is specified + IF file_path != "" THEN + f.println("Vertex_ID", "PageRank"); + END; + + V = SELECT s + FROM Start:s + POST-ACCUM + IF result_attribute != "" THEN + s.setAttr(result_attribute, s.@sum_score) + END, + IF file_path != "" THEN + f.println(s, s.@sum_score) + END, + IF print_results THEN + @@top_scores_heap += Vertex_Score(s, s.@sum_score) + END; + + IF print_results THEN + PRINT @@top_scores_heap; + IF display_edges THEN + FOREACH vert IN @@top_scores_heap DO + @@top_vertices += vert.Vertex_ID; + END; + + Top = {@@top_vertices}; + Top = SELECT s + FROM Top:s -(e_type:e)- v_type:t + WHERE @@top_vertices.contains(t) + ACCUM @@edge_set += e; + + PRINT @@edge_set; + PRINT Top; END; - - Top = {@@top_vertices}; - Top = SELECT s - FROM Top:s -(e_type:e)- v_type:t - WHERE @@top_vertices.contains(t) - ACCUM @@edge_set += e; - - PRINT @@edge_set; - PRINT Top; END; -END; - // propagate the pagerank score to the result_attribute on Payment_Transaction vertex - res = SELECT s - FROM Account:s - POST-ACCUM s.setAttr("pagerank", s.@sum_score); -} \ No newline at end of file + // Propagate the PageRank score to the result_attribute on Account vertices + res = + SELECT s + FROM Account:s + POST-ACCUM s.setAttr("pagerank", 
s.@sum_score); +} diff --git a/financial_crime/mule_account_detection/queries/tg_shortest_path_length_account.gsql b/financial_crime/mule_account_detection/queries/tg_shortest_path_length_account.gsql index 7eadbf82..bc457c42 100644 --- a/financial_crime/mule_account_detection/queries/tg_shortest_path_length_account.gsql +++ b/financial_crime/mule_account_detection/queries/tg_shortest_path_length_account.gsql @@ -1,38 +1,56 @@ USE GRAPH Mule_Account_Detection CREATE DISTRIBUTED QUERY tg_shortest_path_length_account( -INT max_hops=5) FOR GRAPH Mule_Account_Detection { - -/* - This query determines the shortest path length from each account to a mule account - */ - - OrAccum @or_visited; - MinAccum @min_dis; - - - start = SELECT a - FROM Account:a - WHERE a.is_fraud == 1 - POST-ACCUM a.@or_visited += TRUE, a.@min_dis = 0; + INT max_hops = 5 +) FOR GRAPH Mule_Account_Detection { + + /* + Query Overview: + This is a feature engineering query: it calculates the shortest path length from each account to + a mule (fraudulent) account within a specified number of hops (max_hops). The result provides insights + into each account's proximity to mule accounts in the graph. + + Feature Storage Approaches: + - Approach 1: Store features as attributes of the Account vertex, allowing downstream ML models to access features directly from these attributes. + - Approach 2: Output features to CSV files, which downstream ML models can read for feature access. + + Note: This solution uses Approach 1 for direct feature updates on Account vertices. + + Parameters: + - max_hops: Specifies the maximum number of hops to consider for shortest path traversal. + + Outputs: + - shortest_path_length: Attribute on Account vertices representing the shortest path length to the nearest mule account. 
+ */ + + OrAccum @or_visited; + MinAccum @min_dis; + + // Initialize fraudulent accounts as the starting points + start = + SELECT a + FROM Account:a + WHERE a.is_fraud == 1 + POST-ACCUM a.@or_visited += TRUE, a.@min_dis = 0; - accounts = start; - - WHILE start.size() > 0 LIMIT max_hops DO - start = SELECT a2 - FROM start:a1 -(Account_Account:e)- Account:a2 - WHERE a2.@or_visited == FALSE - ACCUM - a2.@min_dis += a1.@min_dis + 1 - POST-ACCUM - a2.@or_visited += TRUE; - accounts = accounts UNION start; - END; + accounts = start; + + // Traverse up to max_hops to determine the shortest path length to a mule account + WHILE start.size() > 0 LIMIT max_hops DO + start = + SELECT a2 + FROM start:a1 -(Account_Account:e)- Account:a2 + WHERE a2.@or_visited == FALSE + ACCUM a2.@min_dis += a1.@min_dis + 1 + POST-ACCUM a2.@or_visited += TRUE; + + accounts = accounts UNION start; + END; - rlt = SELECT a + rlt = + SELECT a FROM accounts:a - POST-ACCUM - a.setAttr("shortest_path_length", a.@min_dis); + POST-ACCUM a.setAttr("shortest_path_length", a.@min_dis); + PRINT "Finished successfully" AS status; - -} \ No newline at end of file +}