-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathanalyze_money_manager.py
More file actions
322 lines (266 loc) · 16 KB
/
analyze_money_manager.py
File metadata and controls
322 lines (266 loc) · 16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
import re
from openpyxl import load_workbook
from datetime import datetime
from utils.helpers import write_to_file
def process_coffee_shops(food_data):
coffee_shops = [
"Starbucks", "Dunkin Donuts", "Pickup Coffee", "Coffee Project",
"Tim Hortons", "Dean & Deluca", "Cafe Amazon", "Highlands", "Bo's"
]
coffee_shops_iloilo = ["Coffeebreak", "Cafe Brewtherhood", "Teepee", "Tiring"]
# adjust depending your coffee shop config here
coffee_shops = coffee_shops + coffee_shops_iloilo
# filter food_data that matches coffee_shops and is_coffee_shop to true
food_data_coffee_shops_filtered = [
(name, data) for name, data in food_data.items()
if data[3] and any(re.search(rf'\b{re.escape(coffee_shop)}\b', name, re.IGNORECASE) for coffee_shop in coffee_shops)
]
top_10_coffee_shops = sorted(food_data_coffee_shops_filtered, key=lambda item: item[1][0], reverse=True)[:10]
return top_10_coffee_shops
def process_fast_foods(food_data):
fast_foods = ["Jollibee", "Mcdo", "KFC", "Chowking", "Mang Inasal", "Burger King", "Pizza Hut"]
# filter food_data that matches fast_foods
food_data_fast_foods_filtered = [
(name, data) for name, data in food_data.items()
if any(re.search(rf'\b{re.escape(fast_food)}\b', name, re.IGNORECASE) for fast_food in fast_foods)
]
top_10_fast_foods = sorted(food_data_fast_foods_filtered, key=lambda item: item[1][0], reverse=True)[:10]
return top_10_fast_foods
def analyze_money_manager(file_path):
workbook = load_workbook(file_path)
sheet = workbook.active
header_row = next(sheet.iter_rows(values_only=True))
period_index = header_row.index("Period") + 1
account_index = header_row.index("Accounts") + 1
category_index = header_row.index("Category") + 1
subcategory_index = header_row.index("Subcategory") + 1
amount_index = header_row.index("Amount") + 1
income_expense_column_index = header_row.index("Income/Expense") + 1
note_index = header_row.index("Note") + 1
description_index = header_row.index("Description") + 1
# totals
total_expense = 0
total_expense_count = 0
total_income = 0
total_income_count = 0
# data
expense_account_data = {}
income_from_data = {}
purchase_from_data = {}
food_data = {}
grocery_data = {}
# special cases
total_shopee_count = 0
shopee_orders = []
total_lazada_count = 0
lazada_orders = []
total_amazon_count = 0
amazon_orders = []
total_grab_food_count = 0
total_grab_car_count = 0
total_foodpanda_count = 0
total_711 = 0
total_christian_solo_expense_count = 0
total_christian_solo_amount = 0
total_shane_solo_expense_count = 0
total_shane_solo_amount = 0
for row in sheet.iter_rows(min_row=2):
period_cell = row[period_index - 1]
account_cell = row[account_index - 1]
category_cell = row[category_index - 1]
subcategory_cell = row[subcategory_index - 1]
amount_cell = row[amount_index - 1]
income_expense_cell = row[income_expense_column_index - 1]
note_cell = row[note_index - 1]
description_cell = row[description_index - 1]
# get the values from the cells
period_value = period_cell.value
category_value = category_cell.value
subcategory_value = subcategory_cell.value
amount_value = amount_cell.value
income_expense_value = income_expense_cell.value
account_value = account_cell.value
note_value = note_cell.value
description_value = description_cell.value
if isinstance(amount_value, (int, float)):
if income_expense_value == "Exp.":
total_expense_count += 1
total_expense += amount_value
# count expense accounts
if account_value:
expense_account_data[account_value] = expense_account_data.get(account_value, 0) + 1
if note_value:
# count purchase_from entry, total amount, and first instance
if note_value not in purchase_from_data:
purchase_from_data[note_value] = (0, 0, period_value.strftime("%B %d, %Y"))
current_count, current_amount_total, current_period_value = purchase_from_data[note_value]
# check if current_period_value is older than period_value then update it
if datetime.strptime(current_period_value, "%B %d, %Y") > period_value:
current_period_value = period_value.strftime("%B %d, %Y")
purchase_from_data[note_value] = (current_count + 1, round(current_amount_total + amount_value, 2), current_period_value)
if category_value == "Food":
# include Grab and Foodpanda for delivery
is_coffee_shop = subcategory_value == "Cafe Hopping" or subcategory_value == "Grab" or subcategory_value == "Foodpanda"
if note_value not in food_data:
food_data[note_value] = (0, 0, period_value.strftime("%B %d, %Y"), is_coffee_shop)
current_count, current_amount_total, current_period_value, _ = food_data[note_value]
# check if current_period_value is older than period_value then update it
if datetime.strptime(current_period_value, "%B %d, %Y") > period_value:
current_period_value = period_value.strftime("%B %d, %Y")
food_data[note_value] = (current_count + 1, round(current_amount_total + amount_value, 2), current_period_value, is_coffee_shop)
# catch GrabFood
if subcategory_value == "Grab":
total_grab_food_count += 1
# catch Foodpanda
if subcategory_value == "Foodpanda":
total_foodpanda_count += 1
elif category_value == "Grocery":
if note_value not in grocery_data:
grocery_data[note_value] = (0, 0, period_value.strftime("%B %d, %Y"))
current_count, current_amount_total, current_period_value = grocery_data[note_value]
# check if current_period_value is older than period_value then update it
if datetime.strptime(current_period_value, "%B %d, %Y") > period_value:
current_period_value = period_value.strftime("%B %d, %Y")
grocery_data[note_value] = (current_count + 1, round(current_amount_total + amount_value, 2), current_period_value)
elif category_value == "Transportation":
# catch grabcar and grabtaxi
if re.search(r'^Grab', note_value, re.IGNORECASE):
total_grab_car_count += 1
# catch shopee orders
if re.search(r'Shopee$', note_value, re.IGNORECASE):
total_shopee_count += 1
if description_value:
ordered_item = re.sub(r'@Christian|@Shane', '', description_value).strip()
if ordered_item:
shopee_orders.append(ordered_item)
# catch lazada orders
if re.search(r'Lazada$', note_value, re.IGNORECASE):
total_lazada_count += 1
if description_value:
ordered_item = re.sub(r'@Christian|@Shane', '', description_value).strip()
if ordered_item:
lazada_orders.append(ordered_item)
# catch amazon orders
if re.search(r'Amazon$', note_value, re.IGNORECASE):
total_amazon_count += 1
if description_value:
ordered_item = re.sub(r'@Christian|@Shane', '', description_value).strip()
if ordered_item:
amazon_orders.append(ordered_item)
# catch 711 orders
if re.search(r'^711', note_value, re.IGNORECASE):
total_711 += 1
if description_value:
# catch @Christian and @Shane tagged expenses
if re.search(r'@Christian', description_value, re.IGNORECASE):
total_christian_solo_expense_count += 1
total_christian_solo_amount += amount_value
if re.search(r'@Shane', description_value, re.IGNORECASE):
total_shane_solo_expense_count += 1
total_shane_solo_amount += amount_value
elif income_expense_value == "Income":
total_income_count += 1
total_income += amount_value
if note_value:
# count income_from entry, total amount, and first instance
if note_value not in income_from_data:
income_from_data[note_value] = (0, 0, period_value.strftime("%B %d, %Y"))
current_count, current_amount_total, current_period_value = income_from_data[note_value]
# check if current_period_value is older than period_value then update it
if datetime.strptime(current_period_value, "%B %d, %Y") > period_value:
current_period_value = period_value.strftime("%B %d, %Y")
income_from_data[note_value] = (current_count + 1, round(current_amount_total + amount_value, 2), current_period_value)
output = []
output.append("# Money Manager Analysis")
output.append("@crrmacarse")
output.append("\n## Summary")
output.append(f"- Total Income: PHP {total_income:,.2f}")
output.append(f"- Total Expense: PHP {total_expense:,.2f}")
balance = total_income - total_expense
output.append(f"- Balance: PHP {balance:,.2f}")
output.append(f"- Total Income Entry: {total_income_count}")
output.append(f"- Total Expense Entry: {total_expense_count}")
# TODO: Most common category, subcategory
output.append("\n## Expense Accounts")
output.append("| Account | Number of Entries ↓ |")
output.append("|-------------------|-------------------|")
sorted_expense_accounts = sorted(expense_account_data.items(), key=lambda item: item[1], reverse=True)
for account, count in sorted_expense_accounts:
output.append(f"| {account} | {count} |")
output.append("\n## Top 10 Income From")
output.append("| Income from | Number of Entries | Total Amount ↓ | First Instance |")
output.append("|-------------------|-------------------|--------------|------------------|")
top_10_income_from_data = sorted(income_from_data.items(), key=lambda item: item[1][1], reverse=True)[:10]
for income_from, (count, total, first_instance) in top_10_income_from_data:
output.append(f"| {income_from} | {count} | PHP {total:,.2f} | {first_instance} |")
# TODO: Top 5 Highest income earned with date
output.append("\n## Top 30 Expense From")
top_30_purchase_from_data = sorted(purchase_from_data.items(), key=lambda item: item[1][0], reverse=True)[:30]
output.append("| Expense from | Number of Entries ↓ | Total Amount | First Instance |")
output.append("|-------------------|-------------------|--------------|------------------|")
for purchase_from, (count, total, first_instance) in top_30_purchase_from_data:
output.append(f"| {purchase_from} | {count} | PHP {total:,.2f} | {first_instance} |")
output.append("\n## Top 10 Expense From by Amount")
top_30_purchase_from_data = sorted(purchase_from_data.items(), key=lambda item: item[1][1], reverse=True)[:10]
output.append("| Expense from | Total Amount ↓ |")
output.append("|-------------------|-------------------|")
for purchase_from, (_, total, _) in top_30_purchase_from_data:
output.append(f"| {purchase_from} | PHP {total:,.2f} |")
# TODO: Top 10 Most expensive expense with date
output.append("\n## Top 10 Grocery Store")
output.append("| Grocery | Number of Entries ↓ | Total Amount | First Instance |")
output.append("|-------------------|-------------------|--------------|------------------|")
top_10_grocery_data = sorted(grocery_data.items(), key=lambda item: item[1][0], reverse=True)[:10]
for grocery, (count, total, first_instance) in top_10_grocery_data:
output.append(f"| {grocery} | {count} | PHP {total:,.2f} | {first_instance} |")
output.append("\n## Top 10 Grocery Store by Amount")
top_10_grocery_data = sorted(grocery_data.items(), key=lambda item: item[1][1], reverse=True)[:10]
output.append("| Grocery | Total Amount ↓ |")
output.append("|-------------------|-------------------|")
for grocery, (_, total, _) in top_10_grocery_data:
output.append(f"| {grocery} | PHP {total:,.2f} |")
output.append("\n## Top 30 Food Establishments")
top_30_food_data = sorted(food_data.items(), key=lambda item: item[1][0], reverse=True)[:30]
output.append("| Food Establishments | Number of Entries ↓ | Total Amount | First Instance |")
output.append("|-------------------|-------------------|--------------|------------------|")
for food_establishment, (count, total, first_instance, _) in top_30_food_data:
output.append(f"| {food_establishment} | {count} | PHP {total:,.2f} | {first_instance} |")
output.append("\n## Top 10 Fast Foods")
top_10_fast_foods = process_fast_foods(food_data)
output.append("| Fast Food | Number of Entries ↓ | Total Amount |")
output.append("|-------------------|-------------------|-------------------|")
for fast_food, (count, total, _, _) in top_10_fast_foods:
output.append(f"| {fast_food} | {count} | PHP {total:,.2f} |")
output.append("\n## Top 10 Coffee Shops")
top_10_coffee_shops = process_coffee_shops(food_data)
output.append("| Coffee Shop | Number of Entries ↓ | Total Amount |")
output.append("|-------------------|-------------------|-------------------|")
for coffee_shop, (count, total, _, _) in top_10_coffee_shops:
output.append(f"| {coffee_shop} | {count} | PHP {total:,.2f} |")
output.append("\n## Special Cases")
output.append(f"- Total Shopee Order Count: {total_shopee_count}")
output.append(f"- Total Lazada Order Count: {total_lazada_count}")
output.append(f"- Total Amazon Order Count: {total_amazon_count}")
output.append(f"- Total Foodpanda Count: {total_foodpanda_count}")
output.append(f"- Total GrabFood Count: {total_grab_food_count}")
output.append(f"- Total GrabCar Count: {total_grab_car_count}")
output.append(f"- Total 711 Count: {total_711}")
output.append(f"- Total @Christian tagged Expense({total_christian_solo_expense_count}): PHP {total_christian_solo_amount:,.2f}")
output.append(f"- Total @Shane tagged Expense({total_shane_solo_expense_count}): PHP {total_shane_solo_amount:,.2f}")
output.append("\n## Data Dump")
output.append(f"\n### Shopee Orders({len(shopee_orders)}):")
output.append(', '.join(shopee_orders))
output.append(f"\n### Lazada Orders({len(lazada_orders)}):")
output.append(', '.join(lazada_orders))
output.append(f"\n### Amazon Orders({len(amazon_orders)}):")
output.append(', '.join(amazon_orders))
return output
if __name__ == "__main__":
try:
# temporarily added a default file path
file_path = input("Enter the path to the .xlsx file: ") or "./dump/mm.xlsx"
output_file_path = input("Enter the path to the output .md file (default: dump/output.md): ") or "dump/output.md"
result = analyze_money_manager(file_path)
write_to_file(output_file_path, "\n".join(result))
except Exception as e:
print(f"Error: {e}")