FDM_MiniProject/Call.py at main · YuvinNP/FDM_MiniProject · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
import pandas as pd
import numpy as np
from collections import Counter

# initialize global dataframes
Df_dataset = pd.DataFrame()
Df_dataset_top_cluster = pd.DataFrame()
top_cluster = []


def importCSV(path):
    global Df_dataset
    # read the csv
    Df_dataset = pd.read_csv(path)


def GetTopClusters():
    global Df_dataset

    global Df_dataset_top_cluster

    global top_cluster

    # get the Df_dataset to another dataframe
    df_cal = Df_dataset.copy(deep=True)

    # function to get the top 3 segmentations
    # take all the platinum having rows
    Df_dataset_final_platinum = df_cal.loc[df_cal['RFM_Loyalty_Level'] == 'Platinum'].sort_values('id', ascending=False)

    # get the each segment platnum count
    Df_dataset_final_platinum = Df_dataset_final_platinum.groupby(['Cluster'])['id'].aggregate(
        'count').reset_index().sort_values('id', ascending=False)

    # if the total ids greater than 1000 then take those as main clusters
    # loop the dataframe and check for the above condition
    # get the cluster numbers to list
    top_cluster = [gid['Cluster'] for index, gid in Df_dataset_final_platinum.iterrows() if gid['id'] > 1000]

    # get all the rows which have cluster number in the above
    Df_dataset_top_cluster = df_cal.loc[(df_cal['Cluster'].isin(top_cluster))].sort_values('Cluster', ascending=True)

    # now we have the required segment data


def CustomCluster(clusterno):
    global Df_dataset

    global Df_dataset_top_cluster

    global top_cluster

    # get the Df_dataset to another dataframe
    df_cal = Df_dataset.copy(deep=True)

    # get the cluster numbers to list
    top_cluster = [clusterno]
    # get all the rows which have cluster number in the above
    Df_dataset_top_cluster = df_cal.loc[(df_cal['Cluster'].isin(top_cluster))].sort_values('Cluster', ascending=True)


# get the country language and employee skill of each clusters
# cluster number needed if it is a customer cluster details needed

def GetDetailsOfCluster(path, roi, processtype, clusternumber):
    global Df_dataset_top_cluster

    global Df_dataset

    global top_cluster

    # import the dataset
    importCSV(path)

    if (processtype == 'all'):
        # get the top clusters
        GetTopClusters()

    elif (processtype == 'custom'):
        CustomCluster(clusternumber)

    # to save all the date
    clusterdict = {}

    # to save employee requirement satisfy
    # to check whether is employee for main countries
    employeelanguage = ['FRA', 'GBR']

    # to filter unwanted months by getting mean and getting needed months
    meanmonth = 0

    # ROI calculation
    ROI = roi

    for cluster in top_cluster:

        # use this to remove unwanted months which have low frequencies of guest
        meanmonthcount = 0;

        # to store the lead time
        meancanceldays = 0

        # to store the  loyalty count of each segment
        loyalty_dict = {}

        # to store the is cancel data
        cancel_data_dict = {}

        # to save the year's each month revenue and total revenue
        # cal Monetary of each year by month
        yr_15 = {}
        yr_15_total = 0
        yr_15_roi_percentage = 0

        yr_16 = {}
        yr_16_total = 0
        yr_16_roi_percentage = 0

        yr_17 = {}
        yr_17_total = 0
        yr_17_roi_percentage = 0

        # for food
        Eachclusterfoodcount = {}

        # for room type
        Eachclusterroomcount_dict = {}

        # for employees
        Eachclustercountrycount = {}

        # combine dictionary
        combine_dict = {}

        # to save employee requirement satisfy
        employee_ava_dict = {}

        # to save each segment month visiting and the count
        Eachclustermonthcount = {}

        # copy the dataframe data for further calculations
        df_temp = Df_dataset_top_cluster.loc[Df_dataset_top_cluster['Cluster'] == cluster][
            ['meal', 'country', 'arrival_date_month', 'arrival_date_year', 'Monetary', 'RFM_Loyalty_Level',
             'is_canceled', 'id', 'lead_time', 'reserved_room_type']].copy(deep=True)

        # get all the loyalty level
        temployalty = df_temp['RFM_Loyalty_Level'].tolist()

        # get the is cancel column  detail to get the percentage of cancel and coming
        tempiscancel = df_temp.groupby(['is_canceled'])[['id']].aggregate('count').sort_values('id', ascending=False)[
            'id'].tolist()

        # getting the data in ascending order of id
        # there for first index in 0 and next is 1
        cancelindex = 0;
        for cancel in tempiscancel:
            cancel_data_dict[cancelindex] = cancel
            cancelindex = cancelindex + 1

        # store the number of rows
        noofrows = len(temployalty)

        # get the cancel and uncancel means, we used this array to store the o and 1s
        tempcancelleadtime = df_temp.loc[df_temp['is_canceled'] == 1]['lead_time'].tolist()

        # cal mean lead days to cancel
        if (len(tempcancelleadtime) != 0):
            meancanceldays = sum(tempcancelleadtime) / len(tempcancelleadtime)

        # get all the meals
        tempfood = df_temp['meal'].tolist()

        # get the countries
        tempcountry = df_temp['country'].tolist()
        countrynames = list(set(tempcountry))

        # get the months
        tempmonth = df_temp['arrival_date_month'].tolist()
        monthname = list(set(tempmonth))

        # get the room types
        temproom = df_temp['reserved_room_type'].tolist()

        # get the Monetary
        tempmonetary = df_temp.groupby(['arrival_date_year', 'arrival_date_month'])['Monetary'].aggregate(
            'sum').reset_index().sort_values('arrival_date_year', ascending=False)

        # replace undefined with special requests
        tempfood = list(map(lambda st: str.replace(st, "Undefined", "Special Requests"), tempfood))

        # used to loop to make time less
        foodname = list(set(tempfood))

        # count each food frequencies
        for foodele in foodname:
            Eachclusterfoodcount[foodele] = tempfood.count(foodele)

        # count each month frequencies
        for monthele in monthname:
            # get the month count
            monthcounter = tempmonth.count(monthele)
            Eachclustermonthcount[monthele] = monthcounter
            meanmonthcount = meanmonthcount + monthcounter

        # now get the mean and remove unwanted months
        meanmonth = meanmonthcount / len(monthname)

        # loop and kemove those keys which are less than mean
        for removeindex in list(Eachclustermonthcount.keys()):

            if (Eachclustermonthcount[removeindex] < meanmonth):
                del Eachclustermonthcount[removeindex]

                # to keep track of only main countries
        countcountries = 0

        for countryele in countrynames:

            count = tempcountry.count(countryele)

            # if countcountries == 3 that means stops practicaly in a hotel we can have 3 segments
            if (countcountries == 3):
                break
                # there should be 1000 atleast coustomers for each country to make it considerable
            if (count > 1000):
                Eachclustercountrycount[countryele] = count
                countcountries = countcountries + 1

            # check for the lack of employee for each country language
        for coun in Eachclustercountrycount.keys():

            # check whether country language incldued
            if coun in employeelanguage:
                employee_ava_dict[coun] = 'Yes'
            else:
                employee_ava_dict[coun] = 'No'

        # count each loyalty frequency
        loyalty_dict = Counter(temployalty)

        # count each room frequency
        Eachclusterroomcount_dict = Counter(temproom)

        # iterate the dataframe
        for index, row in tempmonetary.iterrows():

            # get each years month revenue based on the year
            if (row['arrival_date_year'] == 2015):

                yr_15[row['arrival_date_month']] = row['Monetary']
                yr_15_total = yr_15_total + row['Monetary']

            elif (row['arrival_date_year'] == 2016):

                yr_16[row['arrival_date_month']] = row['Monetary']
                yr_16_total = yr_16_total + row['Monetary']

            else:

                yr_17[row['arrival_date_month']] = row['Monetary']
                yr_17_total = yr_17_total + row['Monetary']

        yr_15_roi_percentage = int((yr_15_total / ROI) * 100)
        yr_16_roi_percentage = int((yr_16_total / ROI) * 100)
        yr_17_roi_percentage = int((yr_17_total / ROI) * 100)

        # combine the above dictionaries
        combine_dict = {'food': Eachclusterfoodcount.copy(),
                        'country': Eachclustercountrycount.copy(),
                        'employee_skill': employee_ava_dict.copy(),
                        'month_name': Eachclustermonthcount.copy(),
                        'loyalty': loyalty_dict.copy(),
                        'roomcount': Eachclusterroomcount_dict.copy(),
                        '2015_Revenue_month': yr_15.copy(),
                        '2016_Revenue_month': yr_16.copy(),
                        '2017_Revenue_month': yr_17.copy(),
                        '2015_total_revenue': yr_15_total,
                        '2016_total_revenue': yr_16_total,
                        '2017_total_revenue': yr_17_total,
                        'ROI_percentage_2015': yr_15_roi_percentage,
                        'ROI_percentage_2016': yr_16_roi_percentage,
                        'ROI_percentage_2017': yr_17_roi_percentage,
                        'meancanceldays': meancanceldays,
                        'canceldetails': cancel_data_dict.copy()
                        }

        # save each of those data to main dictionary

        clusterdict[cluster] = combine_dict

    return clusterdict


def CalTotalGivenTimeRange(year, monthrange, cluster):
    global Df_dataset_top_cluster

    # to store each month count , help to get the mean
    monthcount = {}

    # to store the each month monetary
    monthmonetary = {}

    # group  by year
    df_full = Df_dataset_top_cluster.loc[Df_dataset_top_cluster['Cluster'].isin(cluster)].groupby(
        ['arrival_date_year', 'arrival_date_month'])['Monetary'].aggregate('sum').reset_index().sort_values(
        'arrival_date_year', ascending=False).copy(deep=True)

    # now get the required year data

    for yr in year:

        # get the respective
        df_year_filer = df_full.loc[df_full['arrival_date_year'] == yr].sort_values('arrival_date_year',
                                                                                    ascending=True).copy(deep=True)

        # now loop and get each month monetary
        for index, row in df_year_filer.iterrows():

            if (row['arrival_date_month'] in monthrange):

                # check for the exist of the dictionry
                if (row['arrival_date_month'] in monthmonetary):

                    # add to the monetary count of the month
                    monthmonetary[row['arrival_date_month']] = monthmonetary[row['arrival_date_month']] + row[
                        'Monetary']

                    # add to the month count
                    monthcount[row['arrival_date_month']] = monthcount[row['arrival_date_month']] + 1

                else:

                    # create to the monetary count of the month
                    monthmonetary[row['arrival_date_month']] = row['Monetary']

                    # create to the month count
                    monthcount[row['arrival_date_month']] = 1

    # now get the mean of each month
    meanmonths = {key: value / monthcount[key] for key, value in monthmonetary.items()}

    return meanmonths

import operator


def FinancialOfferCalculations(year, monthrange, maxdiscountPercentage):
    # to store each segment volumne
    requirecount_dict = {}

    # to store each segment earning
    requireearn_dict = {}

    # to store each segmnet discount
    discount_seg = {}

    # to store each segment discoutn loss by taking fom total revenue
    discount_loss = {}

    # to store discoutn loss per person
    discount_loss_per_guest = {}

    # to store revenue each month
    revenue_month = {}

    # to temporalu to store each segment month frequency and get count
    tempmonthcount = {}

    # combine dictionary to get all the output to single dictioanry
    combine_dict = {}

    # get the details of the cluster
    output = GetDetailsOfCluster("cltData.csv", 1200000, 'all', 0)

    # get each segment mean guest volume
    for key in list(output.keys()):

        # initialize intial count as 0
        requirecount_dict[key] = 0

        # check month dictionary of the respective segment
        monthdict = output[key]['month_name'].copy()

        # loop the monthdict and get the total volume for given time range
        for mon_name in monthrange:

            # check if month name exits in the month dictionary otherwise it will give error
            if mon_name in monthdict:
                # updte the count of each segmnet
                requirecount_dict[key] = requirecount_dict[key] + monthdict[mon_name]

    # no purpose of having no guests in the given range
    if (0 in requirecount_dict):
        del requirecount_dict[0]

        # get sum of all values of the segment whuch will use the below code to cal the dicount percentage
    sumsegcount = sum(list(requirecount_dict.values()))

    # get the discount for each segment
    discount_seg = {seg: (val / sumsegcount) * maxdiscountPercentage for seg, val in requirecount_dict.items()}

    # now get the loss for each segment because of the discount
    # before that get the total revue of each given month for each segment from CalTotalGivenTimeRange fucntion
    for clus in list(requirecount_dict.keys()):
        temprevenue_month = CalTotalGivenTimeRange(year, monthrange, [clus])
        revenue_month[clus] = sum(list(temprevenue_month.values()))

    # now calculate the loss for each cluster from the discount
    # loop the each segment and cal
    for clus in list(revenue_month.keys()):
        # now cal the loss and insert to new dictionary
        discount_loss[clus] = revenue_month[clus] * discount_seg[clus]
        discount_loss_per_guest[clus] = discount_loss[clus] / sumsegcount

    # combine the output
    combine_dict = {'segment_discount_percentage': discount_seg.copy(),
                    'segment_loss_from_discount': discount_loss.copy(),
                    'recover_amount_per_guest': discount_loss_per_guest.copy(),
                    'sum_of_guest': sumsegcount,
                    'mean_guest_count': requirecount_dict.copy()

                    }

    # output the discount , loss from discount, loss dicount to cover from person
    # what cross selling to insert in the recommendation
    return combine_dict

FinancialOfferCalculations([2015,2017],['January','August','July'],40)