# ev_charging_predictor/datacuration.py (GitHub: noahcasey21/ev_charging_predictor, branch main)

import pandas as pd
from algos.frank import choose_new_location as frank_choose_new_location
from algos.noah_c import choose_new_location as noah_choose_new_location
from algos.noah_s import choose_new_location_kmeans

def CreateOpenStationsfile(csv_file_path, parquet_file_path):
    """Filter a raw station CSV down to public electric stations and save it as Parquet.

    The CSV is reduced to the columns listed in ``keep_cols``; rows with a
    missing value in any of those columns are dropped. Only rows with
    ``Status Code == 'E'``, ``Fuel Type Code == 'ELEC'`` and a
    ``Groups With Access Code`` containing ``'Public'`` are kept, after which
    those three filter columns are removed. A ``Year`` column is derived from
    the text before the first ``'-'`` in ``Open Date`` and every row is tagged
    with ``Algorithm = 'Original'``.

    Args:
        csv_file_path: Path of the raw CSV file to read.
        parquet_file_path: Path of the Parquet file to write (fastparquet engine).

    Raises:
        KeyError: If the CSV lacks any of the required columns.
        ValueError: If an ``Open Date`` value does not start with an integer year.
    """
    keep_cols = [
        'Groups With Access Code', 'Fuel Type Code', 'Station Name',
        'Street Address', 'City', 'State', 'ZIP', 'Status Code',
        'Latitude', 'Longitude', 'Open Date',
    ]

    # Read ZIP as text. Letting pandas infer a numeric dtype strips leading
    # zeros ("02134" -> 2134) and a float column would stringify as "2134.0".
    df = pd.read_csv(csv_file_path, dtype={'ZIP': str})
    df = df[keep_cols].dropna(how='any')

    wanted = (
        (df['Status Code'] == 'E')
        & (df['Fuel Type Code'] == 'ELEC')
        & df['Groups With Access Code'].str.contains('Public')
    )
    # drop() returns a new frame, so the column assignments below do not write
    # into a slice of the filtered data (avoids SettingWithCopyWarning).
    df = df[wanted].drop(columns=['Status Code', 'Fuel Type Code', 'Groups With Access Code'])

    # Vectorised equivalent of int(x.split('-')[0]) for each Open Date.
    df['Year'] = df['Open Date'].astype(str).str.split('-', n=1).str[0].astype(int)
    df['Algorithm'] = 'Original'

    # Convert to Parquet file
    df.to_parquet(parquet_file_path, engine='fastparquet')
    print(f"OpenStations file has been created to {parquet_file_path}")

def GeneratePredictions(parquet_file_path, output_file_path, *,
                        years=range(2010, 2024), min_stations=10):
    """Run every location algorithm per year and city, and save the results.

    For each year in ``years`` the stations whose ``Year`` is strictly less
    than that year are grouped by ``(City, State)``. Every group holding more
    than ``min_stations`` rows is passed, as a list of
    ``(Latitude, Longitude)`` tuples, to each of the three imported
    algorithms. Element 0 and element 1 of each returned value are stored as
    ``Latitude`` and ``Longitude`` of one output row.

    Args:
        parquet_file_path: Parquet file of stations; must provide the columns
            ``Year``, ``City``, ``State``, ``Latitude`` and ``Longitude``.
        output_file_path: Path of the Parquet file to write (fastparquet engine).
        years: Iterable of years to generate predictions for. Defaults to
            2010 through 2023 inclusive.
        min_stations: A city is used only when it has strictly more than this
            many stations before the year in question. Defaults to 10.
    """
    # Value written to the 'Algorithm' column -> function that picks the location.
    algorithms = (
        ('Frank', frank_choose_new_location),
        ('Noah_C', noah_choose_new_location),
        ('Noah_S', choose_new_location_kmeans),
    )
    columns = ['Algorithm', 'Year', 'City', 'State', 'Latitude', 'Longitude']

    # Read the list of open stations
    df = pd.read_parquet(parquet_file_path)
    print(f"Total number of open stations: {len(df)}")
    print("Top 5 open stations:")
    print(df.head())

    # Rows are accumulated in a plain list and turned into a DataFrame once.
    # Concatenating onto a growing (initially empty) DataFrame copies all
    # earlier rows on every iteration and depends on how pandas treats empty
    # frames when inferring the result dtypes.
    rows = []
    for year in years:
        df_year = df[df['Year'] < year]
        for (city, state), group in df_year.groupby(['City', 'State']):
            if len(group) <= min_stations:
                continue  # too few existing stations in this city
            print(year, city, state)
            locations = list(group[['Latitude', 'Longitude']].itertuples(index=False, name=None))

            for label, choose_location in algorithms:
                new_location = choose_location(locations)
                rows.append({
                    'Algorithm': label,
                    'Year': year,
                    'City': city,
                    'State': state,
                    'Latitude': new_location[0],
                    'Longitude': new_location[1],
                })

    # Passing `columns` keeps the expected schema even when no row was produced.
    predictions = pd.DataFrame(rows, columns=columns)

    # Save the predictions DataFrame to a Parquet file
    predictions.to_parquet(output_file_path, engine='fastparquet')
    print(f"Predictions have been saved to {output_file_path}")

# Combine OpenStations and Predictions dataframes
def CombineDataFrames(openstations_file_path, predictions_file_path, combined_file_path):
    """Outer-merge the open-stations and predictions tables into one Parquet file.

    Both inputs are read from Parquet and merged on ``City``, ``State``,
    ``Latitude``, ``Longitude``, ``Year`` and ``Algorithm``. Because the merge
    uses ``how='outer'``, rows from either input are kept whether or not they
    have a match in the other. The first rows of each input and of the merged
    result are printed before the result is written out.

    Args:
        openstations_file_path: Parquet file holding the open stations.
        predictions_file_path: Parquet file holding the predictions.
        combined_file_path: Path of the Parquet file to write (fastparquet engine).
    """
    stations = pd.read_parquet(openstations_file_path)
    predicted = pd.read_parquet(predictions_file_path)

    # Columns that must all be equal for two rows to count as the same entry.
    join_keys = ['City', 'State', 'Latitude', 'Longitude', 'Year', 'Algorithm']
    merged = stations.merge(predicted, how='outer', on=join_keys)

    # Show a preview of each table, inputs first and the merged result last.
    previews = (
        ("OpenStations DataFrame:", stations),
        ("Predictions DataFrame:", predicted),
        ("Combined DataFrame:", merged),
    )
    for title, frame in previews:
        print(title)
        print(frame.head())

    merged.to_parquet(combined_file_path, engine='fastparquet')
    print(f"Combined data has been saved to {combined_file_path}")

# Run the whole pipeline: raw CSV -> open stations -> predictions -> combined map data.
# NOTE(review): these statements execute on import as well as when the file is
# run as a script; confirm that nothing imports this module before guarding them.
rawfile, openstations, predictions = (
    'data/alt_fuel_station.csv',
    'data/OpenStations.parquet',
    'data/Predictions.parquet',
)
CreateOpenStationsfile(rawfile, openstations)
GeneratePredictions(openstations, predictions)
CombineDataFrames(openstations, predictions, 'data/MapData.parquet')