-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_data.py
More file actions
38 lines (28 loc) · 1.09 KB
/
generate_data.py
File metadata and controls
38 lines (28 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
# Configuration for the synthetic dataset produced by generate_data().
N_SAMPLES = 5000 # Total number of samples (data points) to generate
N_FEATURES = 10 # Number of feature columns per sample
N_CLASSES = 2 # Number of target classes (2 = binary classification)
TEST_SIZE = 0.2 # Fraction of the samples held out for the test set
SEED = 42 # Random seed used for reproducibility
def generate_data():
    """Generate a synthetic classification dataset and save train/test CSVs.

    Creates ``N_SAMPLES`` samples with ``N_FEATURES`` features and
    ``N_CLASSES`` classes using ``make_classification``, splits them with
    ``train_test_split`` (``TEST_SIZE`` is the held-out share), prints the
    shape of each split, and writes ``train.csv`` and ``test.csv`` to the
    current working directory.

    Each CSV contains the columns ``feature_0`` .. ``feature_{N_FEATURES - 1}``
    followed by ``target``; the DataFrame index is not written.

    Both the data generation and the split are seeded with ``SEED`` so that
    repeated runs are reproducible.

    Returns:
        None. The results are the two CSV files and the printed shapes.
    """
    # Seed the generator as well as the split: with only the split seeded,
    # every run produced a different dataset even though SEED was fixed.
    x, y = make_classification(
        n_samples=N_SAMPLES,
        n_features=N_FEATURES,
        n_classes=N_CLASSES,
        random_state=SEED,
    )

    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        test_size=TEST_SIZE,
        random_state=SEED,
    )

    # Build the column names once; both splits share the same schema.
    feature_columns = [f"feature_{i}" for i in range(N_FEATURES)]

    df_train = pd.DataFrame(x_train, columns=feature_columns)
    df_train["target"] = y_train

    df_test = pd.DataFrame(x_test, columns=feature_columns)
    df_test["target"] = y_test

    print(f"Train: {df_train.shape}")
    print(f"Test: {df_test.shape}")

    df_train.to_csv("train.csv", index=False)
    df_test.to_csv("test.csv", index=False)
# Script entry point: generate the CSV files only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    generate_data()