-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathModified_LLE.py
More file actions
executable file
·157 lines (113 loc) · 5.04 KB
/
Modified_LLE.py
File metadata and controls
executable file
·157 lines (113 loc) · 5.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
#!/usr/bin/env python
# coding: utf-8
# In[5]:
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import offsetbox
import numpy as np
from time import time
from itertools import zip_longest
def _plot_embedding(points, labels, title, ax):
    """Draw a 2-D embedding on *ax*, coloured by *labels* where possible.

    Args:
        points: array of shape (n_points, 2) holding the embedded coordinates.
        labels: 1-D array with one target value per point, or an array of a
            different length (including empty) to request a plain scatter.
        title: text used as the axes title.
        ax: matplotlib Axes to draw on.
    """
    from sklearn.preprocessing import MinMaxScaler

    # Rescale both components to [0, 1] so different methods are comparable.
    scaled = MinMaxScaler().fit_transform(points)
    n_points = scaled.shape[0]

    def plain_scatter():
        ax.scatter(scaled[:, 0], scaled[:, 1], s=20, alpha=0.6, c='blue')

    if len(labels) != n_points:
        # Labels cannot be matched to points one-to-one; do not guess.
        plain_scatter()
    else:
        try:
            unique_labels = np.unique(labels)
            if len(unique_labels) <= 10:
                # Few distinct values: treat as classes with distinct colours.
                colors = plt.cm.tab10(np.linspace(0, 1, len(unique_labels)))
                for color, value in zip(colors, unique_labels):
                    mask = labels == value
                    ax.scatter(scaled[mask, 0], scaled[mask, 1], c=[color],
                               s=20, alpha=0.6, label=f'Class {value}')
                # The per-class labels are only visible through a legend.
                ax.legend(fontsize=8)
            else:
                # Many distinct values: treat as continuous and use a colormap.
                scatter = ax.scatter(scaled[:, 0], scaled[:, 1], c=labels,
                                     cmap='viridis', s=20, alpha=0.6)
                plt.colorbar(scatter, ax=ax)
        except (TypeError, ValueError):
            # Labels that cannot be sorted or mapped to colours (e.g. mixed
            # or non-numeric values) fall back to a single-colour scatter.
            plain_scatter()

    # Annotate at most 20 evenly spaced points with their row index.
    n_labels = min(20, n_points)
    if n_labels > 0:
        for i in np.linspace(0, n_points - 1, n_labels, dtype=int):
            ax.text(scaled[i, 0], scaled[i, 1], str(i), fontsize=7, alpha=0.7,
                    bbox=dict(boxstyle='round,pad=0.2', facecolor='white', alpha=0.7))

    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3)
    ax.set_xlabel('Component 1', fontsize=10)
    ax.set_ylabel('Component 2', fontsize=10)


def runModifiedLLE(csv_path='data_as_csv.csv', n_neighbors=30):
    """Embed a CSV dataset in 2-D with Modified LLE, time it and plot it.

    The last numeric column is used as the target (for colouring only) and
    the remaining numeric columns as features. If the file has fewer than two
    numeric columns, the last column is the target and all others are the
    features.

    Args:
        csv_path: path of the CSV file to load.
        n_neighbors: neighbours considered for each point. It is reduced to
            ``n_samples - 1`` when the dataset is smaller than that, because
            the embedding cannot use more neighbours than there are other
            samples.

    Returns:
        None. Prints dataset information and progress, then shows the figure.
    """
    from sklearn.manifold import LocallyLinearEmbedding

    data_set = pd.read_csv(csv_path)
    data_set.info()  # general information about the dataset

    numeric_cols = data_set.select_dtypes(include=['number']).columns.tolist()
    if len(numeric_cols) >= 2:
        X = data_set[numeric_cols[:-1]]
        y = data_set[numeric_cols[-1]]
    else:
        # Fallback: all columns except the last are features.
        X = data_set.iloc[:, :-1]
        y = data_set.iloc[:, -1]
    print(type(X))

    # The embedding is fit on the feature matrix; the target is only used to
    # colour the resulting points, so both keep one row per sample.
    features = X.to_numpy()
    labels = y.to_numpy()

    effective_neighbors = min(n_neighbors, max(features.shape[0] - 1, 1))

    embeddings = {
        "Modified LLE": LocallyLinearEmbedding(
            n_neighbors=effective_neighbors, n_components=2, method="modified"
        )
    }

    projections, timing = {}, {}
    for name, transformer in embeddings.items():
        print(f"{name}...")
        print(type(features))
        start_time = time()
        projections[name] = transformer.fit_transform(features)
        timing[name] = time() - start_time  # wall-clock fit time in seconds

    fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(17, 24))
    # zip_longest pads the method names with None so unused axes are hidden.
    for name, ax0 in zip_longest(timing, axs.ravel()):
        if name is None:
            ax0.axis("off")
            continue
        title = f"{name} (time {timing[name]:.3f}s)"
        _plot_embedding(projections[name], labels, title, ax0)
    plt.show()
# In[ ]: