From 0d0791a729b5962d8dea4e13575232fcc4f314b1 Mon Sep 17 00:00:00 2001 From: ShubhamMishra1611 Date: Wed, 5 Oct 2022 22:00:33 +0530 Subject: [PATCH 1/3] Added more distant_metrics for vector space and also for boolean vector space --- MLlib/distance_metrics.py | 307 +++++++++++++++++++++++++++++++++++++- 1 file changed, 304 insertions(+), 3 deletions(-) diff --git a/MLlib/distance_metrics.py b/MLlib/distance_metrics.py index 802aa80..435f983 100644 --- a/MLlib/distance_metrics.py +++ b/MLlib/distance_metrics.py @@ -4,9 +4,9 @@ class Distance_metrics: Calculate distance between each corresponding points of two arrays using different distance metrics """ - def Eucledian_Distance(X1,X2): + def Euclidean_Distance(X1,X2): """" - Returns the list of eucledian distance + Returns the list of euclidean distance between two corresponding points of two arrays @@ -22,7 +22,7 @@ def Eucledian_Distance(X1,X2): ========= distance:list - Returns the list of eucledian distance + Returns the list of euclidean distance between two corresponding points of two arrays """ @@ -182,3 +182,304 @@ def Hamming_Distance(X1,X2): s += abs(e1-e2) distance = s/len(X1) return distance + + def sEuclidean_distance(X1,X2,V): + """ + Returns the list of standardized euclidean distance + between two corresponding points of + two arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with more than 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with more than 1 dimension + + V:list + input array with 1 dimension + + RETURNS + ========= + + distance:list + Returns the list of standardized euclidean distance + between two corresponding points of + two arrays + """ + distance=[] + for i in range(len(X1)): + single=0 + single=np.sum(((X1[i]-X2[i])/V[i])**2) + distance.append(np.sqrt(single)) + return(distance) + + def Mahalanobis_Distance(X,d,V=None): + """ + Returns the mahalanobis distance between + points and distribution + + PARAMETERS + ========== + 
X:ndarray(dtype=int,axis=1) + input array with more than 1 dimension. + Represents the points + + d:ndarray(dtype=int,axis=1) + input array with more than 1 dimension.Represent + the distribution from which Mahalanobis + distance is to be calculated + + V:ndarray(dtype=float64,axis=1) + input array with more than 1 dimension.Represent + the covariance matrix.If None is given,then will + be computed from the data + + RETURNS + ========= + distance:list + Returns the list of mahalanobis distance + between points and given distribution + """ + distance=[] + for i in range(len(X)): + x_minus_mu = X[i]-np.mean(d,axis=0) + if V==None: + V=np.cov(d.T) + VI = np.linalg.inv(V) + d = np.sqrt(np.dot(np.dot(x_minus_mu,VI),x_minus_mu.T)) + distance.append(d) + return(distance) + + def __boolean_opr(self,X1,X2): + """ + Returns result of some bianry operation + between two arrays.Any non zero value is + considered as 1 + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + result:tuple + + result[0]:Number of dimensions + result[1]:Number of dims in which both values are True + result[2]:Number of dims in which the first value is True and second is False + result[3]:Number of dims in which the first value is False and second is True + result[4]:Number of dims in which both values are False + """ + if len(X1)!=len(X2): + raise TypeError("X1 and X2 must have same length") + result=[] + for i in range(len(X1)): + if X1[i]!=0: + X1[i]=1 + if X2[i]!=0: + X2[i]=1 + result.append(len(X1)) + result.append(np.sum((X1==1)&(X2==1))) + result.append(np.sum((X1==1)&(X2==0))) + result.append(np.sum((X1==0)&(X2==1))) + result.append(np.sum((X1==0)&(X2==0))) + return(tuple(result)) + + + def Jaccard_Distance(self,X1,X2): + """ + Returns the list of Jaccard distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + 
X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Jaccard distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append((result[2]+result[3])/(result[1]+result[2]+result[3])) + return(distance) + + def Matching_Distance(self,X1,X2): + """ + Returns the list of Matching distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Matching distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append((result[2]+result[3])/result[0]) + return(distance) + + def Dice_Distance(self,X1,X2): + """ + Returns the list of Dice distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Dice distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append((result[2]+result[3])/(2*result[1]+result[2]+result[3])) + return(distance) + + def Kulsinki_Distance(self,X1,X2): + """ + Returns the list of Kulsinki distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Kulsinki distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + 
distance.append(result[2]+result[3]+result[0]-result[1])/(result[2]+result[3]+result[0]) + return(distance) + + def Rogers_Tanimoto_Distance(self,X1,X2): + """ + Returns the list of Rogers-Tanimoto distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Rogers-Tanimoto distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append(2*(result[2]+result[3])/(result[2]+result[3]+result[0])) + return(distance) + + def Russell_Rao_Distance(self,X1,X2): + """ + Returns the list of Russell-Rao distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Russell-Rao distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append((result[0]-result[1])/result[0]) + return(distance) + + def Sokal_Michener_Distance(self,X1,X2): + """ + Returns the list of Sokal-Michener distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Sokal-Michener distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append(2*(result[2]+result[3])/(result[0]+result[2]+result[3])) + return(distance) + + def Sokal_Sneath_Distance(self,X1,X2): + """ + Returns the list of Sokal-Sneath distance between + two corresponding vectors of two binary arrays + + PARAMETERS + ========== + 
X1:ndarray(dtype=int,axis=1) + input array with 1 dimension + + X2:ndarray(dtype=int,axis=1) + input array with 1 dimension + + RETURNS + ========= + distance:list + Returns the list of Sokal-Sneath distance + """ + distance=[] + for i in range(len(X1)): + result=self.__boolean_opr(X1[i],X2[i]) + distance.append((result[2]+result[3])/(0.5*result[1]+result[2]+result[3])) + return(distance) From 943dd16015a670d20d34b409f5eca1f9366f0e95 Mon Sep 17 00:00:00 2001 From: ShubhamMishra1611 Date: Fri, 7 Oct 2022 10:53:50 +0530 Subject: [PATCH 2/3] Distance_metric_example.py added & fixed Kulsinki_Distance function in distance_metric.py --- Examples/Distance_metric_example.py | 64 +++++++++++++++++++++++++++++ MLlib/distance_metrics.py | 2 +- 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 Examples/Distance_metric_example.py diff --git a/Examples/Distance_metric_example.py b/Examples/Distance_metric_example.py new file mode 100644 index 0000000..ee999c9 --- /dev/null +++ b/Examples/Distance_metric_example.py @@ -0,0 +1,64 @@ +from MLlib.distance_metrics import Distance_metrics +import numpy as np + +data=np.genfromtxt('dataset/salaryinp.csv',delimiter=',') +X1=np.array(data[:len(data)//2]) +X2=np.array(data[len(data)//2:]) + + +Euc=Distance_metrics.Euclidean_Distance(X1=X1,X2=X2) +print("Euclidean Distance: ",Euc) + +Mah=Distance_metrics.Manhattan_Distance(X1=X1,X2=X2) +print("Manhattan Distance: ",Mah) + +Che=Distance_metrics.Chebyshev_Distance(X1=X1,X2=X2) +print("Chebyshev Distance: ",Che) + +Mink=Distance_metrics.Minkowski_Distance(X1=X1,X2=X2,p=3)#order will be as per user's choice +print("Minkowski Distance: ",Mink) + +WMink=Distance_metrics.WMinkowski_Distance(X1=X1,X2=X2,p=3,w=0.5)#order and weight will be as per user's choice +print("Weighted Minkowski Distance: ",WMink) + +V=np.random.randint(0,10,size=(len(X1),1))#Variance to be provided by user +SEuc=Distance_metrics.sEuclidean_distance(X1=X1,X2=X2,V=V) +print("Standardized Euclidean Distance: 
",SEuc) + +Maha=Distance_metrics.Mahalanobis_distance(X1=X1,X2=X2) +print("Mahalanobis Distance: ",Maha) + +#Generating Boolean data +#User can generate boolean data as per his/her requirement +X1=np.random.randint(0,2,size=(100,5)) +X2=np.random.randint(0,2,size=(100,5)) + +Ham=Distance_metrics.Hamming_Distance(X1=X1,X2=X2) +print("Hamming Distance: ",Ham) + +Dist_met=Distance_metrics() + +Jacc=Dist_met.Jaccard_Distance(X1=X1,X2=X2) +print("Jaccard Distance: ",Jacc) + +Match_dis=Dist_met.Matching_Distance(X1=X1,X2=X2) +print("Matching Distance: ",Match_dis) + +Dice=Dist_met.Dice_Distance(X1=X1,X2=X2) +print("Dice Distance: ",Dice) + +Kuls=Dist_met.Kulsinki_Distance(X1=X1,X2=X2) +print("Kulsinki Distance: ",Kuls) + +Rog=Dist_met.Rogers_Tanimoto_Distance(X1=X1,X2=X2) +print("Rogers Tanimoto Distance: ",Rog) + +Rus=Dist_met.Russell_Rao_Distance(X1=X1,X2=X2) +print("Russell Rao Distance: ",Rus) + +Sok=Dist_met.Sokal_Sneath_Distance(X1=X1,X2=X2) +print("Sokal Sneath Distance: ",Sok) + +Sok_M=Dist_met.Sokal_Michener_Distance(X1=X1,X2=X2) +print("Sokal Michener Distance: ",Sok_M) + diff --git a/MLlib/distance_metrics.py b/MLlib/distance_metrics.py index 435f983..b91ac74 100644 --- a/MLlib/distance_metrics.py +++ b/MLlib/distance_metrics.py @@ -385,7 +385,7 @@ def Kulsinki_Distance(self,X1,X2): distance=[] for i in range(len(X1)): result=self.__boolean_opr(X1[i],X2[i]) - distance.append(result[2]+result[3]+result[0]-result[1])/(result[2]+result[3]+result[0]) + distance.append((result[2]+result[3]+result[0]-result[1])/(result[2]+result[3]+result[0])) return(distance) def Rogers_Tanimoto_Distance(self,X1,X2): From 504892f1a8f4aa05ec4a621566dc7422710c8763 Mon Sep 17 00:00:00 2001 From: ShubhamMishra1611 Date: Fri, 7 Oct 2022 14:28:11 +0530 Subject: [PATCH 3/3] Updated features in README.md and code formatting --- Examples/Distance_metric_example.py | 86 +++++------ MLlib/distance_metrics.py | 219 ++++++++++++++-------------- README.md | 29 ++-- 3 files changed, 175 
insertions(+), 159 deletions(-) diff --git a/Examples/Distance_metric_example.py b/Examples/Distance_metric_example.py index ee999c9..90a6bf8 100644 --- a/Examples/Distance_metric_example.py +++ b/Examples/Distance_metric_example.py @@ -1,64 +1,66 @@ from MLlib.distance_metrics import Distance_metrics import numpy as np -data=np.genfromtxt('dataset/salaryinp.csv',delimiter=',') -X1=np.array(data[:len(data)//2]) -X2=np.array(data[len(data)//2:]) +data = np.genfromtxt('dataset/salaryinp.csv', delimiter=',') +X1 = np.array(data[:len(data)//2]) +X2 = np.array(data[len(data)//2:]) -Euc=Distance_metrics.Euclidean_Distance(X1=X1,X2=X2) -print("Euclidean Distance: ",Euc) +Euc = Distance_metrics.Euclidean_Distance(X1=X1, X2=X2) +print("Euclidean Distance: ", Euc) -Mah=Distance_metrics.Manhattan_Distance(X1=X1,X2=X2) -print("Manhattan Distance: ",Mah) +Mah = Distance_metrics.Manhattan_Distance(X1=X1, X2=X2) +print("Manhattan Distance: ", Mah) -Che=Distance_metrics.Chebyshev_Distance(X1=X1,X2=X2) -print("Chebyshev Distance: ",Che) +Che = Distance_metrics.Chebyshev_Distance(X1=X1, X2=X2) +print("Chebyshev Distance: ", Che) -Mink=Distance_metrics.Minkowski_Distance(X1=X1,X2=X2,p=3)#order will be as per user's choice -print("Minkowski Distance: ",Mink) +# order will be as per user's requirement +Mink = Distance_metrics.Minkowski_Distance(X1=X1, X2=X2, p=3) +print("Minkowski Distance: ", Mink) -WMink=Distance_metrics.WMinkowski_Distance(X1=X1,X2=X2,p=3,w=0.5)#order and weight will be as per user's choice -print("Weighted Minkowski Distance: ",WMink) +# order and weight will be as per user's requirement +WMink = Distance_metrics.WMinkowski_Distance(X1=X1, X2=X2, p=3, w=0.5) +print("Weighted Minkowski Distance: ", WMink) -V=np.random.randint(0,10,size=(len(X1),1))#Variance to be provided by user -SEuc=Distance_metrics.sEuclidean_distance(X1=X1,X2=X2,V=V) -print("Standardized Euclidean Distance: ",SEuc) +# Variance to be provided by user +V = np.random.randint(0, 10, size=(len(X1), 
1)) +SEuc = Distance_metrics.sEuclidean_distance(X1=X1, X2=X2, V=V) +print("Standardized Euclidean Distance: ", SEuc) -Maha=Distance_metrics.Mahalanobis_distance(X1=X1,X2=X2) -print("Mahalanobis Distance: ",Maha) +Maha = Distance_metrics.Mahalanobis_distance(X1=X1, X2=X2) +print("Mahalanobis Distance: ", Maha) -#Generating Boolean data -#User can generate boolean data as per his/her requirement -X1=np.random.randint(0,2,size=(100,5)) -X2=np.random.randint(0,2,size=(100,5)) +# Generating Boolean data +# User can generate boolean data as per requirement +X1 = np.random.randint(0, 2, size=(100, 5)) +X2 = np.random.randint(0, 2, size=(100, 5)) -Ham=Distance_metrics.Hamming_Distance(X1=X1,X2=X2) -print("Hamming Distance: ",Ham) +Ham = Distance_metrics.Hamming_Distance(X1=X1, X2=X2) +print("Hamming Distance: ", Ham) -Dist_met=Distance_metrics() +Dist_met = Distance_metrics() -Jacc=Dist_met.Jaccard_Distance(X1=X1,X2=X2) -print("Jaccard Distance: ",Jacc) +Jacc = Dist_met.Jaccard_Distance(X1=X1, X2=X2) +print("Jaccard Distance: ", Jacc) -Match_dis=Dist_met.Matching_Distance(X1=X1,X2=X2) -print("Matching Distance: ",Match_dis) +Match_dis = Dist_met.Matching_Distance(X1=X1, X2=X2) +print("Matching Distance: ", Match_dis) -Dice=Dist_met.Dice_Distance(X1=X1,X2=X2) -print("Dice Distance: ",Dice) +Dice = Dist_met.Dice_Distance(X1=X1, X2=X2) +print("Dice Distance: ", Dice) -Kuls=Dist_met.Kulsinki_Distance(X1=X1,X2=X2) -print("Kulsinki Distance: ",Kuls) +Kuls = Dist_met.Kulsinki_Distance(X1=X1, X2=X2) +print("Kulsinki Distance: ", Kuls) -Rog=Dist_met.Rogers_Tanimoto_Distance(X1=X1,X2=X2) -print("Rogers Tanimoto Distance: ",Rog) +Rog = Dist_met.Rogers_Tanimoto_Distance(X1=X1, X2=X2) +print("Rogers Tanimoto Distance: ", Rog) -Rus=Dist_met.Russell_Rao_Distance(X1=X1,X2=X2) -print("Russell Rao Distance: ",Rus) +Rus = Dist_met.Russell_Rao_Distance(X1=X1, X2=X2) +print("Russell Rao Distance: ", Rus) -Sok=Dist_met.Sokal_Sneath_Distance(X1=X1,X2=X2) -print("Sokal Sneath Distance: ",Sok) - 
-Sok_M=Dist_met.Sokal_Michener_Distance(X1=X1,X2=X2) -print("Sokal Michener Distance: ",Sok_M) +Sok = Dist_met.Sokal_Sneath_Distance(X1=X1, X2=X2) +print("Sokal Sneath Distance: ", Sok) +Sok_M = Dist_met.Sokal_Michener_Distance(X1=X1, X2=X2) +print("Sokal Michener Distance: ", Sok_M) diff --git a/MLlib/distance_metrics.py b/MLlib/distance_metrics.py index b91ac74..dfb96fb 100644 --- a/MLlib/distance_metrics.py +++ b/MLlib/distance_metrics.py @@ -1,13 +1,15 @@ import numpy as np + + class Distance_metrics: """ Calculate distance between each corresponding points of two arrays using different distance metrics """ - def Euclidean_Distance(X1,X2): + def Euclidean_Distance(X1, X2): """" Returns the list of euclidean distance - between two corresponding points of + between two corresponding points of two arrays PARAMETERS @@ -23,17 +25,17 @@ def Euclidean_Distance(X1,X2): distance:list Returns the list of euclidean distance - between two corresponding points of + between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum((X1[i]-X2[i])**2) + single = 0 + single = np.sum((X1[i]-X2[i])**2) distance.append(np.sqrt(single)) - return(distance) + return (distance) - def Manhattan_Distance(X1,X2): + def Manhattan_Distance(X1, X2): """" Returns the list of manhattan distance between two corresponding points of @@ -52,17 +54,17 @@ def Manhattan_Distance(X1,X2): distance:list Returns the list of manhattan distance - between two corresponding points of + between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum(abs(X1[i]-X2[i])) + single = 0 + single = np.sum(abs(X1[i]-X2[i])) distance.append(single) - return(distance) + return (distance) - def Chebyshev_Distance(X1,X2): + def Chebyshev_Distance(X1, X2): """" Returns the list of chebyshev distance between two corresponding points of @@ -84,14 +86,14 @@ def Chebyshev_Distance(X1,X2): 
between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum(max(X1[i]-X2[i])) + single = 0 + single = np.sum(max(X1[i]-X2[i])) distance.append(single) - return(distance) + return (distance) - def Minkowski_Distance(X1,X2,p): + def Minkowski_Distance(X1, X2, p): """" Returns list of minkowski distance of order 'p' between two corresponding vectors of @@ -116,14 +118,14 @@ def Minkowski_Distance(X1,X2,p): between two corresponding vectors of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum((abs(X1[i]-X2[i]))**p) + single = 0 + single = np.sum((abs(X1[i]-X2[i]))**p) distance.append((single)**(1/p)) - return(distance) - - def WMinkowski_Distance(X1,X2,p,W): + return (distance) + + def WMinkowski_Distance(X1, X2, p, W): """" Returns list of weighted minkowski distance of order 'p' between two corresponding vectors weighted by W of @@ -151,14 +153,14 @@ def WMinkowski_Distance(X1,X2,p,W): between two corresponding vectors of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum((abs(W*(X1[i]-X2[i])))**p) + single = 0 + single = np.sum((abs(W*(X1[i]-X2[i])))**p) distance.append((single)**(1/p)) - return(distance) + return (distance) - def Hamming_Distance(X1,X2): + def Hamming_Distance(X1, X2): """ Returns the Hamming distance between two binary arrays @@ -178,12 +180,12 @@ def Hamming_Distance(X1,X2): two binary arrays """ s = 0 - for e1,e2 in zip(X1,X2): + for e1, e2 in zip(X1, X2): s += abs(e1-e2) distance = s/len(X1) return distance - def sEuclidean_distance(X1,X2,V): + def sEuclidean_distance(X1, X2, V): """ Returns the list of standardized euclidean distance between two corresponding points of @@ -208,14 +210,14 @@ def sEuclidean_distance(X1,X2,V): between two corresponding points of two arrays """ - distance=[] + distance = [] for i in range(len(X1)): - single=0 - single=np.sum(((X1[i]-X2[i])/V[i])**2) + 
single = 0 + single = np.sum(((X1[i]-X2[i])/V[i])**2) distance.append(np.sqrt(single)) - return(distance) + return (distance) - def Mahalanobis_Distance(X,d,V=None): + def Mahalanobis_Distance(X, d, V=None): """ Returns the mahalanobis distance between points and distribution @@ -242,17 +244,17 @@ def Mahalanobis_Distance(X,d,V=None): Returns the list of mahalanobis distance between points and given distribution """ - distance=[] + distance = [] for i in range(len(X)): - x_minus_mu = X[i]-np.mean(d,axis=0) - if V==None: - V=np.cov(d.T) + x_minus_mu = X[i]-np.mean(d, axis=0) + if V is None: + V = np.cov(d.T) VI = np.linalg.inv(V) - d = np.sqrt(np.dot(np.dot(x_minus_mu,VI),x_minus_mu.T)) + d = np.sqrt(np.dot(np.dot(x_minus_mu, VI), x_minus_mu.T)) distance.append(d) - return(distance) - - def __boolean_opr(self,X1,X2): + return (distance) + + def __boolean_opr(self, X1, X2): """ Returns result of some bianry operation between two arrays.Any non zero value is @@ -276,27 +278,26 @@ def __boolean_opr(self,X1,X2): result[3]:Number of dims in which the first value is False and second is True result[4]:Number of dims in which both values are False """ - if len(X1)!=len(X2): + if len(X1) != len(X2): raise TypeError("X1 and X2 must have same length") - result=[] + result = [] for i in range(len(X1)): - if X1[i]!=0: - X1[i]=1 - if X2[i]!=0: - X2[i]=1 + if X1[i] != 0: + X1[i] = 1 + if X2[i] != 0: + X2[i] = 1 result.append(len(X1)) - result.append(np.sum((X1==1)&(X2==1))) - result.append(np.sum((X1==1)&(X2==0))) - result.append(np.sum((X1==0)&(X2==1))) - result.append(np.sum((X1==0)&(X2==0))) - return(tuple(result)) - + result.append(np.sum((X1 == 1) & (X2 == 1))) + result.append(np.sum((X1 == 1) & (X2 == 0))) + result.append(np.sum((X1 == 0) & (X2 == 1))) + result.append(np.sum((X1 == 0) & (X2 == 0))) + return (tuple(result)) - def Jaccard_Distance(self,X1,X2): + def Jaccard_Distance(self, X1, X2): """ Returns the list of Jaccard distance between two corresponding vectors of 
two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -310,17 +311,18 @@ def Jaccard_Distance(self,X1,X2): distance:list Returns the list of Jaccard distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) - distance.append((result[2]+result[3])/(result[1]+result[2]+result[3])) - return(distance) - - def Matching_Distance(self,X1,X2): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3]) / + (result[1]+result[2]+result[3])) + return (distance) + + def Matching_Distance(self, X1, X2): """ Returns the list of Matching distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -334,17 +336,17 @@ def Matching_Distance(self,X1,X2): distance:list Returns the list of Matching distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) + result = self.__boolean_opr(X1[i], X2[i]) distance.append((result[2]+result[3])/result[0]) - return(distance) + return (distance) - def Dice_Distance(self,X1,X2): + def Dice_Distance(self, X1, X2): """ Returns the list of Dice distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -358,17 +360,18 @@ def Dice_Distance(self,X1,X2): distance:list Returns the list of Dice distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) - distance.append((result[2]+result[3])/(2*result[1]+result[2]+result[3])) - return(distance) - - def Kulsinki_Distance(self,X1,X2): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3]) / + (2*result[1]+result[2]+result[3])) + return (distance) + + def Kulsinki_Distance(self, X1, X2): """ Returns the list of Kulsinki distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -382,17 +385,18 @@ def 
Kulsinki_Distance(self,X1,X2): distance:list Returns the list of Kulsinki distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) - distance.append((result[2]+result[3]+result[0]-result[1])/(result[2]+result[3]+result[0])) - return(distance) - - def Rogers_Tanimoto_Distance(self,X1,X2): + result = self.__boolean_opr(X1[i], X2[i]) + distance.append( + (result[2]+result[3]+result[0]-result[1])/(result[2]+result[3]+result[0])) + return (distance) + + def Rogers_Tanimoto_Distance(self, X1, X2): """ Returns the list of Rogers-Tanimoto distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -406,17 +410,18 @@ def Rogers_Tanimoto_Distance(self,X1,X2): distance:list Returns the list of Rogers-Tanimoto distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) - distance.append(2*(result[2]+result[3])/(result[2]+result[3]+result[0])) - return(distance) + result = self.__boolean_opr(X1[i], X2[i]) + distance.append(2*(result[2]+result[3]) / + (result[2]+result[3]+result[0])) + return (distance) - def Russell_Rao_Distance(self,X1,X2): + def Russell_Rao_Distance(self, X1, X2): """ Returns the list of Russell-Rao distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -430,17 +435,17 @@ def Russell_Rao_Distance(self,X1,X2): distance:list Returns the list of Russell-Rao distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) + result = self.__boolean_opr(X1[i], X2[i]) distance.append((result[0]-result[1])/result[0]) - return(distance) + return (distance) - def Sokal_Michener_Distance(self,X1,X2): + def Sokal_Michener_Distance(self, X1, X2): """ Returns the list of Sokal-Michener distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) 
@@ -454,17 +459,18 @@ def Sokal_Michener_Distance(self,X1,X2): distance:list Returns the list of Sokal-Michener distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) - distance.append(2*(result[2]+result[3])/(result[0]+result[2]+result[3])) - return(distance) + result = self.__boolean_opr(X1[i], X2[i]) + distance.append(2*(result[2]+result[3]) / + (result[0]+result[2]+result[3])) + return (distance) - def Sokal_Sneath_Distance(self,X1,X2): + def Sokal_Sneath_Distance(self, X1, X2): """ Returns the list of Sokal-Sneath distance between two corresponding vectors of two binary arrays - + PARAMETERS ========== X1:ndarray(dtype=int,axis=1) @@ -478,8 +484,9 @@ def Sokal_Sneath_Distance(self,X1,X2): distance:list Returns the list of Sokal-Sneath distance """ - distance=[] + distance = [] for i in range(len(X1)): - result=self.__boolean_opr(X1[i],X2[i]) - distance.append((result[2]+result[3])/(0.5*result[1]+result[2]+result[3])) - return(distance) + result = self.__boolean_opr(X1[i], X2[i]) + distance.append((result[2]+result[3]) / + (0.5*result[1]+result[2]+result[3])) + return (distance) diff --git a/README.md b/README.md index 68c5a43..f587ab3 100644 --- a/README.md +++ b/README.md @@ -84,14 +84,21 @@ Follow the following steps to get started with contributing to the repository. 
| | | | | Z_Score | [models.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/models.py#L1637) | | | | | | Sequential Neural Network | [models.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/models.py#L1680) | -| Loss Functions | Location | Regularizer | Location | Metrics | Location | -| :------------ | ------------: | :------------ | ------------: | :------------ | ------------: | -|**LOSS FUNCTIONS**| |**REGULARIZER**| |**METRICS**| | -| Mean Squared Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L5) | L1_Regularizer| [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L9) | Confusion Matrix | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L25) -| Logarithmic Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L57) | L2_Regularizer | [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L58) | Precision | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L81) -| Absolute Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L113) | | | Accuracy | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L80) -| Cosine Similarity | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L173) | | | Recall | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L82) -| Log_cosh | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L248) | | | F1 Score | 
[metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L85) -| Huber | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L300) | | | F-B Theta | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L88) -| Mean Squared Log Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L367) | | | Specificity | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L86) -| Mean Absolute Percentage Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L399) +| Loss Functions | Location | Regularizer | Location | Metrics | Location | Distance Metrics | Location | +| :------------ | ------------: | :------------ | ------------: | :------------ | ------------: | :------------ | ------------: | +|**LOSS FUNCTIONS**| |**REGULARIZER**| |**METRICS**| |**DISTANCE METRICS**| | +| Mean Squared Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L5) | L1_Regularizer| [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L9) | Confusion Matrix | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L25) | Euclidean Distance | [distance_metrics.py]() | +| Logarithmic Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L57) | L2_Regularizer | [regularizer.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/regularizer.py#L58) | Precision | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L81) | Manhattan Distance | [distance_metrics.py]() | +| Absolute Error | 
[loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L113) | | | Accuracy | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L80) | Minkowski Distance | [distance_metrics.py]() | +| Cosine Similarity | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L173) | | | Recall | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L82) | Chebyshev Distance | [distance_metrics.py]() | +| Log_cosh | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L248) | | | F1 Score | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L85) | WMinkowski Distance | [distance_metrics.py]() | +| Huber | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L300) | | | F-B Theta | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L88) | Hamming Distance | [distance_metrics.py]() | +| Mean Squared Log Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L367) | | | Specificity | [metrics.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/metrics.py#L86) | sEuclidean Distance | [distance_metrics.py]() | +| Mean Absolute Percentage Error | [loss_func.py](https://github.com/RoboticsClubIITJ/ML-DL-implementation/blob/master/MLlib/loss_func.py#L399) | | | | | Mahalanobis Distance | [distance_metrics.py]()| +| | | | | | | Jaccard Distance | [distance_metrics.py]() | +| | | | | | | Matching Distance | [distance_metrics.py]() | +| | | | | | | Dice Distance | [distance_metrics.py]() | +| | | | | | | Kulsinki Distance | [distance_metrics.py]() | +| | | | | | | Rogers-Tanimoto Distance | [distance_metrics.py]() | +| | | | | | | 
Russell-Rao Distance | [distance_metrics.py]() | +| | | | | | | Sokal-Michener Distance | [distance_metrics.py]() |