Friday, September 2, 2022

PCA/LDA - convex hulls

 

# PCA
import pandas as pd
import numpy as np
from sklearn import datasets
from scipy.spatial import ConvexHull, convex_hull_plot_2d
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA

# LDA
# LDA ships with scikit-learn; no separate 'lda' package is needed
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

# start
scaler = StandardScaler()

class LDAs:
    def __init__(self, n_components):
        self.n_components = n_components
        self.linear_discriminants = None

    def fit(self, x, y):
        n_features = x.shape[1]
        class_labels = np.unique(y)
        mean_overall = np.mean(x, axis=0)
        # within-class (S_W) and between-class (S_B) scatter matrices
        S_W = np.zeros((n_features, n_features))
        S_B = np.zeros((n_features, n_features))
        for c in class_labels:
            X_c = x[y == c]
            mean_c = np.mean(X_c, axis=0)
            S_W += (X_c - mean_c).T.dot(X_c - mean_c)
            n_c = X_c.shape[0]
            mean_diff = (mean_c - mean_overall).reshape(n_features, 1)
            S_B += n_c * mean_diff.dot(mean_diff.T)
        # solve the eigenvalue problem for S_W^-1 S_B
        A = np.linalg.inv(S_W).dot(S_B)
        eigenvalues, eigenvectors = np.linalg.eig(A)
        eigenvectors = eigenvectors.T
        # keep the eigenvectors with the largest (absolute) eigenvalues
        idxs = np.argsort(abs(eigenvalues))[::-1]
        eigenvalues = eigenvalues[idxs]
        eigenvectors = eigenvectors[idxs]
        self.linear_discriminants = eigenvectors[0:self.n_components]
        print(self.linear_discriminants)

    def transform(self, x):
        # project the data onto the selected linear discriminants
        return np.dot(x, self.linear_discriminants.T)

# Opening the CSV
# data = datasets.load_iris()  # iris
df = pd.read_csv('EBIs.csv', names=['target','CPOD','CED','CPD','CEA','CBDIA','CESC','CEIC ','CAC','CBD','CC','DO','DEN','CF','DI','CRM','ASO','COP','LD','AD','DLNB','CEPeitoral','CT','CA','CENA','CPC','Lcie','LSC','CEPelvica','APC'])
features = ['CPOD','CED','CPD','CEA','CBDIA','CESC','CEIC ','CAC','CBD','CC','DO','DEN','CF','DI','CRM','ASO','COP','LD','AD','DLNB','CEPeitoral','CT','CA','CENA','CPC','Lcie','LSC','CEPelvica','APC']

# X = data.data    # iris
# Y = data.target  # iris
x = df.loc[:, features].values  # same role as X in the iris example
y = df.values[:, 0]             # all the class labels, one per row
target = set(y)                 # same as the iris target, but as a set {} so it looks different
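The StandardScaler created at the top of the script is never actually applied. A minimal sketch of how it could be used to standardize the measurements before LDA/PCA (optional; x_scaled is a name introduced here, and the rest of the script keeps working on the raw x):

# optional: standardize the features first (sketch; not used by the rest of the script)
x_scaled = scaler.fit_transform(x)  # zero mean, unit variance per column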


#pca = PCA(n_components=2) #dois eixos

lda= LDAs(2) #(dois exios)

#principalComponents = pca.fit_transform(x)


#trava aqui

#teste

#xs = x*100

#xs=xs.astype('int')



lda.fit (x,y) 

X_projected = lda.transform(x) 
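LinearDiscriminantAnalysis is imported at the top but never called; a minimal sketch of how it could serve as a sanity check on the hand-rolled LDAs class (assumes the same x and y as above; lda_sk and X_sklearn are names introduced here):

# optional cross-check with scikit-learn's LDA (sketch, not part of the original pipeline)
lda_sk = LDA(n_components=2)
X_sklearn = lda_sk.fit_transform(x, y)  # should span the same 2-D subspace as X_projected, up to sign/scale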



# hull = ConvexHull(principalComponents)  # PCA
hull = ConvexHull(X_projected)  # LDA (overall hull; per-class hulls are computed again below)
colors = ['r', 'blue', 'orange', 'green', 'green', 'green', 'green', 'green', 'green', 'green']
lw = 2




# LDA hulls #

for color, i in zip(colors, [1., 2., 9., 4., 5., 6., 7., 8., 3., 10.]):  # the values in this list must match the labels in the 'target' column
    plt.scatter(X_projected[y == i, 0], X_projected[y == i, 1], color=color)
    hull = ConvexHull(X_projected[y == i])
    for simplex in hull.simplices:
        plt.plot(X_projected[y == i][simplex, 0], X_projected[y == i][simplex, 1], 'k-')
plt.xlabel('LD 1')
plt.ylabel('LD 2')
plt.show()
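The PCA path is left commented out throughout the script. A minimal sketch of how the same convex-hull plot could be drawn over the first two principal components instead (assumes the commented-out PCA import is enabled and reuses x, y and colors from above):

# optional PCA version of the same plot (sketch)
from sklearn.decomposition import PCA

pca = PCA(n_components=2)  # two axes, as in the commented lines above
principalComponents = pca.fit_transform(x)
for color, i in zip(colors, [1., 2., 9., 4., 5., 6., 7., 8., 3., 10.]):
    pts = principalComponents[y == i]
    plt.scatter(pts[:, 0], pts[:, 1], color=color)
    for simplex in ConvexHull(pts).simplices:
        plt.plot(pts[simplex, 0], pts[simplex, 1], 'k-')
plt.xlabel('PC 1')
plt.ylabel('PC 2')
plt.show()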

Result:





CSV file (template)


