# -*- coding: utf-8 -*-
"""
Choose how many polynomial features a logistic-regression classifier should use.

The script loads ``data.txt``, standardizes the features, expands them
polynomially, holds out a stratified test split and then uses stratified
k-fold cross-validation on the training split to compare models that use the
first 2, 3, ... columns of the expanded feature matrix.

Created on Wed Oct 4 08:14:52 2017

@author: Miguel Almeida
"""

import numpy as np
import matplotlib.pyplot as plt
from random import shuffle
import matplotlib.patches as mpatches
# sklearn.cross_validation was removed in scikit-learn 0.20; the same helpers
# live in sklearn.model_selection (available since 0.18).
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression


def load_data(file_name):
    """Return the contents of a comma-separated numeric text file as an array.

    The script below treats column 0 as the class label and the remaining
    columns as the features.
    """
    return np.loadtxt(file_name, delimiter=',')


def poly_16features(X):
    """Expand two features polynomially into 16 columns.

    ``X`` is indexed as ``X[:, 0]`` and ``X[:, 1]``; the output has
    ``X.shape[1] + 14`` columns, so it is 16 wide for a two-column input.
    Columns are ordered by increasing degree so that taking the first
    ``k`` columns gives progressively richer models:

    0-1   x0, x1
    2-4   x0*x1, x0^2, x1^2
    5-8   x0^3, x1^3, x0^2*x1, x1^2*x0
    9-13  x0^4, x1^4, x0^3*x1, x1^3*x0, x0^2*x1^2
    14-15 x0^5, x1^5
    """
    x0 = X[:, 0]
    x1 = X[:, 1]
    X_exp = np.zeros((X.shape[0], X.shape[1] + 14))
    X_exp[:, :2] = X
    X_exp[:, 2] = x0 * x1
    X_exp[:, 3] = x0 ** 2
    X_exp[:, 4] = x1 ** 2
    X_exp[:, 5] = x0 ** 3
    X_exp[:, 6] = x1 ** 3
    X_exp[:, 7] = x0 ** 2 * x1
    X_exp[:, 8] = x1 ** 2 * x0
    X_exp[:, 9] = x0 ** 4
    X_exp[:, 10] = x1 ** 4
    X_exp[:, 11] = x0 ** 3 * x1
    X_exp[:, 12] = x1 ** 3 * x0
    # Fixed: this used to be x0**2 * x0**2, which only duplicated column 9.
    X_exp[:, 13] = x0 ** 2 * x1 ** 2
    X_exp[:, 14] = x0 ** 5
    X_exp[:, 15] = x1 ** 5
    return X_exp


def calc_fold(feats, X, Y, train_ix, valid_ix, C=1e12):
    """Fit one fold and return ``(training error, validation error)``.

    A logistic regression is fitted on the rows ``train_ix`` using only the
    first ``feats`` columns of ``X``. The error is the mean squared
    difference between the predicted probability of the second class
    (``predict_proba(...)[:, 1]``) and ``Y``, averaged separately over the
    training and the validation rows.

    ``C`` is the inverse regularization strength passed to
    ``LogisticRegression``; the very large default makes regularization
    negligible.

    NOTE(review): the squared difference is only meaningful if ``Y`` holds
    0/1 labels - confirm against the data file.
    """
    reg = LogisticRegression(C=C, tol=1e-10)
    reg.fit(X[train_ix, :feats], Y[train_ix])
    prob = reg.predict_proba(X[:, :feats])[:, 1]
    squares = (prob - Y) ** 2
    return np.mean(squares[train_ix]), np.mean(squares[valid_ix])


matrix = load_data("data.txt")

# Shuffle the rows in place.
np.random.shuffle(matrix)

# Standardize the feature columns (column 0 is the class label).
means = np.mean(matrix[:, 1:], axis=0)
devs = np.std(matrix[:, 1:], axis=0)
matrix[:, 1:] = (matrix[:, 1:] - means) / devs

# Separate features from class labels.
features = matrix[:, 1:]
classes = matrix[:, 0]

# Expand the features polynomially.
features = poly_16features(features)

# Hold out a stratified test split; cross-validation uses the training part.
feature_tr, feature_t, classe_tr, classe_t = train_test_split(
    features, classes, test_size=0.33, stratify=classes)

folds = 5
stratKf = StratifiedKFold(n_splits=folds)
# Materialize the folds once so every model is scored on the same partitions.
splits = list(stratKf.split(feature_tr, classe_tr))

# NOTE(review): this stops at 15, so the model using all 16 expanded columns
# is never evaluated - confirm whether range(2, 17) was intended.
feature_counts = list(range(2, 16))

errorTrain = []
errorValidation = []
# The loop variable used to be called `features`, which overwrote the
# feature matrix defined above.
for n_feats in feature_counts:
    tr_err = va_err = 0
    for tr_ix, val_ix in splits:
        r, v = calc_fold(n_feats, feature_tr, classe_tr, tr_ix, val_ix)
        tr_err += r
        va_err += v
    errorTrain.append(tr_err / folds)
    errorValidation.append(va_err / folds)
    print(n_feats, ':', tr_err / folds, va_err / folds)

# Plot against the real feature counts rather than the list index.
plt.plot(feature_counts, errorTrain, color="red", linewidth=1.0)
red_patch = mpatches.Patch(color='red', label='Training Error')
plt.plot(feature_counts, errorValidation, color="black", linewidth=1.0)
black_patch = mpatches.Patch(color='black', label='Validation Error')
plt.legend(handles=[red_patch, black_patch])
plt.title('Train and Validation Error', fontsize=18)
plt.xlabel('Number of features', fontsize=18)
plt.ylabel('Error Value', fontsize=18)
# Without this call nothing is displayed when run as a plain script.
plt.show()