# -*- coding: utf-8 -*-
"""
Created on Wed Sep 27 08:15:42 2017

@author: Miguel Almeida

Fit polynomials of degree 1 to 5 to the first two columns of
"bluegills.txt", pick the degree with the lowest validation error, report
its error on the held-out test split and save a plot of all fits.
"""

import numpy as np
import matplotlib.pyplot as plt
from random import shuffle


def load_data(file_name):
    """Load a tab-separated numeric table, skipping the one-line header.

    Parameters
    ----------
    file_name : str
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        The parsed values as returned by ``np.loadtxt``. The rest of this
        script uses column 0 as the input variable and column 1 as the
        target.
    """
    return np.loadtxt(file_name, skiprows=1, delimiter='\t')


def mean_square_error(train, test, coef):
    """Return the mean squared error of a polynomial on two data sets.

    Parameters
    ----------
    train, test : numpy.ndarray
        Arrays whose column 0 holds the inputs and column 1 the targets.
    coef : array_like
        Polynomial coefficients, highest power first (``np.polyfit`` order).

    Returns
    -------
    tuple of float
        ``(train_error, test_error)``.
    """
    # Use the coefficients that were passed in; relying on a global here
    # would evaluate whichever polynomial happened to be fitted last.
    test_pred = np.polyval(coef, test[:, 0])
    test_error = np.mean((test[:, 1] - test_pred) ** 2)
    train_pred = np.polyval(coef, train[:, 0])
    train_error = np.mean((train[:, 1] - train_pred) ** 2)
    return train_error, test_error


matrix = load_data("bluegills.txt")

# Shuffle the rows so the splits below are random.
ranks = np.arange(matrix.shape[0])
shuffle(ranks)
matrix = matrix[ranks, :]

# Split data: train (50%), validation (25%) and test (25%).
half = int(matrix.shape[0] * 0.5)
quarter = int(matrix.shape[0] * 0.75)
train = matrix[:half, :]
valid = matrix[half:quarter, :]
test = matrix[quarter:, :]

plt.figure(figsize=(13, 8), frameon=False)
pxs = np.linspace(min(matrix[:, 0]), max(matrix[:, 0]), 500)

# Start from +inf so the first finite validation error is always accepted.
best_Error = np.inf
legends = []
for degree in range(1, 6):
    coefs = np.polyfit(train[:, 0], train[:, 1], degree)
    train_Error, valid_Error = mean_square_error(train, valid, coefs)
    poly = np.polyval(coefs, pxs)
    # Legend entry format: degree / training error / validation error.
    legends.append("{}/{:3.4f}/{:3.4f}".format(degree, train_Error, valid_Error))
    plt.plot(pxs, poly)
    if valid_Error < best_Error:
        # Keep the running minimum so only a strictly better model wins.
        best_Error = valid_Error
        best_coef = np.copy(coefs)
        best_degree = degree

# Estimate the selected model's error on the test split, which took no part
# in fitting or model selection.
train_Error, test_Error = mean_square_error(train, test, best_coef)
print(best_degree, test_Error)

# The legend is attached before the scatter plots are drawn so that its
# labels map onto the polynomial curves plotted in the loop above.
plt.legend(legends, bbox_to_anchor=(1, 0.5))
plt.plot(train[:, 0], train[:, 1], 'ob')
plt.plot(valid[:, 0], valid[:, 1], 'og')
plt.plot(test[:, 0], test[:, 1], 'or')
plt.title('Blue gill size')
plt.savefig('chast.png', dpi=300)