Documentation ¶
Overview ¶
Example ¶
package main import ( "fmt" "math" "math/rand" "github.com/MaxHalford/eaopt" "github.com/davidkleiven/gogafit/gafit" "gonum.org/v1/gonum/mat" ) // Create a fictitious dataset func sampleData() gafit.Dataset { data := gafit.Dataset{ X: mat.NewDense(20, 5, nil), Y: mat.NewVecDense(20, nil), ColNames: []string{"const", "x", "x^2", "x^3", "x^4"}, } for i := 0; i < 20; i++ { x := 0.1 * float64(i) for j := 0; j < 5; j++ { data.X.Set(i, j, math.Pow(x, float64(j))) } data.Y.SetVec(i, 5.0-2.0*x*x*x) } return data } func main() { // Set a seed such that the run is deterministic rand.Seed(4) // Initialize GA with default configuration var ga, err = eaopt.NewDefaultGAConfig().NewGA() if err != nil { fmt.Println(err) return } // Set the number of generations to run for ga.NGenerations = 100 // Add a custom print function to track progress ga.Callback = func(ga *eaopt.GA) { // Optionally print progress information (commented out for this example) // fmt.Printf("Best fitness at generation %d: %f\n", ga.Generations, ga.HallOfFame[0].Fitness) } // Initialize a dataset data := sampleData() // Initialize the linear model factory factory := gafit.LinearModelFactory{ Config: gafit.LinearModelConfig{ Data: data, // We use AICC as a measure of the quality of the model Cost: gafit.Aicc, }, } // Find the minimum err = ga.Minimize(factory.Generate) if err != nil { fmt.Println(err) return } // Print the selected features best := ga.HallOfFame[0].Genome.(*gafit.LinearModel) // Run local optimization on the best genome res := best.Optimize() fmt.Printf("%v\n", data.IncludedFeatures(res.Include)) }
Output: [const x^3]
Index ¶
- Constants
- func Aic(X *mat.Dense, y *mat.VecDense, coeff *mat.VecDense, names []string) float64
- func Aicc(X *mat.Dense, y *mat.VecDense, coeff *mat.VecDense, names []string) float64
- func AllEqualInt(s1 []int, s2 []int) bool
- func Bic(X *mat.Dense, y *mat.VecDense, coeff *mat.VecDense, names []string) float64
- func CovMatrix(X *mat.Dense, rss float64) (*mat.SymDense, error)
- func DemoCostFuncPython(pyExec string) (string, error)
- func Fit(X *mat.Dense, y *mat.VecDense) *mat.VecDense
- func FitSVD(X *mat.Dense, y *mat.VecDense) *mat.VecDense
- func GAProgressLogger(ga *eaopt.GA)
- func GeneralizedCV(rmse float64, X *mat.Dense) float64
- func HatMatrix(X *mat.Dense) *mat.Dense
- func LogLikelihood(X *mat.Dense, y *mat.VecDense, coeff *mat.VecDense) float64
- func Pred(X *mat.Dense, coeff *mat.VecDense) *mat.VecDense
- func Rmse(X *mat.Dense, y *mat.VecDense, coeff *mat.VecDense) float64
- func Rss(X *mat.Dense, y *mat.VecDense, coeff *mat.VecDense) float64
- func SaveModel(fname string, model Model) error
- func SavePredictions(fname string, pred []Prediction) error
- func Write(fname string, X *mat.Dense, y *mat.VecDense, featNames []string, ...) error
- func WriteFile(f *os.File, X *mat.Dense, y *mat.VecDense, featNames []string, ...) error
- type CaptureFunction
- type CaptureResult
- type CostFunction
- type CostFunctionHook
- type Dataset
- func (data Dataset) Columns(pattern string) []int
- func (data Dataset) Copy() Dataset
- func (data Dataset) Dot(coeff map[string]float64) *mat.VecDense
- func (data Dataset) IncludedFeatures(indicator []int) []string
- func (data Dataset) IsEqual(other Dataset) bool
- func (data Dataset) NumData() int
- func (data Dataset) NumFeatures() int
- func (data Dataset) Submatrix(names []string) *mat.Dense
- type DemoCostFunc
- type EBic
- type FittedModel
- type GABackupCB
- type Hook
- type LinearModel
- func (l *LinearModel) Clone() eaopt.Genome
- func (l *LinearModel) Crossover(other eaopt.Genome, rng *rand.Rand)
- func (l *LinearModel) Evaluate() (float64, error)
- func (l *LinearModel) GetCoeff() *mat.VecDense
- func (l *LinearModel) IncludedCols() []int
- func (l *LinearModel) IsEmpty() bool
- func (l *LinearModel) IsEqual(other LinearModel) bool
- func (l *LinearModel) Mutate(rng *rand.Rand)
- func (l *LinearModel) MutationRate() float64
- func (l *LinearModel) NumIncluded() int
- func (l *LinearModel) NumSplits() uint
- func (l *LinearModel) Optimize() OptimizeResult
- type LinearModelConfig
- type LinearModelFactory
- type Model
- type ModelIterator
- type OptimizeResult
- type Prediction
- type PredictionErrorFIC
- type Score
- type SubMatrix
Examples ¶
Constants ¶
const CostFunctionIdentifier = "GOGAFIT_COST:"
CostFunctionIdentifier is a pattern that is searched for in the output of the hook. The floating point number that follows it is extracted
Variables ¶
This section is empty.
Functions ¶
func AllEqualInt ¶
AllEqualInt checks if all elements in s1 equal those in s2
func DemoCostFuncPython ¶
DemoCostFuncPython generates a demo script for python
func GAProgressLogger ¶
GAProgressLogger can be used as a callback to the GA algorithm. It logs the progress of the method
func GeneralizedCV ¶
GeneralizedCV returns the generalized CV, given by rmse/(1 - Tr(H)/N), where H is the HatMatrix and N is the number of datapoints
func HatMatrix ¶
HatMatrix returns the matrix that maps training data onto predictions. y = Hy', where y' are training points. In case of linear regression, y = Xc, where c is a coefficient vector that is given by c = (X^TX)^{-1}X^Ty', the hat matrix H = X(X^TX)^{-1}X^T. Internally, H is calculated by using the QR decomposition of X
func LogLikelihood ¶
LogLikelihood returns the logarithm of the likelihood function, assuming a normally distributed variable
func SavePredictions ¶
func SavePredictions(fname string, pred []Prediction) error
SavePredictions stores the predictions in a file
Types ¶
type CaptureFunction ¶
type CaptureFunction func(out string) (CaptureResult, error)
CaptureFunction is a type used to capture results from a string
type CaptureResult ¶
// CaptureResult groups captured values by type. Within each map the values
// are keyed by name.
type CaptureResult struct {
	// Floats holds the captured float values (see GetFloat)
	Floats map[string]float64
	// Ints holds the captured int values (see GetInt)
	Ints map[string]int
	// Strings holds the captured string values (see GetString)
	Strings map[string]string
}
CaptureResult is a type used to represent results captured from a string (e.g. the output of a hook script)
func NewCaptureResult ¶
func NewCaptureResult() CaptureResult
NewCaptureResult returns a new initialized instance of CaptureResult
func (CaptureResult) GetFloat ¶
func (cr CaptureResult) GetFloat(name string) float64
GetFloat returns captured float values
func (CaptureResult) GetInt ¶
func (cr CaptureResult) GetInt(name string) int
GetInt returns captured int values
func (CaptureResult) GetString ¶
func (cr CaptureResult) GetString(name string) string
GetString returns captured string values
type CostFunction ¶
CostFunction is a type used to represent cost functions for fitting
type CostFunctionHook ¶
CostFunctionHook is a type used to represent external cost functions
func NewCostFunctionHook ¶
func NewCostFunctionHook(script string) CostFunctionHook
NewCostFunctionHook returns a new instance of a cost function
func (CostFunctionHook) Cleanup ¶
func (cfh CostFunctionHook) Cleanup()
Cleanup erases temporary file created by the application
type Dataset ¶
// Dataset bundles a design matrix, a target vector and naming metadata.
type Dataset struct {
	// X is the design matrix. In the package example each row is a data
	// point and each column is a feature.
	X *mat.Dense
	// Y holds the target values. In the package example it has one entry
	// per row of X.
	Y *mat.VecDense
	// ColNames gives the name of the "feature" stored in each column of X
	ColNames []string
	// TargetName is presumably the name of the target quantity stored in Y
	// (cf. the targetName argument of ReadFile) - TODO confirm
	TargetName string
}
Dataset is a type that represents the data used to fit a linear model: the design matrix X, the target values Y and the names of the columns of X
func AddPoly ¶
AddPoly returns a new dataset where polynomial versions of the passed columns are inserted
func ReadFile ¶
ReadFile creates a dataset from the passed file. If targetName is an empty string, the entire file will be added to the X matrix. If targetName is a non-empty string that is not found in the header, the function will return an error
func (Dataset) Columns ¶
Columns returns the column numbers of all features where <pattern> is part of the name
func (Dataset) Dot ¶
Dot performs the dot product between X and a sparse coefficient vector given as a map of strings, where the key is a column name
func (Dataset) IncludedFeatures ¶
IncludedFeatures returns the features being included according to the passed indicator. 1: feature is included, 0: feature is not included
func (Dataset) NumFeatures ¶
NumFeatures returns the number of features
type DemoCostFunc ¶
DemoCostFunc is a type that holds parameters for demo scripts
type EBic ¶
EBic is a type used to calculate the extended BIC criterion. An implicit underlying assumption for BIC is that the prior distribution is constant for all models. This may not be feasible when the number of features is large. EBIC tries to penalize large models more heavily than BIC, by setting the prior distribution inversely proportional to a power of the total number of models with a given size. If we have N features, and k features are selected, then the prior p(s) is proportional to tau^{-gamma}, where tau is the total number of models with that size (i.e. tau = N!/(k!(N-k)!)) and 0 <= gamma <= 1 is a tuning constant. If gamma is zero, then EBIC is equal to BIC
func NewDefaultEBic ¶
NewDefaultEBic returns a new Ebic function
type FittedModel ¶
// FittedModel holds the design matrix, the target values and the
// coefficients of a model using plain slices.
type FittedModel struct {
	// Rows is presumably the number of rows of the design matrix - TODO confirm
	Rows int
	// Cols is presumably the number of columns of the design matrix - TODO confirm
	Cols int
	// X is presumably the design matrix stored as a flat slice with
	// Rows*Cols entries - TODO confirm the storage order
	X []float64
	// Y presumably holds the target values - TODO confirm
	Y []float64
	// Coeff presumably holds the fitted coefficients - TODO confirm
	Coeff []float64
	// Names presumably holds the feature names belonging to the columns
	// of X - TODO confirm
	Names []string
}
FittedModel is a type that holds the design matrix, the target values and the coefficients
type GABackupCB ¶
GABackupCB is a default type used to construct a default backup function
func (*GABackupCB) Build ¶
func (gab *GABackupCB) Build() func(ga *eaopt.GA)
Build constructs the callback function
type Hook ¶
// Hook couples a script with the function used to capture results from the
// output of that script.
type Hook struct {
	// Script is the script that is run by the hook
	Script string
	// Capture extracts results from the output of the script
	Capture CaptureFunction
}
Hook is a type that runs the script and captures results from the output using the Capture function
type LinearModel ¶
// LinearModel is the genome used by the genetic algorithm. It describes
// which features are part of the model.
type LinearModel struct {
	// Config holds the static configuration of the model
	Config LinearModelConfig
	// Include holds 1/0 values marking the columns that are part of the
	// model (1: included, 0: excluded)
	Include []int
}
LinearModel represents a genome
func (*LinearModel) Crossover ¶
func (l *LinearModel) Crossover(other eaopt.Genome, rng *rand.Rand)
Crossover performs a cross over
func (*LinearModel) Evaluate ¶
func (l *LinearModel) Evaluate() (float64, error)
Evaluate evaluates the fitness
func (*LinearModel) GetCoeff ¶
func (l *LinearModel) GetCoeff() *mat.VecDense
GetCoeff returns the coefficients corresponding to the current selection
func (*LinearModel) IncludedCols ¶
func (l *LinearModel) IncludedCols() []int
IncludedCols returns the indices of the columns that are included according to the 1/0 values in Include (1: included, 0: excluded)
func (*LinearModel) IsEmpty ¶
func (l *LinearModel) IsEmpty() bool
IsEmpty returns true if the model contains no features
func (*LinearModel) IsEqual ¶
func (l *LinearModel) IsEqual(other LinearModel) bool
IsEqual returns true if the two models are equal
func (*LinearModel) Mutate ¶
func (l *LinearModel) Mutate(rng *rand.Rand)
Mutate introduces mutations
func (*LinearModel) MutationRate ¶
func (l *LinearModel) MutationRate() float64
MutationRate returns the mutation rate. If not specified in Config (e.g. 0.0), a default value of 0.5 is used
func (*LinearModel) NumIncluded ¶
func (l *LinearModel) NumIncluded() int
NumIncluded returns the number of included columns
func (*LinearModel) NumSplits ¶
func (l *LinearModel) NumSplits() uint
NumSplits returns the number of splits used in crossover. If not set, 2 is used as the default
func (*LinearModel) Optimize ¶
func (l *LinearModel) Optimize() OptimizeResult
Optimize performs a local optimization by flipping the inclusions of the features. After a call to this function, the included features are modified and set to those of the best genome found
type LinearModelConfig ¶
// LinearModelConfig contains the static configuration for a linear model.
type LinearModelConfig struct {
	// Data is the dataset the models are fitted to
	Data Dataset
	// Cost is the cost function. According to GetCostFunction, AICC is
	// used as default if it is not given
	Cost CostFunction
	// MutationRate is the mutation rate. According to
	// LinearModel.MutationRate, a default value of 0.5 is used if it is
	// not specified
	MutationRate float64
	// NumSplits is the number of splits used in crossover. According to
	// LinearModel.NumSplits, 2 is used as default if it is not set
	NumSplits uint
	// MaxFeatToDataRatio specifies the maximum value of #feat/#data. If not given,
	// a default value of 0.5 is used
	MaxFeatToDataRatio float64
}
LinearModelConfig contains the static configuration for a linear model. It contains meta-information needed to fully define a LinearModel
func (*LinearModelConfig) GetCostFunction ¶
func (lmc *LinearModelConfig) GetCostFunction() CostFunction
GetCostFunction returns the cost function. If not given, AICC is used as default
func (LinearModelConfig) IsEqual ¶
func (lmc LinearModelConfig) IsEqual(other LinearModelConfig) bool
IsEqual returns true if other is equal to lmc. Otherwise, it returns false.
func (LinearModelConfig) LargestModel ¶
func (lmc LinearModelConfig) LargestModel() int
LargestModel returns the largest model consistent with the feature to data ratio
type LinearModelFactory ¶
// LinearModelFactory produces random models.
type LinearModelFactory struct {
	// Config is the static configuration used for the produced models
	Config LinearModelConfig
	// Prob is the probability of initially including each feature. If not
	// set, a default value of 0.5 is used. Example: a value of 0.2 will lead
	// to 20% of all features being included in the initial pool
	Prob float64
}
LinearModelFactory produces random models
type Model ¶
Model is convenience type used to store information about a model
type ModelIterator ¶
ModelIterator iterates through all models by sequentially flipping bits
func (*ModelIterator) UndoLastFlip ¶
func (m *ModelIterator) UndoLastFlip()
UndoLastFlip undoes the previous flip
type OptimizeResult ¶
OptimizeResult is returned by local optimization of the linear model
func OrthogonalMatchingPursuit ¶
func OrthogonalMatchingPursuit(dataset Dataset, cost CostFunction, maxFeatures int) OptimizeResult
OrthogonalMatchingPursuit optimizes the cost function by selecting the model that leads to the largest decrease in the cost function
func (*OptimizeResult) IsEqual ¶
func (or *OptimizeResult) IsEqual(other OptimizeResult) bool
IsEqual returns true if the two optimize results are equal
type Prediction ¶
Prediction is a type that represents a prediction (the expected value and the standard deviation)
func GetPredictions ¶
func GetPredictions(data Dataset, model Model, predData *Dataset) []Prediction
GetPredictions returns predictions together with the standard deviations for all data in predData. If predData is nil, data will be used (e.g. for in-sample prediction errors)
func ReadPredictions ¶
func ReadPredictions(fname string) ([]Prediction, error)
ReadPredictions reads the predictions from a csv file (same as stored by SavePredictions)
func (Prediction) IsEqual ¶
func (p Prediction) IsEqual(other Prediction) bool
IsEqual returns true if the two predictions are equal
type PredictionErrorFIC ¶
// PredictionErrorFIC holds the data selection used when trying to pick the
// model with the highest precision for a subset of the data.
type PredictionErrorFIC struct {
	// Data presumably lists the indices of the data points that make up
	// the subset of interest - TODO confirm
	Data []int
}
PredictionErrorFIC tries to select the model that has the highest precision for a subset of the data