Documentation ¶
Overview ¶
Package cluster 文本聚类
Index ¶
- Constants
- func AverageDistance(o Observation, observations Observations) float64
- type Cluster
- type ClusterEngine
- type Clusters
- type Coordinates
- type CounterFeaturesGetter
- type Dbscan
- type Feature
- type FeaturesGetter
- type Kmeans
- type Observation
- type Observations
- type TfIdfFeaturesGetter
- type Tokenizer
Constants ¶
const ( NOISE = false CLUSTERED = true )
Variables ¶
This section is empty.
Functions ¶
func AverageDistance ¶
func AverageDistance(o Observation, observations Observations) float64
AverageDistance returns the average distance between o and all observations
Types ¶
type Cluster ¶
type Cluster struct { Center Coordinates Observations Observations }
func (*Cluster) Append ¶
func (c *Cluster) Append(point Observation)
Append adds an observation to the Cluster
func (Cluster) PointsInDimension ¶
func (c Cluster) PointsInDimension(n int) Coordinates
PointsInDimension returns all coordinates in a given dimension
type ClusterEngine ¶
type ClusterEngine interface {
Clusterize(Observations) (Clusters, error)
}
ClusterEngine 聚类引擎接口
type Clusters ¶
type Clusters []Cluster
Clusters is a slice of clusters
func Clusterize ¶
func Clusterize(corpus []string, engine ClusterEngine, featureGetter FeaturesGetter) (Clusters, error)
Clusterize 文本聚类
func RandClusters ¶
func RandClusters(k int, dataset Observations) (Clusters, error)
RandClusters sets up a new set of clusters and randomly seeds their initial positions
func (Clusters) CentersInDimension ¶
func (c Clusters) CentersInDimension(n int) Coordinates
CentersInDimension returns all cluster centroids' coordinates in a given dimension
func (Clusters) Nearest ¶
func (c Clusters) Nearest(point Observation) int
Nearest returns the index of the cluster nearest to point
type Coordinates ¶
type Coordinates []float64
Coordinates is a slice of float64
func (Coordinates) Coordinates ¶
func (c Coordinates) Coordinates() Coordinates
Coordinates implements the Observation interface for a plain set of float64 coordinates
func (Coordinates) Distance ¶
func (c Coordinates) Distance(p2 Coordinates) float64
Distance returns the Euclidean distance between two coordinates
type CounterFeaturesGetter ¶
type CounterFeaturesGetter struct {
// contains filtered or unexported fields
}
CounterFeaturesGetter 词频特征
func NewCounterFeaturesGetter ¶
func NewCounterFeaturesGetter(tokenizer Tokenizer) *CounterFeaturesGetter
func (*CounterFeaturesGetter) Features ¶
func (g *CounterFeaturesGetter) Features(corpus []string) (Observations, []string)
Features implements FeaturesGetter
type Dbscan ¶
type Dbscan struct {
// contains filtered or unexported fields
}
Dbscan DBSCAN 算法实现
func (*Dbscan) Clusterize ¶
func (d *Dbscan) Clusterize(objects Observations) (Clusters, error)
Clusterize 输入数据,完成聚类
type Feature ¶
type Feature struct { ID string Values Coordinates }
Feature 特征值
func (Feature) Coordinates ¶
func (f Feature) Coordinates() Coordinates
func (Feature) Distance ¶
func (f Feature) Distance(p2 Coordinates) float64
type FeaturesGetter ¶
type FeaturesGetter interface {
Features(corpus []string) (Observations, []string)
}
FeaturesGetter 获取features接口
type Kmeans ¶
type Kmeans struct {
// contains filtered or unexported fields
}
Kmeans configuration/option struct
func NewKmeans ¶
func NewKmeans() *Kmeans
NewKmeans returns a Kmeans configuration struct with default settings
func (*Kmeans) Clusterize ¶
func (m *Kmeans) Clusterize(dataset Observations) (Clusters, error)
Clusterize executes the k-means algorithm on the given dataset and partitions it into k clusters
func (*Kmeans) SetDeltaThreshold ¶
func (*Kmeans) SetMaxIter ¶
type Observation ¶
type Observation interface { Coordinates() Coordinates Distance(p2 Coordinates) float64 GetID() string }
type Observations ¶
type Observations []Observation
func (Observations) Center ¶
func (c Observations) Center() (Coordinates, error)
Center returns the center coordinates of a set of Observations
type TfIdfFeaturesGetter ¶
type TfIdfFeaturesGetter struct {
// contains filtered or unexported fields
}
TfIdfFeaturesGetter TF-IDF特征
func NewTfIdfFeaturesGetter ¶
func NewTfIdfFeaturesGetter(tokenizer Tokenizer) *TfIdfFeaturesGetter
func (*TfIdfFeaturesGetter) Features ¶
func (g *TfIdfFeaturesGetter) Features(corpus []string) (Observations, []string)
Features implements FeaturesGetter