Versions in this module Expand all Collapse all v0 v0.9.1 Feb 29, 2020 v0.9.0 Feb 20, 2020 Changes in this version + func GetPodRequestsFromPodTemplate(template *corev1.PodTemplateSpec) *cluster.PodRequest + func GetPodRequestsFromTFJobReplica(replica *common.ReplicaSpec) *cluster.PodRequest + func InitClientSets(itk kubeclientset.Interface, itt tfjobclientset.Interface, ...) + func NewWorkerID(n int) string + func ScheduleJob(requestsGroups *[]*cluster.PodRequests, constNodeRes cluster.NodeResources) (okNum []int, placementPlansPtr *[]*JobPlacementPlan) + func SchedulingAlgorithm(waitingQueue *JobQueue, runningQueue *JobQueue, ...) + func SortNodeFromJob(job *TrainingJob) (sortedNodes []string) + func SortNodeFromNodeRes(nodes cluster.NodeResources, maxNum string) (sortedNodes []string) + type JobPlacementPlan map[string]*NodeResPlacePlan + func (this *JobPlacementPlan) Count() (sum int) + func (this *JobPlacementPlan) DeepCopy() *JobPlacementPlan + func (this *JobPlacementPlan) PrintMe() + type JobQueue []*TrainingJob + func (this *JobQueue) Add(job *TrainingJob) + func (this *JobQueue) PrintMe(whoami string) + func (this *JobQueue) Remove(job *TrainingJob) error + type JobsPlacementPlan map[*TrainingJob]*JobPlacementPlan + func ScaleDown(highPriorityJob *cluster.PodRequests, runningQueue JobQueue, ...) (can bool, scaleDownTarget JobsPlacementPlan, ...) + func ScaleUp(runningQueue JobQueue, constNodeRes cluster.NodeResources) (can bool, scaleUpTarget JobsPlacementPlan) + func (this *JobsPlacementPlan) DeepCopy() *JobsPlacementPlan + func (this *JobsPlacementPlan) PrintMe() + type NodeResPlacePlan map[string]*WorkerResources + func (this *NodeResPlacePlan) DeepCopy() *NodeResPlacePlan + func (this *NodeResPlacePlan) PrintMe(prefix string) + type TrainingJob struct + ReplicaRequest map[tfv1.TFReplicaType]*cluster.PodRequest + ReplicasPlacementPlan map[tfv1.TFReplicaType]*JobPlacementPlan + func NewTrainingJob(tfjob *tfv1.TFJob) *TrainingJob + func (this *TrainingJob) GetMinInstanceWorkerPodRequests() *cluster.PodRequests + func (this *TrainingJob) GetPodRequests(rt tfv1.TFReplicaType) *cluster.PodRequests + func (this *TrainingJob) UpdateTFJobTime() error + type WorkerResources struct + Critical bool + Workers map[string]string + func (this *WorkerResources) DeepCopy() *WorkerResources