types

package
v0.1.1-0...-c572a67 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 22, 2018 License: Apache-2.0 Imports: 2 Imported by: 1

Documentation

Index

Constants

This section is empty.

Variables

View Source
// Sentinel errors describing an invalid crawler conf. Every message is
// prefixed with "types/types.go" and names the conf field at fault:
//
//   - ErrEmptyCrawlerName:       crawler_name is empty.
//   - ErrUnSupportedCrawlerType: crawler_type is not a supported value.
//   - ErrEmptyStartUrls:         start_urls is empty.
//   - ErrEmptyUrlsFile:          urls_file is empty.
//   - ErrNoStartRule:            the start task conf rule is empty.
//
// NOTE(review): presumably these are the errors returned by
// (*CrawlerConf).IsValid — confirm against its implementation.
var (
	ErrEmptyCrawlerName       = errors.New("types/types.go empty crawler_name of crawler conf")
	ErrUnSupportedCrawlerType = errors.New("types/types.go unsupported crawler_type of crawler conf")
	ErrEmptyStartUrls         = errors.New("types/types.go empty start_urls of crawler conf")
	ErrEmptyUrlsFile          = errors.New("types/types.go empty urls_file of crawler conf")
	ErrNoStartRule            = errors.New("types/types.go empty start task conf rule of crawler conf")
)

Functions

This section is empty.

Types

type CrawlerConf

// CrawlerConf is the configuration of a single crawler. Every field carries
// matching json and bson tags, so the same snake_case key is used in both
// encodings.
//
// ParseConfs holds the parser configurations keyed by a string, and
// StartParserName names the parser to start with.
// NOTE(review): presumably the ParseConfs key is the parser name and
// StartParserName must be one of those keys — confirm against IsValid.
// NOTE(review): StartUrls and UrlsFile look like alternative seed sources
// selected by CrawlerType, and EsUri looks like an Elasticsearch address —
// neither is established by this declaration; verify before relying on it.
type CrawlerConf struct {
	CrawlerType     string               `json:"crawler_type" bson:"crawler_type"`
	CrawlerName     string               `json:"crawler_name" bson:"crawler_name"`
	CrawlerDesp     string               `json:"crawler_desp" bson:"crawler_desp"`
	StartUrls       []string             `json:"start_urls" bson:"start_urls"`
	UrlsFile        string               `json:"urls_file" bson:"urls_file"`
	ParseConfs      map[string]ParseConf `json:"parse_confs" bson:"parse_confs"`
	StartParserName string               `json:"start_parser_name" bson:"start_parser_name"`
	EsUri           string               `json:"es_uri" bson:"es_uri"`
}

func (*CrawlerConf) Id

func (self *CrawlerConf) Id() string

func (*CrawlerConf) IsValid

func (conf *CrawlerConf) IsValid() (bool, error)

func (*CrawlerConf) Type

func (self *CrawlerConf) Type() string

type CrawlerItem

// CrawlerItem is a record that embeds a full CrawlerConf (by value, under the
// "conf" key) together with bookkeeping fields: a weight, a status string,
// creation and modification times, and an author. Every field carries
// matching json and bson tags.
//
// NOTE(review): CreateTime and ModifyTime are int64; the unit (presumably a
// Unix timestamp) and the set of valid Status values are not visible here —
// confirm with the code that writes them.
type CrawlerItem struct {
	CrawlerName string      `json:"crawler_name" bson:"crawler_name"`
	Conf        CrawlerConf `json:"conf" bson:"conf"`
	Weight      int         `json:"weight" bson:"weight"`
	Status      string      `json:"status" bson:"status"`
	CreateTime  int64       `json:"create_time" bson:"create_time"`
	ModifyTime  int64       `json:"modify_time" bson:"modify_time"`
	Author      string      `json:"author" bson:"author"`
}

type ParseConf

// ParseConf is the configuration of a single parser. Its Rules map groups
// ParseRule lists under rule names. Every field carries matching json and
// bson tags.
//
// NOTE(review): the unit of RevisitInterval and the accepted values of
// ParserType and PostProcessor are not visible in this declaration — confirm
// with the code that consumes them.
type ParseConf struct {
	ParserType      string                 `json:"parser_type" bson:"parser_type"`
	ParserName      string                 `json:"parser_name" bson:"parser_name"`
	NoDefaultFields bool                   `json:"no_default_fields" bson:"no_default_fields"`
	ExampleUrl      string                 `json:"example_url" bson:"example_url"`
	Rules           map[string][]ParseRule `json:"rules" bson:"rules"` // RuleName to ParseRules
	PostProcessor   string                 `json:"post_processor" bson:"post_processor"`
	RevisitInterval int64                  `json:"revisit_interval" bson:"revisit_interval"`
}

func (*ParseConf) String

func (this *ParseConf) String() string

type ParseRule

// ParseRule is one extraction rule inside a ParseConf. It names a rule type,
// an item key, and three string selectors (Xpath, Regex, Js). Every field
// carries matching json and bson tags.
//
// NOTE(review): how Xpath, Regex and Js are combined or prioritised is not
// visible in this declaration — confirm with the parser implementation.
type ParseRule struct {
	// four RuleTypes: url, dom, string, html
	RuleType string `json:"rule_type" bson:"rule_type"`

	// when RuleType is dom, ItemKey stores the next RuleName
	ItemKey string `json:"item_key" bson:"item_key"`
	// IsSeedUrl indicates whether the generated item is a seed or not
	IsSeedUrl bool   `json:"is_seed_url" bson:"is_seed_url"`
	Xpath     string `json:"xpath" bson:"xpath"`
	Regex     string `json:"regex" bson:"regex"`
	Js        string `json:"js" bson:"js"`
}

type StoreItem

// StoreItem is implemented by any value that can report a string identifier
// through Id. In this package *CrawlerConf and *Task both declare an
// Id() string method and therefore satisfy it.
type StoreItem interface {
	// Id returns the item's identifier as a string.
	Id() string
}

type Task

// Task describes one URL to process: the crawler and parser it belongs to,
// the URL itself, a free-form Data string, whether it is a seed URL, and
// access/revisit bookkeeping. Every field carries matching json and bson
// tags.
//
// NOTE(review): LastAccessTime and RevisitInterval are int64; their units
// (presumably Unix time and a duration in the same unit) and the contents of
// Data are not visible here — confirm with the scheduler code.
type Task struct {
	CrawlerName     string `json:"crawler_name" bson:"crawler_name"`
	ParserName      string `json:"parser_name" bson:"parser_name"`
	IsSeedUrl       bool   `json:"is_seed_url" bson:"is_seed_url"`
	Url             string `json:"url" bson:"url"`
	Data            string `json:"data" bson:"data"`
	LastAccessTime  int64  `json:"last_access_time" bson:"last_access_time"`
	RevisitInterval int64  `json:"revisit_interval" bson:"revisit_interval"`
}

func (*Task) Id

func (self *Task) Id() string

func (*Task) String

func (this *Task) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL