Documentation ¶
Index ¶
- Constants
- Variables
- func CloneHeader(h map[string][]string) map[string][]string
- func CopyM(h http.Header) http.Header
- func Log() *logging.Logger
- func MergeCookie(before []*http.Cookie, after []*http.Cookie) []*http.Cookie
- func NewClient() (*http.Client, error)
- func NewHeader(ua interface{}, host string, refer interface{}) map[string][]string
- func NewJar() *cookiejar.Jar
- func NewProxyClient(proxystring string) (*http.Client, error)
- func OutputMaps(info string, args map[string][]string)
- func RandomUa() string
- func SetGlobalTimeout(num int)
- func SetLogLevel(level string)
- func TooSortSizes(data []byte, sizes float64) error
- func UaInit()
- func Wait(waittime int)
- type Spider
- func (this *Spider) Get() (body []byte, e error)
- func (this *Spider) Go() (body []byte, e error)
- func (this *Spider) JsonToString() (string, error)
- func (this *Spider) NewHeader(ua interface{}, host string, refer interface{})
- func (this *Spider) Post() (body []byte, e error)
- func (this *Spider) PostFILE() (body []byte, e error)
- func (this *Spider) PostJSON() (body []byte, e error)
- func (this *Spider) PostXML() (body []byte, e error)
- func (this *Spider) ToString() string
- type SpiderConfig
- func (config *SpiderConfig) Clear() *SpiderConfig
- func (config *SpiderConfig) SetBData(data []byte) *SpiderConfig
- func (config *SpiderConfig) SetForm(form url.Values) *SpiderConfig
- func (config *SpiderConfig) SetFormParm(k, v string) *SpiderConfig
- func (config *SpiderConfig) SetHeader(header http.Header) *SpiderConfig
- func (config *SpiderConfig) SetHeaderParm(k, v string) *SpiderConfig
- func (config *SpiderConfig) SetHost(host string) *SpiderConfig
- func (config *SpiderConfig) SetMethod(method string) *SpiderConfig
- func (config *SpiderConfig) SetRefer(refer string) *SpiderConfig
- func (config *SpiderConfig) SetUa(ua string) *SpiderConfig
- func (config *SpiderConfig) SetUrl(url string) *SpiderConfig
- func (config *SpiderConfig) SetWaitTime(num int) *SpiderConfig
Constants ¶
View Source
const ( // 暂停时间 default wait time WaitTime = 5 // HTTP方法 POST = "POST" POSTJSON = "POSTJSON" POSTXML = "POSTXML" POSTFILE = "POSTFILE" PUT = "PUT" GET = "GET" )
Variables ¶
View Source
var ( // 浏览器头部 default header ua // 默认的 FoxfireLinux = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0" SpiderHeader = map[string][]string{ "User-Agent": { FoxfireLinux, }, } // http get and post No timeout // 不设置时没有超时时间 DefaultTimeOut = 0 )
View Source
var ( //default client to ask get or post // 默认的官方客户端,带cookie,方便使用,没有超时时间,不带cookie的客户端不提供 Client = &http.Client{ CheckRedirect: func(req *http.Request, via []*http.Request) error { Logger.Debugf("-----------Redirect:%v------------", req.URL) return nil }, Jar: NewJar(), } )
View Source
var LevelNames = []string{
"CRITICAL",
"ERROR",
"WARNING",
"NOTICE",
"INFO",
"DEBUG",
}
LevelNames lists the level names you can refer to.
View Source
var Logger = logging.MustGetLogger("GoSpider")
全局日志
Functions ¶
func CloneHeader ¶
CloneHeader clones a header; a copy is needed because a header is a reference type. 克隆头部,因为是引用
func CopyM ¶
A Header is a map[string][]string. CopyM can be used to copy an HTTP header so that the copy and the original do not affect each other.
func MergeCookie ¶
MergeCookie merges cookies; later cookies override earlier ones. It is currently unused. 后来的覆盖前来的 暂时没有用的
func NewProxyClient ¶
NewProxyClient returns a client that uses a proxy; all clients carry cookies. 带代理客户端,全部有带cookie
func TooSortSizes ¶
If the file size is smaller than sizes (KB), an error is returned.
Types ¶
type Spider ¶
type Spider struct { *SpiderConfig Preurl string // pre url 上一次访问的URL Raw []byte // 抓取到的二进制流 UrlStatuscode int // the last url response code,such as 404 响应状态码 Client *http.Client // 真正客户端 Fetchtimes int // url fetch number times 抓取次数 Errortimes int // error times 失败次数 Ipstring string // spider ip,just for user to record their proxyip 代理IP地址,没有代理默认localhost // contains filtered or unexported fields }
爬虫结构体
func (*Spider) JsonToString ¶
将抓到的数据变成字符串,但数据是编码的JSON
type SpiderConfig ¶
type SpiderConfig struct { Url string // now fetch url 这次要抓取的Url Method string // Get Post 请求方法 Header http.Header // 请求头部 Data url.Values // post form data 表单字段 BData []byte // binary data 文件上传二进制流 Wait int // sleep time 等待时间 }
func (*SpiderConfig) Clear ¶
func (config *SpiderConfig) Clear() *SpiderConfig
func (*SpiderConfig) SetBData ¶
func (config *SpiderConfig) SetBData(data []byte) *SpiderConfig
func (*SpiderConfig) SetForm ¶
func (config *SpiderConfig) SetForm(form url.Values) *SpiderConfig
func (*SpiderConfig) SetFormParm ¶
func (config *SpiderConfig) SetFormParm(k, v string) *SpiderConfig
func (*SpiderConfig) SetHeader ¶
func (config *SpiderConfig) SetHeader(header http.Header) *SpiderConfig
Java Bean链式结构
func (*SpiderConfig) SetHeaderParm ¶
func (config *SpiderConfig) SetHeaderParm(k, v string) *SpiderConfig
func (*SpiderConfig) SetHost ¶
func (config *SpiderConfig) SetHost(host string) *SpiderConfig
func (*SpiderConfig) SetMethod ¶
func (config *SpiderConfig) SetMethod(method string) *SpiderConfig
func (*SpiderConfig) SetRefer ¶
func (config *SpiderConfig) SetRefer(refer string) *SpiderConfig
func (*SpiderConfig) SetUa ¶
func (config *SpiderConfig) SetUa(ua string) *SpiderConfig
func (*SpiderConfig) SetUrl ¶
func (config *SpiderConfig) SetUrl(url string) *SpiderConfig
func (*SpiderConfig) SetWaitTime ¶
func (config *SpiderConfig) SetWaitTime(num int) *SpiderConfig
Click to show internal directories.
Click to hide internal directories.