crawlab

package module
v0.0.0-...-b6ee4ce Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 24, 2023 License: BSD-3-Clause Imports: 2 Imported by: 0

README

Crawlab Go SDK

Crawlab Go SDK integrates Go-based spiders with Crawlab. It provides APIs for saving crawled items to a variety of data sources, including MongoDB, MySQL, Postgres, Elasticsearch, and Kafka.

Basic Usage

package main

import (
	"github.com/xulei324/spiderlab-go-sdk"
	"github.com/xulei324/spiderlab-go-sdk/entity"
)

// main builds one item with a "url" and a "title" field and passes it to
// crawlab.SaveItem.
func main() {
	record := entity.Item{
		"url":   "http://example.com",
		"title": "hello world",
	}
	// The error returned by SaveItem is explicitly discarded in this example.
	_ = crawlab.SaveItem(record)
}

Example Using Colly

package main

import (
	"fmt"
	"github.com/apex/log"
	"github.com/xulei324/spiderlab-go-sdk"
	"github.com/xulei324/spiderlab-go-sdk/entity"
	"github.com/gocolly/colly/v2"
	"runtime/debug"
)

// main crawls a Baidu search results page with Colly and saves every result
// (title and link) through crawlab.SaveItem.
//
// It panics if the initial visit to the start URL cannot be issued.
func main() {
	startURL := "https://www.baidu.com/s?wd=crawlab"

	c := colly.NewCollector(
		colly.AllowedDomains("www.baidu.com"),
		colly.Async(true),
		colly.UserAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"),
	)

	// Each matched result container yields one item made of the text and the
	// href of its "h3.t > a" link.
	c.OnHTML("#content_left > .c-container", func(e *colly.HTMLElement) {
		item := entity.Item{}
		item["title"] = e.ChildText("h3.t > a")
		item["url"] = e.ChildAttr("h3.t > a", "href")
		if err := crawlab.SaveItem(item); err != nil {
			// Pass the error as an argument, never as part of the format
			// string: a '%' inside the message would otherwise be parsed as a
			// formatting verb.
			log.Errorf("save item error: %s", err.Error())
			debug.PrintStack()
		}
	})

	c.OnRequest(func(r *colly.Request) {
		// URLs frequently contain percent-escapes (e.g. %20), so the URL must
		// be a format argument rather than the format string itself.
		log.Debugf("Visiting %s", r.URL.String())
	})

	if err := c.Visit(startURL); err != nil {
		log.Errorf("visit error: %s", err.Error())
		debug.PrintStack()
		panic(fmt.Sprintf("Unable to visit %s", startURL))
	}

	// The collector was created with colly.Async(true); Wait is called before
	// main returns (presumably to let in-flight requests finish — see the
	// Colly documentation).
	c.Wait()
}

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func SaveItem

func SaveItem(item entity.Item) (err error)

Types

This section is empty.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL