pdfutils

package module
v0.0.5 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: May 10, 2022 License: AGPL-3.0 Imports: 18 Imported by: 0

Documentation

Index

Constants

View Source
// String identifiers exported by the package.
//
// NOTE(review): presumably these are the annotation kind names returned by
// GetAnnotationType and stored in Annotation.Type — confirm against the
// implementation.
//
// Only Highlight carries an explicit string type; the remaining names each
// have their own initializer and no type, so they are untyped string
// constants.
const (
	Highlight   string = "highlight"
	Strike             = "strike"
	Underline          = "underline"
	Text               = "text"
	Rectangle          = "rectangle"
	Image              = "image"
	Unsupported        = "unsupported"
)

Variables

This section is empty.

Functions

func ApplyPageRotation

func ApplyPageRotation(page *model.PdfPage, rect []float64) []float64

func CheckForTesseract

func CheckForTesseract(path string) bool

func CondenseSpaces

func CondenseSpaces(str string) string

func CropImage

func CropImage(img *image.Image, crop image.Rectangle) (image.Image, error)

func GetAnnotationColor

func GetAnnotationColor(annotation *model.PdfAnnotation) string

func GetAnnotationColorCategory

func GetAnnotationColorCategory(annotation *model.PdfAnnotation) string

func GetAnnotationDate

func GetAnnotationDate(annot *model.PdfAnnotation) *time.Time

func GetAnnotationID

func GetAnnotationID(ids map[string]bool, pageIndex int, x float64, y float64, annotType string) string

func GetAnnotationRects

func GetAnnotationRects(page *model.PdfPage, annotation *model.PdfAnnotation) []r2.Rect

func GetAnnotationType

func GetAnnotationType(t interface{}) string

func GetBoundsFromAnnotMarks

func GetBoundsFromAnnotMarks(annotRect r2.Rect, markRects []r2.Rect) r2.Rect

func GetCoordinates

func GetCoordinates(annotation *model.PdfAnnotation) (float64, float64)

func GetFallbackText

func GetFallbackText(text string, annotRect r2.Rect, markRects []r2.Rect, marks []extractor.TextMark) string

func GetMarkRect

func GetMarkRect(mark extractor.TextMark) r2.Rect

func GetQuadPoint

func GetQuadPoint(annotation *model.PdfAnnotation) *core.PdfObjectArray

func GetTextByAnnotBounds

func GetTextByAnnotBounds(fitzDoc *fitz.Document, pageIndex int, page *model.PdfPage, bounds r2.Rect) (string, error)

func HandleImageOCR

func HandleImageOCR(
	page *model.PdfPage,
	ocrImg *image.Image,
	annotRect []float64,
	tessPath string,
	lang string,
	dataDir string,
) string

func IsWithinOverlapThresh

func IsWithinOverlapThresh(annot r2.Rect, mark r2.Rect) bool

func OCRImage

func OCRImage(img image.Image, tessPath, lang, dataDir string) (string, error)

func PDFObjToColorCategory

func PDFObjToColorCategory(c core.PdfObject) string

func PDFObjToHex

func PDFObjToHex(c core.PdfObject) string

func RemoveNul added in v0.0.5

func RemoveNul(str string) string

func ShouldUseFallback

func ShouldUseFallback(str string) bool

func ValidateLang

func ValidateLang(tessPath, code string) bool

func WriteImage

func WriteImage(img *image.Image, name string, format string, quality int) error

Types

type Annotation

// Annotation is the JSON-serializable record for a single annotation.
//
// Fields tagged omitempty (AnnotatedText, Color, ColorCategory, Comment,
// Date, ImagePath, OCRText) are dropped from the encoded JSON when they are
// empty strings; ID, Page, Type, X and Y are always emitted.
//
// NOTE(review): Date is carried as a plain string; its format is not visible
// here — confirm against the code that populates it.
type Annotation struct {
	AnnotatedText string  `json:"annotatedText,omitempty"`
	Color         string  `json:"color,omitempty"`
	ColorCategory string  `json:"colorCategory,omitempty"`
	Comment       string  `json:"comment,omitempty"`
	Date          string  `json:"date,omitempty"`
	ID            string  `json:"id"`
	ImagePath     string  `json:"imagePath,omitempty"`
	OCRText       string  `json:"ocrText,omitempty"`
	Page          int     `json:"page"`
	Type          string  `json:"type"`
	X             float64 `json:"x"`
	Y             float64 `json:"y"`
}

func HandleImageAnnot

func HandleImageAnnot(args ImageAnnotArgs) (*Annotation, error)

type ByX

type ByX []*Annotation

func (ByX) Len

func (a ByX) Len() int

func (ByX) Less

func (a ByX) Less(i, j int) bool

func (ByX) Swap

func (a ByX) Swap(i, j int)

type ByY

type ByY []*Annotation

func (ByY) Len

func (a ByY) Len() int

func (ByY) Less

func (a ByY) Less(i, j int) bool

func (ByY) Swap

func (a ByY) Swap(i, j int)

type ImageAnnotArgs

// ImageAnnotArgs bundles the arguments accepted by HandleImageAnnot.
//
// NOTE(review): the field notes below are inferred from names and types only;
// confirm against HandleImageAnnot before relying on them.
type ImageAnnotArgs struct {
	// Page, PageImg and OCRImg are passed by pointer.
	// Presumably PageImg and OCRImg are separate renderings of the same
	// page — TODO confirm.
	Page            *model.PdfPage
	PageImg         *image.Image
	OCRImg          *image.Image
	PageIndex       int
	Annotation      *model.PdfAnnotation
	X               float64
	Y               float64
	ID              string
	// Presumably Write and AttemptOCR toggle image output and OCR
	// respectively — TODO confirm.
	Write           bool
	AttemptOCR      bool
	// Image output settings.
	ImageOutputPath string
	ImageBaseName   string
	ImageFormat     string
	ImageQuality    int
	// Tesseract settings.
	TessPath        string
	TessLang        string
	TessDataDir     string
}

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL