engine

package
v0.0.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Nov 6, 2023 License: Apache-2.0 Imports: 10 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// CandidatesFreqLimit is an untyped integer constant equal to 1.
	// NOTE(review): presumably a minimum-frequency threshold applied when
	// selecting collocation candidates - confirm against its usage.
	CandidatesFreqLimit = 1
)

Variables

This section is empty.

Functions

func Open added in v0.0.2

func Open(conf *DBConf) (*sql.DB, error)

func RunPg

func RunPg(corpusID, vertPath string, coOccSpan int, conf *SyntaxProps, db *sql.DB) error

Types

type CTItem

// CTItem is the value type stored in a CounterTable. It carries the same
// pieces of information that CounterTable.Add takes as arguments: a lemma
// with its part of speech (Lemma, Upos), a second lemma with its part of
// speech (PLemma, PUpos), a Deprel label and a frequency.
//
// NOTE(review): PLemma/PUpos presumably describe the parent token (compare
// the `p_lemma`/`p_upos` attributes mentioned in SyntaxProps) and Deprel
// the dependency relation between the two - confirm.
type CTItem struct {
	Lemma  string
	PLemma string
	Deprel string
	Upos   string
	PUpos  string
	Freq   int64
}

type Candidate

// Candidate is the item type returned by
// CollDatabase.GetCollCandidatesOfChild and
// CollDatabase.GetCollCandidatesOfParent, i.e. a single collocation
// candidate identified by its Lemma and Upos.
//
// NOTE(review): FreqXY presumably is the frequency of the pair
// (searched word, candidate) and FreqY the frequency of the candidate
// alone; how CoOccScore is computed is not visible here - confirm.
type Candidate struct {
	Lemma      string
	Upos       string
	FreqXY     int64
	FreqY      int64
	CoOccScore float64
}

type CoOccTable added in v0.0.4

// CoOccTable maps a string key to a *CoTItem. Its Add and Has methods
// identify an entry by (lemma, upos, coLemma, coUpos); how these values
// are combined into the map key is not visible here.
type CoOccTable map[string]*CoTItem

func (CoOccTable) Add added in v0.0.4

func (table CoOccTable) Add(lemma, upos, coLemma, coUpos string, val int64)

func (CoOccTable) Has added in v0.0.4

func (table CoOccTable) Has(lemma, upos, coLemma, coUpos string) bool

type CoTItem added in v0.0.4

// CoTItem is the value type stored in a CoOccTable. It pairs a lemma and
// its part of speech (Lemma, Upos) with a second lemma and its part of
// speech (CoLemma, CoUpos) and a frequency.
//
// NOTE(review): CoLemma/CoUpos presumably describe the co-occurring
// token - confirm.
type CoTItem struct {
	Lemma   string
	CoLemma string
	Upos    string
	CoUpos  string
	Freq    int64
}

type CoVertProcessor added in v0.0.4

// CoVertProcessor provides the ProcToken, ProcStruct and ProcStructClose
// callbacks (taking vertigo parser types) and exposes two result tables,
// CoOccTable and TokenCounts.
//
// NOTE(review): Span presumably is the co-occurrence span (compare the
// coOccSpan argument of RunPg) and Window a buffer of recently seen
// tokens, each stored as a pair of strings - confirm which two values
// the pair holds.
type CoVertProcessor struct {
	Span   int
	Window [][2]string

	CoOccTable  CoOccTable
	TokenCounts FyTable
	// contains filtered or unexported fields
}

func (*CoVertProcessor) ProcStruct added in v0.0.4

func (cvp *CoVertProcessor) ProcStruct(strc *vertigo.Structure, line int, err error) error

func (*CoVertProcessor) ProcStructClose added in v0.0.4

func (cvp *CoVertProcessor) ProcStructClose(strc *vertigo.StructureClose, line int, err error) error

func (*CoVertProcessor) ProcToken added in v0.0.4

func (cvp *CoVertProcessor) ProcToken(token *vertigo.Token, line int, err error) error

type CollDatabase

// CollDatabase has only unexported fields; instances are created with
// NewCollDatabase, which takes an *sql.DB and a corpus ID. Its methods
// provide collocation candidates and frequencies, and table
// initialization/readiness checks.
type CollDatabase struct {
	// contains filtered or unexported fields
}

Note: a CollDatabase instance has a "per request" lifecycle, i.e. it is meant to be created for a single request rather than shared across requests.

func NewCollDatabase

func NewCollDatabase(db *sql.DB, corpusID string) *CollDatabase

func (*CollDatabase) GetCollCandidatesOfChild added in v0.0.2

func (cdb *CollDatabase) GetCollCandidatesOfChild(lemma, upos, deprel string, minFreq int) ([]*Candidate, error)

GetCollCandidatesOfChild provides collocation candidates of a child

func (*CollDatabase) GetCollCandidatesOfParent added in v0.0.2

func (cdb *CollDatabase) GetCollCandidatesOfParent(lemma, upos, deprel string, minFreq int) ([]*Candidate, error)

GetCollCandidatesOfParent provides collocation candidates of a parent

func (*CollDatabase) GetFreq

func (cdb *CollDatabase) GetFreq(lemma, upos, pLemma, pUpos, deprel string) (int64, error)

func (*CollDatabase) InitializeDB added in v0.0.2

func (cdb *CollDatabase) InitializeDB(db *sql.DB, force bool) error

func (*CollDatabase) TableName added in v0.0.2

func (cdb *CollDatabase) TableName() string

func (*CollDatabase) TestTableReady added in v0.0.2

func (cdb *CollDatabase) TestTableReady() error

type CorporaConf

// CorporaConf is a list of per-corpus configurations (*CorpusProps).
// Its GetCorpusProps method takes a corpus ID and returns a *CorpusProps.
type CorporaConf []*CorpusProps

func (CorporaConf) GetCorpusProps

func (cp CorporaConf) GetCorpusProps(corpusID string) *CorpusProps

type CorpusProps

// CorpusProps is the JSON-serializable configuration of a single corpus:
// its name, its size, whether materialized views are available for it,
// and its syntax-related settings (see SyntaxProps).
//
// NOTE(review): the unit of Size is not stated here (presumably the
// number of tokens) - confirm.
type CorpusProps struct {
	Name string `json:"name"`
	Size int64  `json:"size"`
	// HasMaterializedViews - if true, scollex will use queries
	// targeting materialized views of the corpus to provide better
	// performance. This is highly recommended (see scripts/schema.sql
	// for the views' definitions).
	HasMaterializedViews bool        `json:"hasMaterializedViews"`
	Syntax               SyntaxProps `json:"syntax"`
}

func (*CorpusProps) ValidateAndDefaults

func (conf *CorpusProps) ValidateAndDefaults(confContext string) error

type CounterTable

// CounterTable maps a string key to a *CTItem. Its Add method identifies
// an entry by (lemma, upos, pLemma, pUpos, deprel); how these values are
// combined into the map key is not visible here.
type CounterTable map[string]*CTItem

func (CounterTable) Add

func (table CounterTable) Add(lemma, upos, pLemma, pUpos, deprel string, val int64)

type DBConf

// DBConf holds JSON-serializable database connection settings (host,
// port, database name, user and password). A *DBConf is the argument of
// Open, which returns an *sql.DB.
//
// NOTE(review): PoolSize presumably limits the size of the connection
// pool - confirm how Open applies it.
type DBConf struct {
	Host     string `json:"host"`
	Port     int    `json:"port"`
	Name     string `json:"name"`
	User     string `json:"user"`
	Password string `json:"password"`
	PoolSize int    `json:"poolSize"`
}

type FreqDistrib

// FreqDistrib is a JSON-serializable frequency distribution result:
// the corpus size, a list of frequency items, a query template for
// fetching examples and an error message.
type FreqDistrib struct {

	// CorpusSize is always equal to the whole corpus size
	// (even if we work with a subcorpus)
	CorpusSize int64 `json:"corpusSize"`

	// Freqs is the list of frequency items of the distribution.
	Freqs FreqDistribItemList `json:"freqs"`

	// ExamplesQueryTpl provides a (CQL) query template
	// for obtaining examples matching words from the `Freqs`
	// attribute (one by one).
	ExamplesQueryTpl string `json:"examplesQueryTpl"`

	// Error is an error message serialized along with the result.
	// NOTE(review): presumably empty when no error occurred - confirm.
	Error string `json:"error"`
}

type FreqDistribItem

// FreqDistribItem is a single JSON-serializable entry of a
// FreqDistribItemList: a word with its frequency figures and scores.
//
// CollWeight and CoOccScore are pointers, so they may be nil; a nil
// pointer is encoded by encoding/json as `null`.
//
// NOTE(review): Norm presumably is the normalization base and IPM the
// "instances per million" relative frequency - confirm.
type FreqDistribItem struct {
	Word       string   `json:"word"`
	Freq       int64    `json:"freq"`
	Norm       int64    `json:"norm"`
	IPM        float32  `json:"ipm"`
	CollWeight *float64 `json:"collWeight"`
	CoOccScore *float64 `json:"coOccScore"`
}

type FreqDistribItemList

// FreqDistribItemList is a list of *FreqDistribItem values. Its Cut
// method takes a maxItems limit and returns a FreqDistribItemList.
// NOTE(review): presumably Cut keeps at most maxItems items - confirm.
type FreqDistribItemList []*FreqDistribItem

func (FreqDistribItemList) Cut

func (flist FreqDistribItemList) Cut(maxItems int) FreqDistribItemList

type FyItem added in v0.0.2

// FyItem is the value type stored in an FyTable. It carries the same
// pieces of information that FyTable.Add takes as arguments: a lemma,
// its part of speech (Upos), a Deprel label and a frequency.
type FyItem struct {
	Lemma  string
	Upos   string
	Deprel string
	Freq   int64
}

type FyTable added in v0.0.2

// FyTable maps a string key to a *FyItem. Its Add and Has methods
// identify an entry by (lemma, upos, deprel); how these values are
// combined into the map key is not visible here.
type FyTable map[string]*FyItem

func (FyTable) Add added in v0.0.2

func (table FyTable) Add(lemma, upos, deprel string, val int64)

func (FyTable) Has added in v0.0.4

func (table FyTable) Has(lemma, upos, deprel string) bool

type PosAttrProps

// PosAttrProps is the JSON-serializable description of a single
// positional attribute; it is the type of the *Attr fields of
// SyntaxProps.
//
// NOTE(review): VerticalCol presumably is the index of the attribute's
// column in the vertical file; whether it is 0- or 1-based is not
// visible here - confirm.
type PosAttrProps struct {
	Name        string `json:"name"`
	VerticalCol int    `json:"verticalCol"`
}

type SyntaxProps

// SyntaxProps is the JSON-serializable syntax configuration of a corpus.
// It names the positional attributes and the attribute values that the
// package works with. The parenthesized notes give the corresponding
// names/values in the intercorp_v13ud corpus.
type SyntaxProps struct {

	// ParentIdxAttr specifies a positional attribute providing
	// information about relative position of a parent token.
	ParentIdxAttr PosAttrProps `json:"parentIdxAttr"`

	// LemmaAttr - an attribute specifying lemma
	// (in intercorp_v13ud: `lemma`)
	LemmaAttr PosAttrProps `json:"lemmaAttr"`

	// ParLemmaAttr - an attribute specifying lemma in parent
	// (in intercorp_v13ud: `p_lemma`)
	ParLemmaAttr PosAttrProps `json:"parLemmaAttr"`

	// PosAttr - an attr specifying part of speech
	// (in intercorp_v13ud: `upos`)
	PosAttr PosAttrProps `json:"posAttr"`

	// ParPosAttr - an attr specifying part of speech in parent
	// (in intercorp_v13ud: `p_upos`)
	ParPosAttr PosAttrProps `json:"parPosAttr"`

	// FuncAttr - presumably an attr specifying the syntactic function
	// (dependency relation) of a token - TODO confirm
	// (in intercorp_v13ud: `deprel`)
	FuncAttr PosAttrProps `json:"funcAttr"`

	// NounValue - presumably the part-of-speech value denoting a noun
	// (in intercorp_v13ud: `NOUN`)
	NounValue string `json:"nounPosValue"`

	// VerbValue - presumably the part-of-speech value denoting a verb
	// (in intercorp_v13ud: `VERB`)
	VerbValue string `json:"verbPosValue"`

	// NounModifiedValue - presumably the FuncAttr value denoting
	// a nominal modifier
	// (in intercorp_v13ud: `nmod`)
	NounModifiedValue string `json:"nounModifiedValue"`

	// NounSubjectValue - presumably the FuncAttr value denoting
	// a nominal subject
	// (in intercorp_v13ud: `nsubj`)
	NounSubjectValue string `json:"nounSubjectValue"`

	// NounObjectValue - presumably the FuncAttr value(s) denoting
	// an object; the example suggests `|`-separated alternatives
	// are accepted - TODO confirm
	// (in intercorp_v13ud: `obj|iobj`)
	NounObjectValue string `json:"nounObjectValue"`
}

func (*SyntaxProps) ValidateAndDefaults

func (conf *SyntaxProps) ValidateAndDefaults(confContext string) error

type VertProcessor

// VertProcessor provides the ProcToken, ProcStruct and ProcStructClose
// callbacks (taking vertigo parser types) and exposes three result
// tables: Table, ParentCounts and ChildCounts.
//
// NOTE(review): DeprelCol presumably is the vertical-file column holding
// the dependency relation and DeprelTypes the relation values of
// interest - confirm.
type VertProcessor struct {
	DeprelCol   int
	DeprelTypes []string

	Table        CounterTable
	ParentCounts FyTable
	ChildCounts  FyTable
	// contains filtered or unexported fields
}

func (*VertProcessor) ProcStruct

func (vp *VertProcessor) ProcStruct(strc *vertigo.Structure, line int, err error) error

func (*VertProcessor) ProcStructClose

func (vp *VertProcessor) ProcStructClose(strc *vertigo.StructureClose, line int, err error) error

func (*VertProcessor) ProcToken

func (vp *VertProcessor) ProcToken(token *vertigo.Token, line int, err error) error

type Word

// Word pairs a string value V with a PoS string. Its IsValid method
// reports a bool; the validity criteria are not visible here.
//
// NOTE(review): V presumably is the word's lemma (or form) and PoS its
// part-of-speech tag - confirm.
type Word struct {
	V   string
	PoS string
}

func (Word) IsValid

func (w Word) IsValid() bool

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL