model

package
v0.0.3 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Dec 24, 2020 License: Apache-2.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
const (
	DOCS_POSITIONS_ENUM_FLAG_OFF_SETS = 1
	DOCS_POSITIONS_ENUM_FLAG_PAYLOADS = 2
)
View Source
const (
	INDEX_OPT_DOCS_ONLY                                = IndexOptions(1)
	INDEX_OPT_DOCS_AND_FREQS                           = IndexOptions(2)
	INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS             = IndexOptions(3)
	INDEX_OPT_DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS = IndexOptions(4)
)
View Source
const (
	DOC_VALUES_TYPE_NUMERIC        = DocValuesType(1)
	DOC_VALUES_TYPE_BINARY         = DocValuesType(2)
	DOC_VALUES_TYPE_SORTED         = DocValuesType(3)
	DOC_VALUES_TYPE_SORTED_SET     = DocValuesType(4)
	DOC_VALUES_TYPE_SORTED_NUMERIC = DocValuesType(5)
)
View Source
const (
	SEEK_STATUS_END       = 1
	SEEK_STATUS_FOUND     = 2
	SEEK_STATUS_NOT_FOUND = 3
)
View Source
const (
	DOCS_ENUM_FLAG_FREQS = 1
)
View Source
const NO = -1
View Source
const YES = 1

Variables

View Source
var CODEC_FILE_PATTERN = regexp.MustCompile("_[a-z0-9]+(_.*)?\\..*")
View Source
var (
	EMPTY_TERMS_ENUM = &EmptyTermsEnum{}
)
View Source
var EMPTY_TERM_STATE = &EmptyTermState{}

Functions

This section is empty.

Types

type AttributesMixin

type AttributesMixin struct {
	// contains filtered or unexported fields
}

func (*AttributesMixin) Attribute

func (m *AttributesMixin) Attribute(key string) string

Get a codec attribute value, or "" if it does not exist

func (*AttributesMixin) Attributes

func (m *AttributesMixin) Attributes() map[string]string

Returns the internal codec attributes map.

type DocValuesType

type DocValuesType int

type DocsAndPositionsEnum

type DocsAndPositionsEnum struct {
}

type DocsEnum

// DocsEnum extends DocIdSetIterator with access to the term frequency of
// the current document.
type DocsEnum interface {
	DocIdSetIterator
	/**
	 * Returns term frequency in the current document, or 1 if the field was
	 * indexed with {@link IndexOptions#DOCS_ONLY}. Do not call this before
	 * {@link #nextDoc} is first called, nor after {@link #nextDoc} returns
	 * {@link DocIdSetIterator#NO_MORE_DOCS}.
	 *
	 * <p>
	 * <b>NOTE:</b> if the {@link DocsEnum} was obtained with {@link #FLAG_NONE},
	 * the result of this method is undefined.
	 */
	Freq() (n int, err error)
}

type EmptyTermState

type EmptyTermState struct{}

func (*EmptyTermState) Clone

func (ts *EmptyTermState) Clone() TermState

func (*EmptyTermState) CopyFrom

func (ts *EmptyTermState) CopyFrom(other TermState)

type EmptyTermsEnum

type EmptyTermsEnum struct {
	*TermsEnumImpl
}
An empty TermsEnum for quickly returning an empty instance, e.g. in MultiTermQuery.

Please note: This enum should be unmodifiable, but it is currently possible to add Attributes to it. This should not be a problem, as the enum is always empty and the existence of unused Attributes does not matter.

func (*EmptyTermsEnum) Comparator

func (e *EmptyTermsEnum) Comparator() sort.Interface

func (*EmptyTermsEnum) DocFreq

func (e *EmptyTermsEnum) DocFreq() (df int, err error)

func (*EmptyTermsEnum) DocsAndPositionsByFlags

func (e *EmptyTermsEnum) DocsAndPositionsByFlags(liveDocs util.Bits, reuse DocsAndPositionsEnum, flags int) DocsAndPositionsEnum

func (*EmptyTermsEnum) DocsByFlags

func (e *EmptyTermsEnum) DocsByFlags(liveDocs util.Bits, reuse DocsEnum, flags int) (de DocsEnum, err error)

func (*EmptyTermsEnum) Next

func (e *EmptyTermsEnum) Next() (term []byte, err error)

func (*EmptyTermsEnum) Ord

func (e *EmptyTermsEnum) Ord() int64

func (*EmptyTermsEnum) SeekCeilUsingCache

func (e *EmptyTermsEnum) SeekCeilUsingCache(term []byte, useCache bool) SeekStatus

func (*EmptyTermsEnum) SeekExactByPosition

func (e *EmptyTermsEnum) SeekExactByPosition(ord int64) error

func (*EmptyTermsEnum) SeekExactFromLast

func (e *EmptyTermsEnum) SeekExactFromLast(term []byte, state TermState) error

func (*EmptyTermsEnum) Term

func (e *EmptyTermsEnum) Term() []byte

func (*EmptyTermsEnum) TermState

func (e *EmptyTermsEnum) TermState() (ts TermState, err error)

func (*EmptyTermsEnum) TotalTermFreq

func (e *EmptyTermsEnum) TotalTermFreq() (tf int64, err error)

type FieldInfo

type FieldInfo struct {
	// Field's name
	Name string
	// Internal field number
	Number int32

	*AttributesMixin
	// contains filtered or unexported fields
}

func NewFieldInfo

func NewFieldInfo(name string, indexed bool, number int32,
	storeTermVector, omitNorms, storePayloads bool,
	indexOptions IndexOptions, docValues, normsType DocValuesType,
	dvGen int64, attributes map[string]string) *FieldInfo

func (*FieldInfo) DocValuesGen

func (info *FieldInfo) DocValuesGen() int64

func (*FieldInfo) DocValuesType

func (info *FieldInfo) DocValuesType() DocValuesType

Returns the DocValuesType of the docValues. This may be 0 if the field has no docValues.

func (*FieldInfo) HasDocValues

func (info *FieldInfo) HasDocValues() bool

Returns true if this field has any docValues.

func (*FieldInfo) HasNorms

func (info *FieldInfo) HasNorms() bool

Returns true if this field actually has any norms.

func (*FieldInfo) HasPayloads

func (info *FieldInfo) HasPayloads() bool

Returns true if any payloads exist for this field.

func (*FieldInfo) HasVectors

func (info *FieldInfo) HasVectors() bool

Returns true if any term vectors exist for this field.

func (*FieldInfo) IndexOptions

func (info *FieldInfo) IndexOptions() IndexOptions

Returns IndexOptions for the field, or 0 if the field is not indexed

func (*FieldInfo) IsIndexed

func (info *FieldInfo) IsIndexed() bool

Returns true if this field is indexed.

func (*FieldInfo) NormType

func (info *FieldInfo) NormType() DocValuesType

Returns DocValuesType of the norm. This may be 0 if the field has no norms.

func (*FieldInfo) OmitsNorms

func (info *FieldInfo) OmitsNorms() bool

Returns true if norms are explicitly omitted for this field

func (*FieldInfo) PutAttribute

func (fi *FieldInfo) PutAttribute(key, value string) string

Puts a codec attribute value.

This is a key-value mapping for the field that the codec can use to store additional metadata, and will be available to the codec when reading the segment via Attribute()

If a value already exists for the field, it will be replaced with the new value.

func (*FieldInfo) SetDocValueType

func (info *FieldInfo) SetDocValueType(v DocValuesType)

func (*FieldInfo) SetNormValueType

func (info *FieldInfo) SetNormValueType(typ DocValuesType)

func (*FieldInfo) String

func (fi *FieldInfo) String() string

func (*FieldInfo) Update

func (info *FieldInfo) Update(ft IndexableFieldType)

type FieldInfos

type FieldInfos struct {
	HasFreq      bool
	HasProx      bool
	HasPayloads  bool
	HasOffsets   bool
	HasVectors   bool
	HasNorms     bool
	HasDocValues bool

	Values []*FieldInfo // sorted by ID
	// contains filtered or unexported fields
}

Collection of FieldInfo(s) (accessible by number or by name).

func NewFieldInfos

func NewFieldInfos(infos []*FieldInfo) FieldInfos

func (FieldInfos) FieldInfoByName

func (infos FieldInfos) FieldInfoByName(fieldName string) *FieldInfo

Return the FieldInfo object referenced by the field name

func (FieldInfos) FieldInfoByNumber

func (infos FieldInfos) FieldInfoByNumber(fieldNumber int) *FieldInfo

Return the FieldInfo object referenced by the fieldNumber.

func (FieldInfos) Size

func (infos FieldInfos) Size() int

Returns the number of fields

func (FieldInfos) String

func (fis FieldInfos) String() string

type FieldInfosBuilder

type FieldInfosBuilder struct {
	// contains filtered or unexported fields
}

func NewFieldInfosBuilder

func NewFieldInfosBuilder(globalFieldNumbers *FieldNumbers) *FieldInfosBuilder

func (*FieldInfosBuilder) AddOrUpdate

func (b *FieldInfosBuilder) AddOrUpdate(name string, fieldType IndexableFieldType) *FieldInfo

NOTE: this method does not carry over termVector booleans nor docValuesType; the indexer chain (TermVectorsConsumerPerField, DocFieldProcessor) must set these fields when they succeed in consuming the document

func (*FieldInfosBuilder) Finish

func (b *FieldInfosBuilder) Finish() FieldInfos

type FieldNumbers

type FieldNumbers struct {
	sync.Locker
	// contains filtered or unexported fields
}

func NewFieldNumbers

func NewFieldNumbers() *FieldNumbers

func (*FieldNumbers) AddOrGet

func (fn *FieldNumbers) AddOrGet(info *FieldInfo) int

type Fields

type Fields interface {
	// Iterator of string
	Terms(field string) Terms
}

type IndexOptions

type IndexOptions int

type IndexableField

type IndexableField interface {
	/** Field name */
	Name() string
	/** {@link IndexableFieldType} describing the properties
	 * of this field. */
	FieldType() IndexableFieldType
	/**
	 * Returns the field's index-time boost.
	 * <p>
	 * Only fields can have an index-time boost, if you want to simulate
	 * a "document boost", then you must pre-multiply it across all the
	 * relevant fields yourself.
	 * <p>The boost is used to compute the norm factor for the field.  By
	 * default, in the {@link Similarity#computeNorm(FieldInvertState)} method,
	 * the boost value is multiplied by the length normalization factor and then
	 * rounded by {@link DefaultSimilarity#encodeNormValue(float)} before it is stored in the
	 * index.  One should attempt to ensure that this product does not overflow
	 * the range of that encoding.
	 * <p>
	 * It is illegal to return a boost other than 1.0f for a field that is not
	 * indexed ({@link IndexableFieldType#indexed()} is false) or omits normalization values
	 * ({@link IndexableFieldType#omitNorms()} returns true).
	 *
	 * @see Similarity#computeNorm(FieldInvertState)
	 * @see DefaultSimilarity#encodeNormValue(float)
	 */
	Boost() float32
	/** Non-null if this field has a binary value */
	BinaryValue() []byte

	/** Non-null if this field has a string value */
	StringValue() string

	/** Non-null if this field has a Reader value */
	ReaderValue() io.RuneReader

	/** Non-null if this field has a numeric value */
	NumericValue() interface{}

	// Creates the TokenStream used for indexing this field.  If appropriate,
	// implementations should use the given Analyzer to create the TokenStreams.
	TokenStream(analysis.Analyzer, analysis.TokenStream) (analysis.TokenStream, error)
}

Represents a single field for indexing. IndexWriter consumes Iterable<IndexableField> as a document.

@lucene.experimental

type IndexableFieldType

type IndexableFieldType interface {
	/** True if this field should be indexed (inverted) */
	Indexed() bool
	/** True if the field's value should be stored */
	Stored() bool
	/**
	 * True if this field's value should be analyzed by the
	 * {@link Analyzer}.
	 * <p>
	 * This has no effect if {@link #indexed()} returns false.
	 */
	Tokenized() bool
	/**
	 * True if this field's indexed form should be also stored
	 * into term vectors.
	 * <p>
	 * This builds a miniature inverted-index for this field which
	 * can be accessed in a document-oriented way from
	 * {@link IndexReader#getTermVector(int,String)}.
	 * <p>
	 * This option is illegal if {@link #indexed()} returns false.
	 */
	StoreTermVectors() bool
	/**
	 * True if this field's token character offsets should also
	 * be stored into term vectors.
	 * <p>
	 * This option is illegal if term vectors are not enabled for the field
	 * ({@link #storeTermVectors()} is false)
	 */
	StoreTermVectorOffsets() bool

	/**
	 * True if this field's token positions should also be stored
	 * into the term vectors.
	 * <p>
	 * This option is illegal if term vectors are not enabled for the field
	 * ({@link #storeTermVectors()} is false).
	 */
	StoreTermVectorPositions() bool

	/**
	 * True if this field's token payloads should also be stored
	 * into the term vectors.
	 * <p>
	 * This option is illegal if term vector positions are not enabled
	 * for the field ({@link #storeTermVectorPositions()} is false).
	 */
	StoreTermVectorPayloads() bool

	/**
	 * True if normalization values should be omitted for the field.
	 * <p>
	 * This saves memory, but at the expense of scoring quality (length normalization
	 * will be disabled), and if you omit norms, you cannot use index-time boosts.
	 */
	OmitNorms() bool

	/** {@link IndexOptions}, describing what should be
	 * recorded into the inverted index */
	IndexOptions() IndexOptions

	/**
	 * DocValues {@link DocValuesType}: if non-null then the field's value
	 * will be indexed into docValues.
	 *
	 * NOTE(review): DocValuesType is an int in this package, so "non-null"
	 * presumably means non-zero here; confirm against the implementation.
	 */
	DocValueType() DocValuesType
}

Describes the properties of a field.

type Int32Slice

type Int32Slice []int32

func (Int32Slice) Len

func (p Int32Slice) Len() int

func (Int32Slice) Less

func (p Int32Slice) Less(i, j int) bool

func (Int32Slice) Swap

func (p Int32Slice) Swap(i, j int)

type SeekStatus

type SeekStatus int

type SegmentInfo

type SegmentInfo struct {
	Dir store.Directory

	Name string

	*AttributesMixin
	// contains filtered or unexported fields
}

func NewSegmentInfo

func NewSegmentInfo(dir store.Directory,
	version util.Version, name string, docCount int,
	isCompoundFile bool, codec interface{},
	diagnostics map[string]string) *SegmentInfo

func NewSegmentInfo2

func NewSegmentInfo2(dir store.Directory,
	version util.Version, name string, docCount int,
	isCompoundFile bool, codec interface{},
	diagnostics map[string]string,
	attributes map[string]string) *SegmentInfo

func (*SegmentInfo) AddFile

func (si *SegmentInfo) AddFile(file string)

Add this file to the set of files written for this segment.

func (*SegmentInfo) Clone

func (si *SegmentInfo) Clone() *SegmentInfo

func (*SegmentInfo) Codec

func (si *SegmentInfo) Codec() interface{}

Return Codec that wrote this segment.

func (*SegmentInfo) Diagnostics

func (info *SegmentInfo) Diagnostics() map[string]string

Returns diagnostics saved into the segment when it was written.

func (*SegmentInfo) DocCount

func (si *SegmentInfo) DocCount() int

func (*SegmentInfo) Files

func (si *SegmentInfo) Files() map[string]bool

Return all files referenced by this SegmentInfo.

func (*SegmentInfo) HasSeparateNorms

func (si *SegmentInfo) HasSeparateNorms() bool

Separate norms are not supported in >= 4.0.

func (*SegmentInfo) IsCompoundFile

func (si *SegmentInfo) IsCompoundFile() bool

Returns true if this segment is stored as a compound file

func (*SegmentInfo) SetCodec

func (info *SegmentInfo) SetCodec(codec interface{})

Can only be called once.

func (*SegmentInfo) SetDiagnostics

func (info *SegmentInfo) SetDiagnostics(diagnostics map[string]string)

func (*SegmentInfo) SetDocCount

func (info *SegmentInfo) SetDocCount(docCount int)

func (*SegmentInfo) SetFiles

func (si *SegmentInfo) SetFiles(files map[string]bool)

Sets the files written for this segment.

func (*SegmentInfo) SetUseCompoundFile

func (si *SegmentInfo) SetUseCompoundFile(isCompoundFile bool)

Mark whether this segment is stored as a compound file.

func (*SegmentInfo) String

func (si *SegmentInfo) String() string

func (*SegmentInfo) StringOf

func (si *SegmentInfo) StringOf(dir store.Directory, delCount int) string

func (*SegmentInfo) Version

func (si *SegmentInfo) Version() util.Version

Returns the version of the code which wrote the segment.

type SegmentReadState

type SegmentReadState struct {
	Dir               store.Directory
	SegmentInfo       *SegmentInfo
	FieldInfos        FieldInfos
	Context           store.IOContext
	TermsIndexDivisor int
	SegmentSuffix     string
}

func NewSegmentReadState

func NewSegmentReadState(dir store.Directory,
	info *SegmentInfo, fieldInfos FieldInfos,
	context store.IOContext, termsIndexDivisor int) SegmentReadState

type SegmentWriteState

type SegmentWriteState struct {
	Directory       store.Directory
	SegmentInfo     *SegmentInfo
	FieldInfos      FieldInfos
	DelCountOnFlush int
	SegUpdates      interface{} // BufferedUpdates
	LiveDocs        util.MutableBits
	SegmentSuffix   string

	Context store.IOContext
	// contains filtered or unexported fields
}

Holder class for common parameters used during write.

func NewSegmentWriteState

func NewSegmentWriteState(infoStream util.InfoStream,
	dir store.Directory, segmentInfo *SegmentInfo,
	fieldInfos FieldInfos, termIndexInterval int,
	SegUpdates interface{}, ctx store.IOContext) *SegmentWriteState

func NewSegmentWriteState2

func NewSegmentWriteState2(infoStream util.InfoStream,
	dir store.Directory, segmentInfo *SegmentInfo,
	fieldInfos FieldInfos, termIndexInterval int,
	SegUpdates interface{}, ctx store.IOContext,
	segmentSuffix string) *SegmentWriteState

func NewSegmentWriteStateFrom

func NewSegmentWriteStateFrom(state *SegmentWriteState,
	segmentSuffix string) *SegmentWriteState

Create a shallow copy of SegmentWriteState with a new segment suffix.

type TermState

type TermState interface {
	CopyFrom(other TermState)
	Clone() TermState
}

TermState.java

Encapsulates all required internal state to position the associated TermsEnum without re-seeking.

type Terms

type Terms interface {
	Iterator(reuse TermsEnum) TermsEnum
	DocCount() int
	SumTotalTermFreq() int64
	SumDocFreq() int64
}

type TermsEnum

type TermsEnum interface {
	util.BytesRefIterator

	Attributes() *util.AttributeSource
	/* Attempts to seek to the exact term, returning
	true if the term is found. If this returns false, the
	enum is unpositioned. For some codecs, seekExact may
	be substantially faster than seekCeil. */
	SeekExact(text []byte) (ok bool, err error)
	/* Seeks to the specified term, if it exists, or to the
	next (ceiling) term. Returns SeekStatus to
	indicate whether exact term was found, a different
	term was found, or EOF was hit. The target term may
	be before or after the current term. If this returns
	SeekStatus.END, then enum is unpositioned. */
	SeekCeil(text []byte) SeekStatus
	/* Seeks to the specified term by ordinal (position) as
	previously returned by ord. The target ord
	may be before or after the current ord, and must be
	within bounds. */
	SeekExactByPosition(ord int64) error
	/* Expert: Seeks a specific position by TermState previously obtained
	from termState(). Callers should maintain the TermState to
	use this method. Low-level implementations may position the TermsEnum
	without re-seeking the term dictionary.

	Seeking by TermState should only be used iff the state was obtained
	from the same instance.

	NOTE: Using this method with an incompatible TermState might leave
	this TermsEnum in undefined state. On a segment level
	TermState instances are compatible only iff the source and the
	target TermsEnum operate on the same field. If operating on segment
	level, TermState instances must not be used across segments.

	NOTE: A seek by TermState might not restore the
	AttributeSource's state. AttributeSource state must be
	maintained separately if the method is used. */
	SeekExactFromLast(text []byte, state TermState) error
	/* Returns current term. Do not call this when enum
	is unpositioned. */
	Term() []byte
	/* Returns ordinal position for current term. This is an
	optional method (the codec may panic). Do not call this
	when the enum is unpositioned. */
	Ord() int64
	/* Returns the number of documents containing the current
	term. Do not call this when enum is unpositioned. */
	DocFreq() (df int, err error)
	/* Returns the total number of occurrences of this term
	across all documents (the sum of the freq() for each
	doc that has this term). This will be -1 if the
	codec doesn't support this measure. Note that, like
	other term measures, this measure does not take
	deleted documents into account. */
	TotalTermFreq() (tf int64, err error)
	/* Get DocsEnum for the current term. Do not
	call this when the enum is unpositioned. This method
	will not return nil. */
	Docs(liveDocs util.Bits, reuse DocsEnum) (de DocsEnum, err error)
	/* Get DocsEnum for the current term, with
	control over whether freqs are required. Do not
	call this when the enum is unpositioned. This method
	will not return nil. */
	DocsByFlags(liveDocs util.Bits, reuse DocsEnum, flags int) (de DocsEnum, err error)
	/* Get DocsAndPositionsEnum for the current term.
	Do not call this when the enum is unpositioned. This
	method will return nil if positions were not
	indexed. */
	DocsAndPositions(liveDocs util.Bits, reuse DocsAndPositionsEnum) DocsAndPositionsEnum
	/* Get DocsAndPositionsEnum for the current term,
	with control over whether offsets and payloads are
	required. Some codecs may be able to optimize their
	implementation when offsets and/or payloads are not required.
	Do not call this when the enum is unpositioned. This
	will return nil if positions were not indexed. */
	DocsAndPositionsByFlags(liveDocs util.Bits, reuse DocsAndPositionsEnum, flags int) DocsAndPositionsEnum
	/* Expert: Returns the TermsEnum internal state to position the TermsEnum
	without re-seeking the term dictionary.

	NOTE: A seek by TermState might not capture the
	AttributeSource's state. Callers must maintain the
	AttributeSource states separately. */
	TermState() (ts TermState, err error)
}

TermsEnum.java

Iterator to seek, or step through, terms to obtain frequency information, or a DocsEnum or DocsAndPositionsEnum for the current term.

Term enumerations are always ordered by specified Comparator. Each term in the enumeration is greater than the one before it.

The TermsEnum is unpositioned when you first obtain it and you must first successfully call next() or one of the seek methods.

type TermsEnumImpl

type TermsEnumImpl struct {
	TermsEnum
	// contains filtered or unexported fields
}

func NewTermsEnumImpl

func NewTermsEnumImpl(self TermsEnum) *TermsEnumImpl

func (*TermsEnumImpl) Attributes

func (e *TermsEnumImpl) Attributes() *util.AttributeSource

func (*TermsEnumImpl) Docs

func (e *TermsEnumImpl) Docs(liveDocs util.Bits, reuse DocsEnum) (DocsEnum, error)

func (*TermsEnumImpl) DocsAndPositions

func (e *TermsEnumImpl) DocsAndPositions(liveDocs util.Bits, reuse DocsAndPositionsEnum) DocsAndPositionsEnum

func (*TermsEnumImpl) SeekExact

func (e *TermsEnumImpl) SeekExact(text []byte) (ok bool, err error)

func (*TermsEnumImpl) SeekExactFromLast

func (e *TermsEnumImpl) SeekExactFromLast(text []byte, state TermState) error

func (*TermsEnumImpl) TermState

func (e *TermsEnumImpl) TermState() (ts TermState, err error)

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL