simpletext

package
v0.0.0-...-53ff736
Published: Mar 27, 2024 License: Apache-2.0 Imports: 23 Imported by: 10

Documentation

Index

Constants

const (
	CODEC_NAME                    = "BKD"
	VERSION_START                 = 0
	VERSION_COMPRESSED_DOC_IDS    = 1
	VERSION_COMPRESSED_VALUES     = 2
	VERSION_IMPLICIT_SPLIT_DIM_1D = 3
	VERSION_CURRENT               = VERSION_IMPLICIT_SPLIT_DIM_1D
	DEFAULT_MAX_MB_SORT_IN_HEAP   = 16.0
)
const (
	// POINT_EXTENSION Extension of points data file
	POINT_EXTENSION = "dim"

	// POINT_INDEX_EXTENSION Extension of points index file
	POINT_INDEX_EXTENSION = "dii"
)
const (
	BLOCK_SIZE = 8
)
const (
	DATA_EXTENSION = "scf"
)
const (
	// FIELD_INFOS_EXTENSION Extension of field infos
	FIELD_INFOS_EXTENSION = "inf"
)
const (
	NORMS_SEG_EXTENSION = "len"
)
const (
	POSTINGS_EXTENSION = "pst"
)

Variables

var (
	COMPOUND_FORMAT_HEADER     = []byte("cfs entry for: ")
	COMPOUND_FORMAT_TABLE      = []byte("table of contents, size: ")
	COMPOUND_FORMAT_TABLENAME  = []byte("  filename: ")
	COMPOUND_FORMAT_TABLESTART = []byte("    start: ")
	COMPOUND_FORMAT_TABLEEND   = []byte("    end: ")
	COMPOUND_FORMAT_TABLEPOS   = []byte("table of contents begins at offset: ")
	OFFSETPATTERN              = "0000000000000000000"
)
var (
	DOC_VALUES_END   = []byte("END")
	DOC_VALUES_FIELD = []byte("field ")
	DOC_VALUES_TYPE  = []byte("  type ")
	// used for numerics
	DOC_VALUES_MINVALUE = []byte("  minvalue ")
	DOC_VALUES_PATTERN  = []byte("  pattern ")
	// used for bytes
	DOC_VALUES_LENGTH    = []byte("length ")
	DOC_VALUES_MAXLENGTH = []byte("  maxlength ")
	// used for sorted bytes
	DOC_VALUES_NUMVALUES  = []byte("  numvalues ")
	DOC_VALUES_ORDPATTERN = []byte("  ordpattern ")
)
var (
	NUMFIELDS       = []byte("number of fields ")
	NAME            = []byte("  name ")
	NUMBER          = []byte("  number ")
	STORETV         = []byte("  term vectors ")
	STORETVPOS      = []byte("  term vector positions ")
	STORETVOFF      = []byte("  term vector offsets ")
	PAYLOADS        = []byte("  payloads ")
	NORMS           = []byte("  norms ")
	DOCVALUES       = []byte("  doc values ")
	DOCVALUES_GEN   = []byte("  doc values gen ")
	INDEXOPTIONS    = []byte("  index options ")
	NUM_ATTS        = []byte("  attributes ")
	ATT_KEY         = []byte("    key ")
	ATT_VALUE       = []byte("    value ")
	DATA_DIM_COUNT  = []byte("  data dimensional count ")
	INDEX_DIM_COUNT = []byte("  index dimensional count ")
	DIM_NUM_BYTES   = []byte("  dimensional num bytes ")
	SOFT_DELETES    = []byte("  soft-deletes ")
)
var (
	FIELDS_END          = []byte("END")
	FIELDS_FIELD        = []byte("field ")
	FIELDS_TERM         = []byte("  term ")
	FIELDS_DOC          = []byte("    doc ")
	FIELDS_FREQ         = []byte("      freq ")
	FIELDS_POS          = []byte("      pos ")
	FIELDS_START_OFFSET = []byte("      startOffset ")
	FIELDS_END_OFFSET   = []byte("      endOffset ")
	FIELDS_PAYLOAD      = []byte("        payload ")
)
var (
	LIVEDOCS_EXTENSION = "liv"

	LIVE_DOCS_FORMAT_SIZE = []byte("size ")
	LIVE_DOCS_FORMAT_DOC  = []byte("  doc ")
	LIVE_DOCS_FORMAT_END  = []byte("END")
)
var (
	NUM_DATA_DIMS   = []byte("num data dims ")
	NUM_INDEX_DIMS  = []byte("num index dims ")
	BYTES_PER_DIM   = []byte("bytes per dim ")
	MAX_LEAF_POINTS = []byte("max leaf points ")
	INDEX_COUNT     = []byte("index count ")
	BLOCK_COUNT     = []byte("block count ")
	BLOCK_DOC_ID    = []byte("  doc ")
	BLOCK_FP        = []byte("  block fp ")
	BLOCK_VALUE     = []byte("  block value ")
	SPLIT_COUNT     = []byte("split count ")
	SPLIT_DIM       = []byte("  split dim ")
	SPLIT_VALUE     = []byte("  split value ")
	FIELD_COUNT     = []byte("field count ")
	FIELD_FP_NAME   = []byte("  field fp name ")
	FIELD_FP        = []byte("  field fp ")
	MIN_VALUE       = []byte("min value ")
	MAX_VALUE       = []byte("max value ")
	POINT_COUNT     = []byte("point count ")
	DOC_COUNT       = []byte("doc count ")
	END             = []byte("END")
)
var (
	SI_VERSION     = []byte("    version ")
	SI_MIN_VERSION = []byte("    min version ")
	SI_DOCCOUNT    = []byte("    number of documents ")
	SI_USECOMPOUND = []byte("    uses compound file ")
	SI_NUM_DIAG    = []byte("    diagnostics ")
	SI_DIAG_KEY    = []byte("      key ")
	SI_DIAG_VALUE  = []byte("      value ")
	SI_NUM_ATT     = []byte("    attributes ")
	SI_ATT_KEY     = []byte("      key ")
	SI_ATT_VALUE   = []byte("      value ")
	SI_NUM_FILES   = []byte("    files ")
	SI_FILE        = []byte("      file ")
	SI_ID          = []byte("    id ")
	SI_SORT        = []byte("    sort ")
	SI_SORT_TYPE   = []byte("      type ")
	SI_SORT_NAME   = []byte("      name ")
	SI_SORT_BYTES  = []byte("      bytes ")

	SI_EXTENSION = "si"
)
var (
	SKIP_LIST     = []byte("    skipList ")
	LEVEL_LENGTH  = []byte("      levelLength ")
	LEVEL         = []byte("      level ")
	SKIP_DOC      = []byte("        skipDoc ")
	SKIP_DOC_FP   = []byte("        skipDocFP ")
	IMPACTS       = []byte("        impacts ")
	IMPACT        = []byte("          impact ")
	FREQ          = []byte("            freq ")
	NORM          = []byte("            norm ")
	IMPACTS_END   = []byte("        impactsEnd ")
	CHILD_POINTER = []byte("        childPointer ")
)
var (
	FIELDS_EXTENSION         = "fld"
	STORED_FIELD_TYPE_STRING = []byte("string")
	STORED_FIELD_TYPE_BINARY = []byte("binary")
	STORED_FIELD_TYPE_INT    = []byte("int")
	STORED_FIELD_TYPE_LONG   = []byte("long")
	STORED_FIELD_TYPE_FLOAT  = []byte("float")
	STORED_FIELD_TYPE_DOUBLE = []byte("double")
	STORED_FIELD_END         = []byte("END")
	STORED_FIELD_DOC         = []byte("doc ")
	STORED_FIELD_FIELD       = []byte("  field ")
	STORED_FIELD_NAME        = []byte("    name ")
	STORED_FIELD_TYPE        = []byte("    type ")
	STORED_FIELD_VALUE       = []byte("    value ")
)
var (
	VECTORS_EXTENSION = "vec"

	VECTORS_END            = []byte("END")
	VECTORS_DOC            = []byte("doc ")
	VECTORS_NUMFIELDS      = []byte("  numfields ")
	VECTORS_FIELD          = []byte("  field ")
	VECTORS_FIELDNAME      = []byte("    name ")
	VECTORS_FIELDPOSITIONS = []byte("    positions ")
	VECTORS_FIELDOFFSETS   = []byte("    offsets   ")
	VECTORS_FIELDPAYLOADS  = []byte("    payloads  ")
	VECTORS_FIELDTERMCOUNT = []byte("    numterms ")
	VECTORS_TERMTEXT       = []byte("    term ")
	VECTORS_TERMFREQ       = []byte("      freq ")
	VECTORS_POSITION       = []byte("      position ")
	VECTORS_PAYLOAD        = []byte("        payload ")
	VECTORS_STARTOFFSET    = []byte("        startoffset ")
	VECTORS_ENDOFFSET      = []byte("        endoffset ")
)

Functions

func SortK

func SortK(data sort.Interface, k int)

Types

type BKDReader

type BKDReader struct {
	// contains filtered or unexported fields
}

func NewBKDReader

func NewBKDReader(in store.IndexInput, numDims, numIndexDims, maxPointsInLeafNode, bytesPerDim int,
	leafBlockFPs []int64, splitPackedValues []byte, minPackedValue, maxPackedValue []byte,
	pointCount int, docCount int) (*BKDReader, error)

func (*BKDReader) EstimateDocCount

func (s *BKDReader) EstimateDocCount(visitor types.IntersectVisitor) (int, error)

func (*BKDReader) EstimatePointCount

func (s *BKDReader) EstimatePointCount(ctx context.Context, visitor types.IntersectVisitor) (int, error)

func (*BKDReader) GetBytesPerDimension

func (s *BKDReader) GetBytesPerDimension() (int, error)

func (*BKDReader) GetDocCount

func (s *BKDReader) GetDocCount() int

func (*BKDReader) GetMaxPackedValue

func (s *BKDReader) GetMaxPackedValue() ([]byte, error)

func (*BKDReader) GetMinPackedValue

func (s *BKDReader) GetMinPackedValue() ([]byte, error)

func (*BKDReader) GetNumDimensions

func (s *BKDReader) GetNumDimensions() (int, error)

func (*BKDReader) GetNumIndexDimensions

func (s *BKDReader) GetNumIndexDimensions() (int, error)

func (*BKDReader) Intersect

func (s *BKDReader) Intersect(ctx context.Context, visitor types.IntersectVisitor) error

func (*BKDReader) NewIntersectState

func (s *BKDReader) NewIntersectState(in store.IndexInput,
	numDims, packedBytesLength, maxPointsInLeafNode int,
	visitor types.IntersectVisitor) *IntersectState

func (*BKDReader) Size

func (s *BKDReader) Size() int

type BKDWriter

type BKDWriter struct {
	// contains filtered or unexported fields
}

func NewBKDWriter

func NewBKDWriter(maxDoc int, tempDir store.Directory, tempFileNamePrefix string,
	config *bkd.Config, maxMBSortInHeap float64, totalPointCount int) *BKDWriter

func (*BKDWriter) Add

func (s *BKDWriter) Add(packedValue []byte, docID int) error

func (*BKDWriter) Finish

func (s *BKDWriter) Finish(out store.IndexOutput) (int64, error)

Finish writes the BKD tree to the provided IndexOutput and returns the file offset where the index was written.
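
A minimal sketch of the Add-then-Finish flow, written as if inside this package. The helper name and the store import path are assumptions, and constructing the writer and the output is left to the caller.

package simpletext

import (
	"github.com/geange/lucene-go/core/store" // assumed import path for the store package
)

// addAndFinish is a hypothetical helper (not part of this package): it feeds
// pre-packed point values to an already-constructed BKDWriter and then calls
// Finish, returning the file offset of the index as documented above.
func addAndFinish(w *BKDWriter, out store.IndexOutput, packedValues [][]byte) (int64, error) {
	for docID, value := range packedValues {
		// Each value must already be packed to bytesPerDim * numDims bytes.
		if err := w.Add(value, docID); err != nil {
			return 0, err
		}
	}
	return w.Finish(out)
}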

func (*BKDWriter) GetPointCount

func (s *BKDWriter) GetPointCount() int

GetPointCount returns how many points have been added so far.

func (*BKDWriter) WriteField

func (s *BKDWriter) WriteField(out store.IndexOutput, fieldName string, reader types.MutablePointValues) (int64, error)

WriteField writes a field from a MutablePointValues. This way of writing points is faster than regular writes with Add, since there is an opportunity to reorder points before writing them to disk. This method does not use transient disk in order to reorder points.

type BytesOutput

type BytesOutput struct {
	*store.BaseDataOutput
	// contains filtered or unexported fields
}

func NewBytesOutput

func NewBytesOutput() *BytesOutput

func (*BytesOutput) Write

func (b *BytesOutput) Write(bs []byte) (n int, err error)

func (*BytesOutput) WriteByte

func (b *BytesOutput) WriteByte(c byte) error

type Codec

type Codec struct {
	// contains filtered or unexported fields
}

Codec is a plain-text index format. FOR RECREATIONAL USE ONLY. lucene.experimental
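
A short, hedged sketch of constructing the codec and querying the per-format implementations it wires together. The module import path is an assumption and may differ; the file extensions in the comments come from the constants above.

package main

import (
	"fmt"

	"github.com/geange/lucene-go/codecs/simpletext" // assumed import path
)

func main() {
	codec := simpletext.NewCodec()
	fmt.Println("codec:", codec.GetName())

	// Each accessor returns the plain-text implementation of one index format.
	_ = codec.PostingsFormat()     // postings, written to .pst files
	_ = codec.StoredFieldsFormat() // stored fields, written to .fld files
	_ = codec.PointsFormat()       // points (BKD) data, written to .dim/.dii files
	_ = codec.LiveDocsFormat()     // live docs, written to .liv files
}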

func NewCodec

func NewCodec() *Codec

func (*Codec) CompoundFormat

func (s *Codec) CompoundFormat() index.CompoundFormat

func (*Codec) DocValuesFormat

func (s *Codec) DocValuesFormat() index.DocValuesFormat

func (*Codec) FieldInfosFormat

func (s *Codec) FieldInfosFormat() index.FieldInfosFormat

func (*Codec) GetName

func (s *Codec) GetName() string

func (*Codec) LiveDocsFormat

func (s *Codec) LiveDocsFormat() index.LiveDocsFormat

func (*Codec) NormsFormat

func (s *Codec) NormsFormat() index.NormsFormat

func (*Codec) PointsFormat

func (s *Codec) PointsFormat() index.PointsFormat

func (*Codec) PostingsFormat

func (s *Codec) PostingsFormat() index.PostingsFormat

func (*Codec) SegmentInfoFormat

func (s *Codec) SegmentInfoFormat() index.SegmentInfoFormat

func (*Codec) StoredFieldsFormat

func (s *Codec) StoredFieldsFormat() index.StoredFieldsFormat

func (*Codec) TermVectorsFormat

func (s *Codec) TermVectorsFormat() index.TermVectorsFormat

type CompoundFormat

type CompoundFormat struct {
}

CompoundFormat is a plain-text compound format. FOR RECREATIONAL USE ONLY. lucene.experimental

func NewCompoundFormat

func NewCompoundFormat() *CompoundFormat

func (*CompoundFormat) GetCompoundReader

func (s *CompoundFormat) GetCompoundReader(dir store.Directory, si *index.SegmentInfo, context *store.IOContext) (index.CompoundDirectory, error)

func (*CompoundFormat) Write

func (s *CompoundFormat) Write(dir store.Directory, si *index.SegmentInfo, context *store.IOContext) error

type DocValuesFormat

type DocValuesFormat struct {
	// contains filtered or unexported fields
}

DocValuesFormat is a plain-text doc values format. FOR RECREATIONAL USE ONLY. The .dat file contains the data. For numbers this is a "fixed-width" file, for example a single byte range:

field myField
  type NUMERIC
  minvalue 0
  pattern 000
005
T
234
T
123
T
...

So a document's value (delta-encoded from minvalue) can be retrieved by seeking to startOffset + (1+pattern.length()+2)*docid. The extra 1 is the newline; the extra 2 is another newline plus 'T' or 'F': true if the value is real, false if missing. For bytes this is also a "fixed-width" file, for example:

field myField
  type BINARY
  maxlength 6
  pattern 0
length 6
foobar[space][space]
T
length 3
baz[space][space][space][space][space]
T
...

So a doc's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength+2)*doc. The extra 9 is two newlines plus "length " itself; the extra 2 is another newline plus 'T' or 'F': true if the value is real, false if missing. For sorted bytes this is a fixed-width file, for example:

field myField
  type SORTED
  numvalues 10
  maxLength 8
  pattern 0
  ordpattern 00
length 6
foobar[space][space]
length 3
baz[space][space][space][space][space]
...
03
06
01
10
...

so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues. a document's ord can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid an ord's value can be retrieved by seeking to startOffset + (9+pattern.length+maxlength)*ord for sorted set this is a fixed-width file very similar to the SORTED case, for example:

  field myField
    type SORTED_SET
    numvalues 10
    maxLength 8
    pattern 0
    ordpattern XXXXX
  length 6
  foobar[space][space]
  length 3
  baz[space][space][space][space][space]
  ...
  0,3,5
  1,2

  10
  ...

so the "ord section" begins at startOffset + (9+pattern.length+maxlength)*numValues.
a document's ord list can be retrieved by seeking to "ord section" + (1+ordpattern.length())*docid
this is a comma-separated list, and it's padded with spaces to be fixed width. so trim() and split() it.
and beware the empty string! an ord's value can be retrieved by seeking to
startOffset + (9+pattern.length+maxlength)*ord for sorted numerics, it's encoded (not very creatively)
as a comma-separated list of strings the same as binary. beware the empty string! the reader can just
scan this file when it opens, skipping over the data blocks and saving the offset/etc for each field.

lucene.experimental
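
The seek arithmetic above is easy to misread, so here is a small, self-contained Go sketch (illustrative only, not part of the package API) that transcribes the formulas for the NUMERIC, BINARY, and SORTED cases, using the example values from the layouts shown above.

package main

import "fmt"

// numericValueOffset: startOffset + (1 + len(pattern) + 2) * docID
func numericValueOffset(startOffset int64, patternLen, docID int) int64 {
	return startOffset + int64(1+patternLen+2)*int64(docID)
}

// binaryValueOffset: startOffset + (9 + len(pattern) + maxLength + 2) * docID
func binaryValueOffset(startOffset int64, patternLen, maxLength, docID int) int64 {
	return startOffset + int64(9+patternLen+maxLength+2)*int64(docID)
}

// sortedOrdSectionStart: startOffset + (9 + len(pattern) + maxLength) * numValues
func sortedOrdSectionStart(startOffset int64, patternLen, maxLength, numValues int) int64 {
	return startOffset + int64(9+patternLen+maxLength)*int64(numValues)
}

// sortedDocOrdOffset: ordSectionStart + (1 + len(ordPattern)) * docID
func sortedDocOrdOffset(ordSectionStart int64, ordPatternLen, docID int) int64 {
	return ordSectionStart + int64(1+ordPatternLen)*int64(docID)
}

// sortedOrdValueOffset: startOffset + (9 + len(pattern) + maxLength) * ord
func sortedOrdValueOffset(startOffset int64, patternLen, maxLength int, ord int64) int64 {
	return startOffset + int64(9+patternLen+maxLength)*ord
}

func main() {
	// NUMERIC example above: pattern "000" (width 3), so each record is 3+1+2 = 6 bytes.
	fmt.Println(numericValueOffset(0, 3, 2)) // 12

	// BINARY example above: pattern "0" (width 1), maxlength 6, doc 1.
	fmt.Println(binaryValueOffset(0, 1, 6, 1)) // 18

	// SORTED example above: pattern "0", maxLength 8, 10 values, ordpattern "00".
	ordStart := sortedOrdSectionStart(0, 1, 8, 10) // 180
	fmt.Println(sortedDocOrdOffset(ordStart, 2, 5)) // 195
	fmt.Println(sortedOrdValueOffset(0, 1, 8, 3))   // 54
}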

func NewSimpleTextDocValuesFormat

func NewSimpleTextDocValuesFormat() *DocValuesFormat

func (*DocValuesFormat) FieldsConsumer

func (s *DocValuesFormat) FieldsConsumer(state *index.SegmentWriteState) (index.DocValuesConsumer, error)

func (*DocValuesFormat) FieldsProducer

func (s *DocValuesFormat) FieldsProducer(state *index.SegmentReadState) (index.DocValuesProducer, error)

func (*DocValuesFormat) GetName

func (s *DocValuesFormat) GetName() string

type DocValuesIterator

type DocValuesIterator interface {
	types.DocIdSetIterator

	AdvanceExact(target int) (bool, error)
}

type DocValuesReader

type DocValuesReader struct {
	// contains filtered or unexported fields
}

func NewDocValuesReader

func NewDocValuesReader(state *index.SegmentReadState, ext string) (*DocValuesReader, error)

func (*DocValuesReader) CheckIntegrity

func (s *DocValuesReader) CheckIntegrity() error

func (*DocValuesReader) Close

func (s *DocValuesReader) Close() error

func (*DocValuesReader) GetBinary

func (s *DocValuesReader) GetBinary(fieldInfo *document.FieldInfo) (index.BinaryDocValues, error)

func (*DocValuesReader) GetMergeInstance

func (s *DocValuesReader) GetMergeInstance() index.DocValuesProducer

func (*DocValuesReader) GetNumeric

func (s *DocValuesReader) GetNumeric(fieldInfo *document.FieldInfo) (index.NumericDocValues, error)

func (*DocValuesReader) GetSorted

func (s *DocValuesReader) GetSorted(fieldInfo *document.FieldInfo) (index.SortedDocValues, error)

func (*DocValuesReader) GetSortedNumeric

func (s *DocValuesReader) GetSortedNumeric(fieldInfo *document.FieldInfo) (index.SortedNumericDocValues, error)

func (*DocValuesReader) GetSortedSet

func (s *DocValuesReader) GetSortedSet(fieldInfo *document.FieldInfo) (index.SortedSetDocValues, error)

type DocValuesWriter

type DocValuesWriter struct {
	// contains filtered or unexported fields
}

func NewDocValuesWriter

func NewDocValuesWriter(state *index.SegmentWriteState, ext string) (*DocValuesWriter, error)

func (*DocValuesWriter) AddBinaryField

func (s *DocValuesWriter) AddBinaryField(ctx context.Context, field *document.FieldInfo, valuesProducer index.DocValuesProducer) error

func (*DocValuesWriter) AddNumericField

func (s *DocValuesWriter) AddNumericField(ctx context.Context, field *document.FieldInfo, valuesProducer index.DocValuesProducer) error

func (*DocValuesWriter) AddSortedField

func (s *DocValuesWriter) AddSortedField(ctx context.Context, field *document.FieldInfo, valuesProducer index.DocValuesProducer) error

func (*DocValuesWriter) AddSortedNumericField

func (s *DocValuesWriter) AddSortedNumericField(ctx context.Context, field *document.FieldInfo, valuesProducer index.DocValuesProducer) error

func (*DocValuesWriter) AddSortedSetField

func (s *DocValuesWriter) AddSortedSetField(ctx context.Context, field *document.FieldInfo, valuesProducer index.DocValuesProducer) error

func (*DocValuesWriter) Close

func (s *DocValuesWriter) Close() error

type DocsEnum

type DocsEnum struct {
	// contains filtered or unexported fields
}

func (*DocsEnum) Advance

func (s *DocsEnum) Advance(target int) (int, error)

func (*DocsEnum) AdvanceShallow

func (s *DocsEnum) AdvanceShallow(target int) error

func (*DocsEnum) CanReuse

func (s *DocsEnum) CanReuse(in store.IndexInput) bool

func (*DocsEnum) Cost

func (s *DocsEnum) Cost() int64

func (*DocsEnum) DocID

func (s *DocsEnum) DocID() int

func (*DocsEnum) EndOffset

func (s *DocsEnum) EndOffset() (int, error)

func (*DocsEnum) Freq

func (s *DocsEnum) Freq() (int, error)

func (*DocsEnum) GetImpacts

func (s *DocsEnum) GetImpacts() (index.Impacts, error)

func (*DocsEnum) GetPayload

func (s *DocsEnum) GetPayload() ([]byte, error)

func (*DocsEnum) NextDoc

func (s *DocsEnum) NextDoc() (int, error)

func (*DocsEnum) NextPosition

func (s *DocsEnum) NextPosition() (int, error)

func (*DocsEnum) SlowAdvance

func (s *DocsEnum) SlowAdvance(target int) (int, error)

func (*DocsEnum) StartOffset

func (s *DocsEnum) StartOffset() (int, error)

type FieldInfosFormat

type FieldInfosFormat struct {
}

FieldInfosFormat is a plain-text field infos format. FOR RECREATIONAL USE ONLY. lucene.experimental

func NewSimpleTextFieldInfosFormat

func NewSimpleTextFieldInfosFormat() *FieldInfosFormat

func (*FieldInfosFormat) Read

func (s *FieldInfosFormat) Read(directory store.Directory, segmentInfo *index.SegmentInfo, segmentSuffix string, ctx *store.IOContext) (*index.FieldInfos, error)

func (*FieldInfosFormat) Write

func (s *FieldInfosFormat) Write(directory store.Directory, segmentInfo *index.SegmentInfo,
	segmentSuffix string, infos *index.FieldInfos, context *store.IOContext) error

type FieldsReader

type FieldsReader struct {
	// contains filtered or unexported fields
}

func NewSimpleTextFieldsReader

func NewSimpleTextFieldsReader(state *index.SegmentReadState) (*FieldsReader, error)

func (*FieldsReader) CheckIntegrity

func (s *FieldsReader) CheckIntegrity() error

func (*FieldsReader) Close

func (s *FieldsReader) Close() error

func (*FieldsReader) GetMergeInstance

func (s *FieldsReader) GetMergeInstance() index.FieldsProducer

func (*FieldsReader) Names

func (s *FieldsReader) Names() []string

func (*FieldsReader) NewSimpleTextDocsEnum

func (s *FieldsReader) NewSimpleTextDocsEnum() *DocsEnum

func (*FieldsReader) Size

func (s *FieldsReader) Size() int

func (*FieldsReader) Terms

func (s *FieldsReader) Terms(field string) (index.Terms, error)

type IntersectState

type IntersectState struct {
	// contains filtered or unexported fields
}

IntersectState tracks all state for a single call to Intersect.

type LiveDocsFormat

type LiveDocsFormat struct {
}

LiveDocsFormat reads/writes plain-text live docs. FOR RECREATIONAL USE ONLY. lucene.experimental

func NewLiveDocsFormat

func NewLiveDocsFormat() *LiveDocsFormat

func (*LiveDocsFormat) Files

func (s *LiveDocsFormat) Files(info *index.SegmentCommitInfo, files map[string]struct{}) (map[string]struct{}, error)

func (*LiveDocsFormat) ReadLiveDocs

func (s *LiveDocsFormat) ReadLiveDocs(dir store.Directory, info *index.SegmentCommitInfo, context *store.IOContext) (util.Bits, error)

func (*LiveDocsFormat) WriteLiveDocs

func (s *LiveDocsFormat) WriteLiveDocs(bits util.Bits, dir store.Directory, info *index.SegmentCommitInfo, newDelCount int, context *store.IOContext) error

type NormsFormat

type NormsFormat struct {
}

NormsFormat is a plain-text norms format. FOR RECREATIONAL USE ONLY.

func NewNormsFormat

func NewNormsFormat() *NormsFormat

func (*NormsFormat) NormsConsumer

func (s *NormsFormat) NormsConsumer(state *index.SegmentWriteState) (index.NormsConsumer, error)

func (*NormsFormat) NormsProducer

func (s *NormsFormat) NormsProducer(state *index.SegmentReadState) (index.NormsProducer, error)

type OneField

type OneField struct {
	// contains filtered or unexported fields
}

func NewOneField

func NewOneField() *OneField

type PointsFormat

type PointsFormat struct {
}

PointsFormat is for debugging, curiosity, and transparency only! Do not use this codec in production. This codec stores all dimensional data in a single human-readable text file (_N.dim). You can view this file in any text editor, and even edit it to alter your index. lucene.experimental

func NewPointsFormat

func NewPointsFormat() *PointsFormat

func (*PointsFormat) FieldsReader

func (s *PointsFormat) FieldsReader(state *index.SegmentReadState) (index.PointsReader, error)

func (*PointsFormat) FieldsWriter

func (s *PointsFormat) FieldsWriter(state *index.SegmentWriteState) (index.PointsWriter, error)

type PointsReader

type PointsReader struct {
	// contains filtered or unexported fields
}

func NewPointsReader

func NewPointsReader(readState *index.SegmentReadState) (*PointsReader, error)

func (*PointsReader) CheckIntegrity

func (s *PointsReader) CheckIntegrity() error

func (*PointsReader) Close

func (s *PointsReader) Close() error

func (*PointsReader) GetMergeInstance

func (s *PointsReader) GetMergeInstance() index.PointsReader

func (*PointsReader) GetValues

func (s *PointsReader) GetValues(field string) (types.PointValues, error)

type PointsWriter

type PointsWriter struct {
	*index.BasePointsWriter
	// contains filtered or unexported fields
}

func NewSimpleTextPointsWriter

func NewSimpleTextPointsWriter(writeState *index.SegmentWriteState) (*PointsWriter, error)

func (*PointsWriter) Close

func (s *PointsWriter) Close() error

func (*PointsWriter) Finish

func (s *PointsWriter) Finish() error

func (*PointsWriter) WriteField

func (s *PointsWriter) WriteField(ctx context.Context, fieldInfo *document.FieldInfo, reader index.PointsReader) error

type PostingsEnum

type PostingsEnum struct {
	// contains filtered or unexported fields
}

func (*PostingsEnum) Advance

func (s *PostingsEnum) Advance(target int) (int, error)

func (*PostingsEnum) AdvanceShallow

func (s *PostingsEnum) AdvanceShallow(target int) error

func (*PostingsEnum) Cost

func (s *PostingsEnum) Cost() int64

func (*PostingsEnum) DocID

func (s *PostingsEnum) DocID() int

func (*PostingsEnum) EndOffset

func (s *PostingsEnum) EndOffset() (int, error)

func (*PostingsEnum) Freq

func (s *PostingsEnum) Freq() (int, error)

func (*PostingsEnum) GetImpacts

func (s *PostingsEnum) GetImpacts() (index.Impacts, error)

func (*PostingsEnum) GetPayload

func (s *PostingsEnum) GetPayload() ([]byte, error)

func (*PostingsEnum) NextDoc

func (s *PostingsEnum) NextDoc() (int, error)

func (*PostingsEnum) NextPosition

func (s *PostingsEnum) NextPosition() (int, error)

func (*PostingsEnum) SlowAdvance

func (s *PostingsEnum) SlowAdvance(target int) (int, error)

func (*PostingsEnum) StartOffset

func (s *PostingsEnum) StartOffset() (int, error)

type PostingsFormat

type PostingsFormat struct {
	// contains filtered or unexported fields
}

PostingsFormat is for debugging, curiosity, and transparency only! Do not use this codec in production. This codec stores all postings data in a single human-readable text file (_N.pst). You can view this file in any text editor, and even edit it to alter your index. lucene.experimental
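
Because the file is line-oriented, it can be inspected with nothing more than a scanner that dispatches on the FIELDS_* prefixes listed under Variables. The sketch below is not the package's reader, and its hand-written sample is only an assumption about how those prefixes combine in a real file.

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"strings"
)

func main() {
	// A hand-written snippet in the style suggested by the FIELDS_* prefixes
	// above; files written by this codec may differ in detail.
	sample := "field body\n  term hello\n    doc 0\n      freq 1\nEND\n"

	var (
		prefixField = []byte("field ")
		prefixTerm  = []byte("  term ")
		prefixDoc   = []byte("    doc ")
		prefixFreq  = []byte("      freq ")
		markerEnd   = []byte("END")
	)

	sc := bufio.NewScanner(strings.NewReader(sample))
	for sc.Scan() {
		line := sc.Bytes()
		switch {
		case bytes.HasPrefix(line, prefixField):
			fmt.Println("field:", string(bytes.TrimPrefix(line, prefixField)))
		case bytes.HasPrefix(line, prefixTerm):
			fmt.Println("  term:", string(bytes.TrimPrefix(line, prefixTerm)))
		case bytes.HasPrefix(line, prefixDoc):
			fmt.Println("    doc:", string(bytes.TrimPrefix(line, prefixDoc)))
		case bytes.HasPrefix(line, prefixFreq):
			fmt.Println("      freq:", string(bytes.TrimPrefix(line, prefixFreq)))
		case bytes.Equal(line, markerEnd):
			fmt.Println("end of postings data")
		}
	}
}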

func NewPostingsFormat

func NewPostingsFormat() *PostingsFormat

func (*PostingsFormat) FieldsConsumer

func (s *PostingsFormat) FieldsConsumer(state *index.SegmentWriteState) (index.FieldsConsumer, error)

func (*PostingsFormat) FieldsProducer

func (s *PostingsFormat) FieldsProducer(state *index.SegmentReadState) (index.FieldsProducer, error)

func (*PostingsFormat) GetName

func (s *PostingsFormat) GetName() string

type SegmentInfoFormat

type SegmentInfoFormat struct {
}

func NewSegmentInfoFormat

func NewSegmentInfoFormat() *SegmentInfoFormat

func (*SegmentInfoFormat) Read

func (s *SegmentInfoFormat) Read(ctx context.Context, dir store.Directory,
	segmentName string, segmentID []byte, context *store.IOContext) (*index.SegmentInfo, error)

func (*SegmentInfoFormat) Write

func (s *SegmentInfoFormat) Write(ctx context.Context, dir store.Directory, si *index.SegmentInfo, ioContext *store.IOContext) error

type SimpleTVDocsEnum

type SimpleTVDocsEnum struct {
	// contains filtered or unexported fields
}

SimpleTVDocsEnum Note: these two enum classes are exactly like the default impl...

func NewSimpleTVDocsEnum

func NewSimpleTVDocsEnum() *SimpleTVDocsEnum

func (*SimpleTVDocsEnum) Advance

func (s *SimpleTVDocsEnum) Advance(target int) (int, error)

func (*SimpleTVDocsEnum) Cost

func (s *SimpleTVDocsEnum) Cost() int64

func (*SimpleTVDocsEnum) DocID

func (s *SimpleTVDocsEnum) DocID() int

func (*SimpleTVDocsEnum) EndOffset

func (s *SimpleTVDocsEnum) EndOffset() (int, error)

func (*SimpleTVDocsEnum) Freq

func (s *SimpleTVDocsEnum) Freq() (int, error)

func (*SimpleTVDocsEnum) GetPayload

func (s *SimpleTVDocsEnum) GetPayload() ([]byte, error)

func (*SimpleTVDocsEnum) NextDoc

func (s *SimpleTVDocsEnum) NextDoc() (int, error)

func (*SimpleTVDocsEnum) NextPosition

func (s *SimpleTVDocsEnum) NextPosition() (int, error)

func (*SimpleTVDocsEnum) Reset

func (s *SimpleTVDocsEnum) Reset(freq int)

func (*SimpleTVDocsEnum) SlowAdvance

func (s *SimpleTVDocsEnum) SlowAdvance(target int) (int, error)

func (*SimpleTVDocsEnum) StartOffset

func (s *SimpleTVDocsEnum) StartOffset() (int, error)

type SimpleTVFields

type SimpleTVFields struct {
	// contains filtered or unexported fields
}

func NewSimpleTVFields

func NewSimpleTVFields(fields *treemap.Map[string, index.Terms]) *SimpleTVFields

func (*SimpleTVFields) Names

func (s *SimpleTVFields) Names() []string

func (*SimpleTVFields) Size

func (s *SimpleTVFields) Size() int

func (*SimpleTVFields) Terms

func (s *SimpleTVFields) Terms(field string) (index.Terms, error)

type SimpleTVPostings

type SimpleTVPostings struct {
	// contains filtered or unexported fields
}

func NewSimpleTVPostings

func NewSimpleTVPostings() *SimpleTVPostings

type SimpleTVPostingsEnum

type SimpleTVPostingsEnum struct {
	// contains filtered or unexported fields
}

func NewSimpleTVPostingsEnum

func NewSimpleTVPostingsEnum() *SimpleTVPostingsEnum

func (*SimpleTVPostingsEnum) Advance

func (s *SimpleTVPostingsEnum) Advance(target int) (int, error)

func (*SimpleTVPostingsEnum) Cost

func (s *SimpleTVPostingsEnum) Cost() int64

func (*SimpleTVPostingsEnum) DocID

func (s *SimpleTVPostingsEnum) DocID() int

func (*SimpleTVPostingsEnum) EndOffset

func (s *SimpleTVPostingsEnum) EndOffset() (int, error)

func (*SimpleTVPostingsEnum) Freq

func (s *SimpleTVPostingsEnum) Freq() (int, error)

func (*SimpleTVPostingsEnum) GetPayload

func (s *SimpleTVPostingsEnum) GetPayload() ([]byte, error)

func (*SimpleTVPostingsEnum) NextDoc

func (s *SimpleTVPostingsEnum) NextDoc() (int, error)

func (*SimpleTVPostingsEnum) NextPosition

func (s *SimpleTVPostingsEnum) NextPosition() (int, error)

func (*SimpleTVPostingsEnum) Reset

func (s *SimpleTVPostingsEnum) Reset(positions, startOffsets, endOffsets []int, payloads [][]byte)

func (*SimpleTVPostingsEnum) SlowAdvance

func (s *SimpleTVPostingsEnum) SlowAdvance(target int) (int, error)

func (*SimpleTVPostingsEnum) StartOffset

func (s *SimpleTVPostingsEnum) StartOffset() (int, error)

type SimpleTVTerms

type SimpleTVTerms struct {
	*index.TermsBase
	// contains filtered or unexported fields
}

func NewSimpleTVTerms

func NewSimpleTVTerms(hasOffsets, hasPositions, hasPayloads bool) *SimpleTVTerms

func (*SimpleTVTerms) GetDocCount

func (s *SimpleTVTerms) GetDocCount() (int, error)

func (*SimpleTVTerms) GetSumDocFreq

func (s *SimpleTVTerms) GetSumDocFreq() (int64, error)

func (*SimpleTVTerms) GetSumTotalTermFreq

func (s *SimpleTVTerms) GetSumTotalTermFreq() (int64, error)

func (*SimpleTVTerms) HasFreqs

func (s *SimpleTVTerms) HasFreqs() bool

func (*SimpleTVTerms) HasOffsets

func (s *SimpleTVTerms) HasOffsets() bool

func (*SimpleTVTerms) HasPayloads

func (s *SimpleTVTerms) HasPayloads() bool

func (*SimpleTVTerms) HasPositions

func (s *SimpleTVTerms) HasPositions() bool

func (*SimpleTVTerms) Iterator

func (s *SimpleTVTerms) Iterator() (index.TermsEnum, error)

func (*SimpleTVTerms) Size

func (s *SimpleTVTerms) Size() (int, error)

type SimpleTVTermsEnum

type SimpleTVTermsEnum struct {
	*index.BaseTermsEnum
	// contains filtered or unexported fields
}

func NewSimpleTVTermsEnum

func NewSimpleTVTermsEnum(terms *treemap.Map[[]byte, *SimpleTVPostings]) *SimpleTVTermsEnum

func (*SimpleTVTermsEnum) DocFreq

func (s *SimpleTVTermsEnum) DocFreq() (int, error)

func (*SimpleTVTermsEnum) Impacts

func (s *SimpleTVTermsEnum) Impacts(flags int) (index.ImpactsEnum, error)

func (*SimpleTVTermsEnum) Next

func (s *SimpleTVTermsEnum) Next(context.Context) ([]byte, error)

func (*SimpleTVTermsEnum) Ord

func (s *SimpleTVTermsEnum) Ord() (int64, error)

func (*SimpleTVTermsEnum) Postings

func (s *SimpleTVTermsEnum) Postings(reuse index.PostingsEnum, flags int) (index.PostingsEnum, error)

func (*SimpleTVTermsEnum) SeekCeil

func (s *SimpleTVTermsEnum) SeekCeil(ctx context.Context, text []byte) (index.SeekStatus, error)

func (*SimpleTVTermsEnum) SeekExactByOrd

func (s *SimpleTVTermsEnum) SeekExactByOrd(ctx context.Context, ord int64) error

func (*SimpleTVTermsEnum) Term

func (s *SimpleTVTermsEnum) Term() ([]byte, error)

func (*SimpleTVTermsEnum) TotalTermFreq

func (s *SimpleTVTermsEnum) TotalTermFreq() (int64, error)

type SimpleTextBits

type SimpleTextBits struct {
	// contains filtered or unexported fields
}

func NewSimpleTextBits

func NewSimpleTextBits(bits *bitset.BitSet, size int) *SimpleTextBits

func (*SimpleTextBits) Len

func (s *SimpleTextBits) Len() uint

func (*SimpleTextBits) Test

func (s *SimpleTextBits) Test(index uint) bool

type SimpleTextNormsConsumer

type SimpleTextNormsConsumer struct {
	*index.NormsConsumerDefault
	// contains filtered or unexported fields
}

SimpleTextNormsConsumer writes plain-text norms. FOR RECREATIONAL USE ONLY.

func NewSimpleTextNormsConsumer

func NewSimpleTextNormsConsumer(state *index.SegmentWriteState) (*SimpleTextNormsConsumer, error)

func (*SimpleTextNormsConsumer) AddNormsField

func (s *SimpleTextNormsConsumer) AddNormsField(field *document.FieldInfo, normsProducer index.NormsProducer) error

func (*SimpleTextNormsConsumer) Close

func (s *SimpleTextNormsConsumer) Close() error

type SimpleTextNormsProducer

type SimpleTextNormsProducer struct {
	// contains filtered or unexported fields
}

func NewSimpleTextNormsProducer

func NewSimpleTextNormsProducer(state *index.SegmentReadState) (*SimpleTextNormsProducer, error)

func (*SimpleTextNormsProducer) CheckIntegrity

func (s *SimpleTextNormsProducer) CheckIntegrity() error

func (*SimpleTextNormsProducer) Close

func (s *SimpleTextNormsProducer) Close() error

func (*SimpleTextNormsProducer) GetMergeInstance

func (s *SimpleTextNormsProducer) GetMergeInstance() index.NormsProducer

func (*SimpleTextNormsProducer) GetNorms

type SkipReader

type SkipReader struct {
	*index.MultiLevelSkipListReaderDefault
	// contains filtered or unexported fields
}

SkipReader reads skip lists with multiple levels. See TextFieldsWriter for information about the encoding of the multi-level skip lists.

func NewSkipReader

func NewSkipReader(skipStream store.IndexInput) *SkipReader

func (*SkipReader) GetImpacts

func (s *SkipReader) GetImpacts() index.Impacts

func (*SkipReader) ReadSkipData

func (s *SkipReader) ReadSkipData(level int, skipStream store.IndexInput) (int, error)

func (*SkipReader) Reset

func (s *SkipReader) Reset(skipPointer int64, docFreq int)

type SkipWriter

type SkipWriter struct {
	*index.MultiLevelSkipListWriterDefault
	// contains filtered or unexported fields
}

func NewSkipWriter

func NewSkipWriter(writeState *index.SegmentWriteState) (*SkipWriter, error)

func (*SkipWriter) ResetSkip

func (s *SkipWriter) ResetSkip()

func (*SkipWriter) WriteChildPointer

func (s *SkipWriter) WriteChildPointer(childPointer int64, skipBuffer store.DataOutput) error

func (*SkipWriter) WriteLevelLength

func (s *SkipWriter) WriteLevelLength(levelLength int64, output store.IndexOutput) error

func (*SkipWriter) WriteSkip

func (s *SkipWriter) WriteSkip(output store.IndexOutput) (int64, error)

func (*SkipWriter) WriteSkipData

func (s *SkipWriter) WriteSkipData(level int, skipBuffer store.IndexOutput) error

type StoredFieldsFormat

type StoredFieldsFormat struct {
}

func NewStoredFieldsFormat

func NewStoredFieldsFormat() *StoredFieldsFormat

func (*StoredFieldsFormat) FieldsReader

func (s *StoredFieldsFormat) FieldsReader(directory store.Directory, si *index.SegmentInfo,
	fn *index.FieldInfos, context *store.IOContext) (index.StoredFieldsReader, error)

func (*StoredFieldsFormat) FieldsWriter

func (s *StoredFieldsFormat) FieldsWriter(directory store.Directory,
	si *index.SegmentInfo, context *store.IOContext) (index.StoredFieldsWriter, error)

type StoredFieldsReader

type StoredFieldsReader struct {
	// contains filtered or unexported fields
}

StoredFieldsReader reads plain-text stored fields. FOR RECREATIONAL USE ONLY. lucene.experimental

func NewStoredFieldsReader

func NewStoredFieldsReader(
	directory store.Directory, si *index.SegmentInfo,
	fn *index.FieldInfos, context *store.IOContext) (*StoredFieldsReader, error)

func (*StoredFieldsReader) CheckIntegrity

func (s *StoredFieldsReader) CheckIntegrity() error

func (*StoredFieldsReader) Clone

func (*StoredFieldsReader) Close

func (s *StoredFieldsReader) Close() error

func (*StoredFieldsReader) GetMergeInstance

func (s *StoredFieldsReader) GetMergeInstance() index.StoredFieldsReader

func (*StoredFieldsReader) VisitDocument

func (s *StoredFieldsReader) VisitDocument(docID int, visitor document.StoredFieldVisitor) error

type StoredFieldsWriter

type StoredFieldsWriter struct {
	// contains filtered or unexported fields
}

func NewStoredFieldsWriter

func NewStoredFieldsWriter(dir store.Directory,
	segment string, context *store.IOContext) (*StoredFieldsWriter, error)

func (*StoredFieldsWriter) Close

func (s *StoredFieldsWriter) Close() error

func (*StoredFieldsWriter) Finish

func (s *StoredFieldsWriter) Finish(fis *index.FieldInfos, numDocs int) error

func (*StoredFieldsWriter) FinishDocument

func (s *StoredFieldsWriter) FinishDocument() error

func (*StoredFieldsWriter) StartDocument

func (s *StoredFieldsWriter) StartDocument() error

func (*StoredFieldsWriter) WriteField

func (s *StoredFieldsWriter) WriteField(info *document.FieldInfo, field document.IndexableField) error

type TermVectorsFormat

type TermVectorsFormat struct {
}

func NewTermVectorsFormat

func NewTermVectorsFormat() *TermVectorsFormat

func (*TermVectorsFormat) VectorsReader

func (s *TermVectorsFormat) VectorsReader(dir store.Directory, segmentInfo *index.SegmentInfo,
	fieldInfos *index.FieldInfos, context *store.IOContext) (index.TermVectorsReader, error)

func (*TermVectorsFormat) VectorsWriter

func (s *TermVectorsFormat) VectorsWriter(dir store.Directory,
	segmentInfo *index.SegmentInfo, context *store.IOContext) (index.TermVectorsWriter, error)

type TermVectorsReader

type TermVectorsReader struct {
	// contains filtered or unexported fields
}

TermVectorsReader reads plain-text term vectors. FOR RECREATIONAL USE ONLY. lucene.experimental

func NewTermVectorsReader

func NewTermVectorsReader(directory store.Directory, si *index.SegmentInfo,
	context *store.IOContext) (*TermVectorsReader, error)

func (*TermVectorsReader) CheckIntegrity

func (s *TermVectorsReader) CheckIntegrity() error

func (*TermVectorsReader) Clone

func (*TermVectorsReader) Close

func (s *TermVectorsReader) Close() error

func (*TermVectorsReader) Get

func (s *TermVectorsReader) Get(doc int) (index.Fields, error)

func (*TermVectorsReader) GetMergeInstance

func (s *TermVectorsReader) GetMergeInstance() index.TermVectorsReader

type TermVectorsWriter

type TermVectorsWriter struct {
	// contains filtered or unexported fields
}

TermVectorsWriter writes plain-text term vectors. FOR RECREATIONAL USE ONLY. lucene.experimental
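
The writer exposes the usual start/finish call pairs. The sketch below is a hypothetical helper, written as if inside this package; the call order is assumed from the method set (and from Lucene's TermVectorsWriter contract) rather than stated by these docs, and the document import path is an assumption.

package simpletext

import (
	"github.com/geange/lucene-go/core/document" // assumed import path for the document package
)

// writeOneTermVector is a hypothetical helper (not part of this package)
// showing one plausible call sequence for writing the term vectors of a
// single document that has one field containing one term.
func writeOneTermVector(w *TermVectorsWriter, info *document.FieldInfo) error {
	if err := w.StartDocument(1); err != nil { // one field carries vectors in this doc
		return err
	}
	// One term in this field; record positions and offsets, but no payloads.
	if err := w.StartField(info, 1, true, true, false); err != nil {
		return err
	}
	if err := w.StartTerm([]byte("hello"), 1); err != nil { // the term occurs once
		return err
	}
	if err := w.AddPosition(0, 0, 5, nil); err != nil { // position 0, offsets [0, 5), no payload
		return err
	}
	if err := w.FinishTerm(); err != nil {
		return err
	}
	if err := w.FinishField(); err != nil {
		return err
	}
	return w.FinishDocument()
}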

func NewTermVectorsWriter

func NewTermVectorsWriter(dir store.Directory,
	segment string, context *store.IOContext) (*TermVectorsWriter, error)

func (*TermVectorsWriter) AddPosition

func (s *TermVectorsWriter) AddPosition(position, startOffset, endOffset int, payload []byte) error

func (*TermVectorsWriter) Close

func (s *TermVectorsWriter) Close() error

func (*TermVectorsWriter) Finish

func (s *TermVectorsWriter) Finish(fis *index.FieldInfos, numDocs int) error

func (*TermVectorsWriter) FinishDocument

func (s *TermVectorsWriter) FinishDocument() error

func (*TermVectorsWriter) FinishField

func (s *TermVectorsWriter) FinishField() error

func (*TermVectorsWriter) FinishTerm

func (s *TermVectorsWriter) FinishTerm() error

func (*TermVectorsWriter) StartDocument

func (s *TermVectorsWriter) StartDocument(numVectorFields int) error

func (*TermVectorsWriter) StartField

func (s *TermVectorsWriter) StartField(info *document.FieldInfo, numTerms int, positions, offsets, payloads bool) error

func (*TermVectorsWriter) StartTerm

func (s *TermVectorsWriter) StartTerm(term []byte, freq int) error

type TextFieldsWriter

type TextFieldsWriter struct {
	*index.FieldsConsumerDefault // TODO: fix it
	// contains filtered or unexported fields
}

func NewFieldsWriter

func NewFieldsWriter(writeState *index.SegmentWriteState) (*TextFieldsWriter, error)

func (*TextFieldsWriter) Close

func (s *TextFieldsWriter) Close() error

func (*TextFieldsWriter) Write

func (s *TextFieldsWriter) Write(fields index.Fields, norms index.NormsProducer) error

func (*TextFieldsWriter) WriteV1

func (s *TextFieldsWriter) WriteV1(fieldInfos *index.FieldInfos, fields index.Fields,
	normsProducer index.NormsProducer) error
