bam

package
v1.4.5 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 28, 2024 License: BSD-3-Clause Imports: 12 Imported by: 61

Documentation

Overview

Package bam implements BAM file format reading, writing and indexing. The BAM format is described in the SAM specification.

http://samtools.github.io/hts-specs/SAMv1.pdf

Index

Examples

Constants

View Source
// Omit modes. None, AuxTags and AllVariableLengthData are the values
// taken by the Reader Omit method; they select which portions of a
// record are left out when reading.
const (
	None                  = iota // Omit no field data from the record.
	AuxTags                      // Omit auxiliary tag data.
	AllVariableLengthData        // Omit sequence, quality and auxiliary data.
)

None, AuxTags and AllVariableLengthData are values taken by the Reader Omit method.

Variables

This section is empty.

Functions

func WriteIndex

func WriteIndex(w io.Writer, idx *Index) error

WriteIndex writes the Index to the given io.Writer.

Types

type Index

// Index is a BAI index.
type Index struct {

	// MergeStrategy determines the merge strategy
	// used to prepare the slice of chunks returned
	// by Chunks.
	// If MergeStrategy is nil, index.MergeStrategy
	// is used.
	MergeStrategy index.MergeStrategy
	// contains filtered or unexported fields
}

Index is a BAI index.

func ReadIndex

func ReadIndex(r io.Reader) (*Index, error)

ReadIndex reads the BAI Index from the given io.Reader.

func (*Index) Add

func (i *Index) Add(r *sam.Record, c bgzf.Chunk) error

Add records the SAM record as having been located at the given chunk.

Example
package main

import (
	"io"
	"log"
	"os"

	"github.com/biogo/hts/bam"
)

// main builds a BAI index for the BAM stream arriving on standard input
// and emits the finished index on standard output. Any failure is fatal.
func main() {
	reader, err := bam.NewReader(os.Stdin, 1)
	if err != nil {
		log.Fatalf("failed to open BAM: %v", err)
	}

	var idx bam.Index

	// Feed every record, together with the chunk it was read from,
	// into the index until the stream is exhausted.
scan:
	for {
		rec, err := reader.Read()
		switch {
		case err == io.EOF:
			break scan
		case err != nil:
			log.Fatalf("failed to read BAM record: %v", err)
		}

		if err := idx.Add(rec, reader.LastChunk()); err != nil {
			log.Fatalf("failed to add record to BAM index: %v", err)
		}
	}

	if err := bam.WriteIndex(os.Stdout, &idx); err != nil {
		log.Fatalf("failed to write BAM index: %v", err)
	}
}
Output:

func (*Index) Chunks

func (i *Index) Chunks(r *sam.Reference, beg, end int) ([]bgzf.Chunk, error)

Chunks returns a []bgzf.Chunk that corresponds to the given genomic interval.

func (*Index) MergeChunks

func (i *Index) MergeChunks(s index.MergeStrategy)

MergeChunks applies the given MergeStrategy to all bins in the Index.

func (*Index) NumRefs

func (i *Index) NumRefs() int

NumRefs returns the number of references in the index.

func (*Index) ReferenceStats

func (i *Index) ReferenceStats(id int) (stats index.ReferenceStats, ok bool)

ReferenceStats returns the index statistics for the given reference and true if the statistics are valid.

func (*Index) Unmapped

func (i *Index) Unmapped() (n uint64, ok bool)

Unmapped returns the number of unmapped reads and true if the count is valid.

type Iterator

type Iterator struct {
	// contains filtered or unexported fields
}

Iterator wraps a Reader to provide a convenient loop interface for reading BAM data. Successive calls to the Next method will step through the features of the provided Reader. Iteration stops unrecoverably at EOF or the first error.

func NewIterator

func NewIterator(r *Reader, chunks []bgzf.Chunk) (*Iterator, error)

NewIterator returns an Iterator to read from r, limiting the reads to the provided chunks.

// Look up the chunks that correspond to the genomic interval
// beg..end on ref.
chunks, err := idx.Chunks(ref, beg, end)
if err != nil {
	return err
}
// Limit reads from r to those chunks.
i, err := NewIterator(r, chunks)
if err != nil {
	return err
}
// Next returns false at the end of the input or on the first error.
for i.Next() {
	fn(i.Record())
}
// Close releases the underlying Reader.
return i.Close()

func (*Iterator) Close

func (i *Iterator) Close() error

Close releases the underlying Reader.

func (*Iterator) Error

func (i *Iterator) Error() error

Error returns the first non-EOF error that was encountered by the Iterator.

func (*Iterator) Next

func (i *Iterator) Next() bool

Next advances the Iterator past the next record, which will then be available through the Record method. It returns false when the iteration stops, either by reaching the end of the input or an error. After Next returns false, the Error method will return any error that occurred during iteration, except that if it was io.EOF, Error will return nil.

func (*Iterator) Record

func (i *Iterator) Record() *sam.Record

Record returns the most recent record read by a call to Next.

type Merger added in v1.1.0

type Merger struct {
	// contains filtered or unexported fields
}

Merger implements merging BAM data with a defined sort order. It can be used for sorting, concatenating and deduplicating BAM data.

Example (SortByCoordinate)
package main

import (
	"fmt"
	"io"
	"log"
	"os"
	"sort"

	"github.com/biogo/hts/bam"
	"github.com/biogo/hts/sam"
)

// main demonstrates an external coordinate sort of BAM data using a
// Merger: the input is read in bounded chunks, each chunk is sorted and
// written to a temporary BAM file by writeChunk, and the resulting
// shards are merged into a single coordinate-sorted stream.
func main() {
	// Inputs.
	var (
		// Input source of BAM data.
		r io.Reader

		// Operation to perform on each record of
		// sorted stream.
		fn func(*sam.Record)
	)

	// Specify sort chunk size.
	const chunk = 1e5

	// Open source.
	br, err := bam.NewReader(r, 0)
	if err != nil {
		log.Fatalf("failed to open bam reader: %v", err)
	}
	defer br.Close()

	// Make header with coordinate sort order.
	h := br.Header().Clone()
	h.SortOrder = sam.Coordinate

	// Create file system workspace and prepare
	// for clean up.
	dir, err := os.MkdirTemp("", "")
	if err != nil {
		log.Fatalf("failed to create temp directory: %v", err)
	}
	// From here on failures are reported with log.Panic rather than
	// log.Fatal so that this deferred function gets to remove the
	// workspace before the program exits.
	defer func() {
		os.RemoveAll(dir)
		r := recover()
		if r != nil {
			log.Fatal(r)
		}
	}()

	// Limit number of records for each sort chunk.
	recs := make([]*sam.Record, 0, chunk)

	// Keep the collection of shards for merging.
	var t []*bam.Reader

	// Read the input, spilling a sorted shard to disk each time the
	// record buffer fills up.
	it := sam.NewIterator(br)
	for it.Next() {
		recs = append(recs, it.Record())
		if len(recs) == cap(recs) {
			r, err := writeChunk(dir, h, recs)
			if err != nil {
				log.Panic(err)
			}
			t = append(t, r)
			recs = recs[:0]
		}
	}
	// Always check for a read error, even when records are still
	// buffered; otherwise a truncated input would be sorted and
	// merged as if it were complete.
	if err := it.Error(); err != nil {
		log.Panicf("error during bam reading: %v", err)
	}
	// Spill the final, partially filled buffer.
	if len(recs) != 0 {
		r, err := writeChunk(dir, h, recs)
		if err != nil {
			log.Panic(err)
		}
		t = append(t, r)
	}

	// Create merge using the coordinate sort order.
	m, err := bam.NewMerger(nil, t...)
	if err != nil {
		log.Panicf("failed to create merger: %v", err)
	}
	sorted := sam.NewIterator(m)
	for sorted.Next() {
		// Operate on coordinate sorted stream.
		fn(sorted.Record())
	}
	// Close the underlying Readers.
	for i, r := range t {
		err = r.Close()
		if err != nil {
			log.Printf("failed to close reader %d: %v", i, err)
		}
	}
	err = sorted.Error()
	if err != nil {
		log.Panicf("error during bam reading: %v", err)
	}
}

// writeChunk sorts recs by coordinate (in place), writes them with the
// header h to a new temporary BAM file created in dir, and returns a
// Reader opened on the start of that file.
//
// On any failure the temporary file is closed before returning so that
// its descriptor is not leaked; the file itself is left in dir for the
// caller's directory clean-up.
func writeChunk(dir string, h *sam.Header, recs []*sam.Record) (*bam.Reader, error) {
	sort.Sort(byCoordinate(recs))

	f, err := os.CreateTemp(dir, "")
	if err != nil {
		return nil, fmt.Errorf("failed to create temp file in %q: %w", dir, err)
	}
	// Release the file on every error path below. On success the
	// file must stay open because the returned Reader reads from it.
	ok := false
	defer func() {
		if !ok {
			// Best effort: an error is already being returned.
			_ = f.Close()
		}
	}()

	bw, err := bam.NewWriter(f, h, 0)
	if err != nil {
		return nil, fmt.Errorf("failed to open bam writer: %w", err)
	}
	for _, r := range recs {
		err = bw.Write(r)
		if err != nil {
			return nil, fmt.Errorf("failed to write record: %w", err)
		}
	}
	err = bw.Close()
	if err != nil {
		return nil, fmt.Errorf("failed to close bam writer: %w", err)
	}
	err = f.Sync()
	if err != nil {
		return nil, fmt.Errorf("failed to sync file: %w", err)
	}

	// Make a reader of the written data.
	_, err = f.Seek(0, io.SeekStart)
	if err != nil {
		return nil, fmt.Errorf("failed to seek to start: %w", err)
	}
	r, err := bam.NewReader(f, 0)
	if err != nil {
		return nil, fmt.Errorf("failed to open bam reader: %w", err)
	}
	ok = true
	return r, nil
}

// byCoordinate is a slice of SAM records that sorts in coordinate
// order when passed to sort.Sort.
type byCoordinate []*sam.Record

// Len reports how many records the collection holds.
func (s byCoordinate) Len() int {
	return len(s)
}

// Less reports whether the record at index i orders before the record
// at index j, as decided by the record's LessByCoordinate method.
func (s byCoordinate) Less(i, j int) bool {
	a, b := s[i], s[j]
	return a.LessByCoordinate(b)
}

// Swap exchanges the records at indices i and j.
func (s byCoordinate) Swap(i, j int) {
	s[j], s[i] = s[i], s[j]
}
Output:

func NewMerger added in v1.1.0

func NewMerger(less func(a, b *sam.Record) bool, src ...*Reader) (*Merger, error)

NewMerger returns a Merger that reads from the source Readers.

The sort order of the stream merge is defined by the sort order field of the src Reader headers and the provided less function. The header sort order fields must agree.

Sort order is determined using the following rules:

  • for sam.QueryName the LessByName sam.Record method is used.
  • for sam.Coordinate the LessByCoordinate sam.Record method is used.
  • for sam.Unsorted the reader streams are concatenated.
  • for sam.Unknown the provided less function is used - if nil this is the same as sam.Unsorted.

For all sort orders other than sam.Unknown, the less parameter is ignored. The src Readers should be closed individually after use to avoid leaking resources.

func (*Merger) Header added in v1.1.0

func (m *Merger) Header() *sam.Header

Header returns the SAM Header held by the Merger. This Header is constructed using the sam.MergeHeaders function.

func (*Merger) Read added in v1.1.0

func (m *Merger) Read() (rec *sam.Record, err error)

Read returns the next sam.Record in the BAM stream.

The Read behaviour will depend on the underlying Readers.

type Reader

type Reader struct {
	// contains filtered or unexported fields
}

Reader implements BAM data reading.

func NewReader

func NewReader(r io.Reader, rd int) (*Reader, error)

NewReader returns a new Reader using the given io.Reader and setting the read concurrency to rd. If rd is zero concurrency is set to GOMAXPROCS. The returned Reader should be closed after use to avoid leaking resources.

func (*Reader) Close

func (br *Reader) Close() error

Close closes the Reader.

func (*Reader) Header

func (br *Reader) Header() *sam.Header

Header returns the SAM Header held by the Reader.

func (*Reader) LastChunk

func (br *Reader) LastChunk() bgzf.Chunk

LastChunk returns the bgzf.Chunk corresponding to the last Read operation. The bgzf.Chunk returned is only valid if the last Read operation returned a nil error.

func (*Reader) Omit

func (br *Reader) Omit(o int)

Omit specifies what portions of the Record to omit reading. When o is None, a full sam.Record is returned by Read; when o is AuxTags, the auxiliary tag data is omitted; and when o is AllVariableLengthData, the sequence, quality and auxiliary data are omitted.

func (*Reader) Read

func (br *Reader) Read() (*sam.Record, error)

Read returns the next sam.Record in the BAM stream.

The sam.Record returned will not contain the sequence, quality or auxiliary tag data if Omit(AllVariableLengthData) has been called prior to the Read call and will not contain the auxiliary tag data if Omit(AuxTags) has been called.

func (*Reader) Seek

func (br *Reader) Seek(off bgzf.Offset) error

Seek performs a seek to the specified bgzf.Offset.

func (*Reader) SetCache

func (bg *Reader) SetCache(c bgzf.Cache)

SetCache sets the cache to be used by the Reader.

func (*Reader) SetChunk

func (br *Reader) SetChunk(c *bgzf.Chunk) error

SetChunk sets a limited range of the underlying BGZF file to read, after seeking to the start of the given chunk. It may be used to iterate over a defined genomic interval.

type Writer

type Writer struct {
	// contains filtered or unexported fields
}

Writer implements BAM data writing.

func NewWriter

func NewWriter(w io.Writer, h *sam.Header, wc int) (*Writer, error)

NewWriter returns a new Writer using the given SAM header. Write concurrency is set to wc.

func NewWriterLevel

func NewWriterLevel(w io.Writer, h *sam.Header, level, wc int) (*Writer, error)

NewWriterLevel returns a new Writer using the given SAM header. Write concurrency is set to wc and compression level is set to level. Valid values for level are described in the compress/gzip documentation.

func (*Writer) Close

func (bw *Writer) Close() error

Close closes the writer.

func (*Writer) Write

func (bw *Writer) Write(r *sam.Record) error

Write writes r to the BAM stream.

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL