|
- package diskqueue
-
- import (
- "bufio"
- "bytes"
- "encoding/binary"
- "errors"
- "fmt"
- "io"
- "math/rand"
- "os"
- "path"
- "sync"
- "time"
- )
-
- // logging stuff copied from github.com/nsqio/nsq/internal/lg
-
// LogLevel identifies the severity of a log message.
type LogLevel int

// Severity levels, in increasing order of severity.
const (
	DEBUG = LogLevel(1)
	INFO  = LogLevel(2)
	WARN  = LogLevel(3)
	ERROR = LogLevel(4)
	FATAL = LogLevel(5)
)

// AppLogFunc is the logging callback used by the queue: it receives a
// severity, a format string and the arguments for that format.
type AppLogFunc func(lvl LogLevel, f string, args ...interface{})

// String returns the upper-case name of the level ("WARNING" for WARN).
// It panics when l is not one of the declared levels.
func (l LogLevel) String() string {
	switch l {
	case DEBUG:
		return "DEBUG"
	case INFO:
		return "INFO"
	case WARN:
		return "WARNING"
	case ERROR:
		return "ERROR"
	case FATAL:
		return "FATAL"
	default:
		panic("invalid LogLevel")
	}
}
-
// Interface is the set of operations a queue exposes to its users.
type Interface interface {
	// Put appends a single message to the queue.
	Put(data []byte) error
	// ReadChan returns the channel messages are delivered on;
	// this is expected to be an *unbuffered* channel.
	ReadChan() <-chan []byte
	// Close shuts the queue down.
	Close() error
	// Delete shuts the queue down in "deleted" mode.
	Delete() error
	// Depth reports the number of messages currently queued.
	Depth() int64
	// Empty discards all pending messages.
	Empty() error
}
-
- // diskQueue implements a filesystem backed FIFO queue
- type diskQueue struct {
- // 64bit atomic vars need to be first for proper alignment on 32bit platforms
-
- // run-time state (also persisted to disk)
- readPos int64
- writePos int64
- readFileNum int64
- writeFileNum int64
- depth int64
-
- sync.RWMutex
-
- // instantiation time metadata
- name string
- dataPath string
- maxBytesPerFile int64 // cannot change once created
- maxBytesPerFileRead int64
- minMsgSize int32
- maxMsgSize int32
- syncEvery int64 // number of writes per fsync
- syncTimeout time.Duration // duration of time per fsync
- exitFlag int32
- needSync bool
-
- // keeps track of the position where we have read
- // (but not yet sent over readChan)
- nextReadPos int64
- nextReadFileNum int64
-
- readFile *os.File
- writeFile *os.File
- reader *bufio.Reader
- writeBuf bytes.Buffer
-
- // exposed via ReadChan()
- readChan chan []byte
-
- // internal channels
- depthChan chan int64
- writeChan chan []byte
- writeResponseChan chan error
- emptyChan chan int
- emptyResponseChan chan error
- exitChan chan int
- exitSyncChan chan int
-
- logf AppLogFunc
- }
-
- // New instantiates an instance of diskQueue, retrieving metadata
- // from the filesystem and starting the read ahead goroutine
- func New(name string, dataPath string, maxBytesPerFile int64,
- minMsgSize int32, maxMsgSize int32,
- syncEvery int64, syncTimeout time.Duration, logf AppLogFunc) Interface {
- d := diskQueue{
- name: name,
- dataPath: dataPath,
- maxBytesPerFile: maxBytesPerFile,
- minMsgSize: minMsgSize,
- maxMsgSize: maxMsgSize,
- readChan: make(chan []byte),
- depthChan: make(chan int64),
- writeChan: make(chan []byte),
- writeResponseChan: make(chan error),
- emptyChan: make(chan int),
- emptyResponseChan: make(chan error),
- exitChan: make(chan int),
- exitSyncChan: make(chan int),
- syncEvery: syncEvery,
- syncTimeout: syncTimeout,
- logf: logf,
- }
-
- // no need to lock here, nothing else could possibly be touching this instance
- err := d.retrieveMetaData()
- if err != nil && !os.IsNotExist(err) {
- d.logf(ERROR, "DISKQUEUE(%s) failed to retrieveMetaData - %s", d.name, err)
- }
-
- go d.ioLoop()
- return &d
- }
-
- // Depth returns the depth of the queue
- func (d *diskQueue) Depth() int64 {
- depth, ok := <-d.depthChan
- if !ok {
- // ioLoop exited
- depth = d.depth
- }
- return depth
- }
-
- // ReadChan returns the receive-only []byte channel for reading data
- func (d *diskQueue) ReadChan() <-chan []byte {
- return d.readChan
- }
-
- // Put writes a []byte to the queue
- func (d *diskQueue) Put(data []byte) error {
- d.RLock()
- defer d.RUnlock()
-
- if d.exitFlag == 1 {
- return errors.New("exiting")
- }
-
- d.writeChan <- data
- return <-d.writeResponseChan
- }
-
- // Close cleans up the queue and persists metadata
- func (d *diskQueue) Close() error {
- err := d.exit(false)
- if err != nil {
- return err
- }
- return d.sync()
- }
-
- func (d *diskQueue) Delete() error {
- return d.exit(true)
- }
-
- func (d *diskQueue) exit(deleted bool) error {
- d.Lock()
- defer d.Unlock()
-
- d.exitFlag = 1
-
- if deleted {
- d.logf(INFO, "DISKQUEUE(%s): deleting", d.name)
- } else {
- d.logf(INFO, "DISKQUEUE(%s): closing", d.name)
- }
-
- close(d.exitChan)
- // ensure that ioLoop has exited
- <-d.exitSyncChan
-
- close(d.depthChan)
-
- if d.readFile != nil {
- d.readFile.Close()
- d.readFile = nil
- }
-
- if d.writeFile != nil {
- d.writeFile.Close()
- d.writeFile = nil
- }
-
- return nil
- }
-
- // Empty destructively clears out any pending data in the queue
- // by fast forwarding read positions and removing intermediate files
- func (d *diskQueue) Empty() error {
- d.RLock()
- defer d.RUnlock()
-
- if d.exitFlag == 1 {
- return errors.New("exiting")
- }
-
- d.logf(INFO, "DISKQUEUE(%s): emptying", d.name)
-
- d.emptyChan <- 1
- return <-d.emptyResponseChan
- }
-
- func (d *diskQueue) deleteAllFiles() error {
- err := d.skipToNextRWFile()
-
- innerErr := os.Remove(d.metaDataFileName())
- if innerErr != nil && !os.IsNotExist(innerErr) {
- d.logf(ERROR, "DISKQUEUE(%s) failed to remove metadata file - %s", d.name, innerErr)
- return innerErr
- }
-
- return err
- }
-
- func (d *diskQueue) skipToNextRWFile() error {
- var err error
-
- if d.readFile != nil {
- d.readFile.Close()
- d.readFile = nil
- }
-
- if d.writeFile != nil {
- d.writeFile.Close()
- d.writeFile = nil
- }
-
- for i := d.readFileNum; i <= d.writeFileNum; i++ {
- fn := d.fileName(i)
- innerErr := os.Remove(fn)
- if innerErr != nil && !os.IsNotExist(innerErr) {
- d.logf(ERROR, "DISKQUEUE(%s) failed to remove data file - %s", d.name, innerErr)
- err = innerErr
- }
- }
-
- d.writeFileNum++
- d.writePos = 0
- d.readFileNum = d.writeFileNum
- d.readPos = 0
- d.nextReadFileNum = d.writeFileNum
- d.nextReadPos = 0
- d.depth = 0
-
- return err
- }
-
- // readOne performs a low level filesystem read for a single []byte
- // while advancing read positions and rolling files, if necessary
- func (d *diskQueue) readOne() ([]byte, error) {
- var err error
- var msgSize int32
-
- if d.readFile == nil {
- curFileName := d.fileName(d.readFileNum)
- d.readFile, err = os.OpenFile(curFileName, os.O_RDONLY, 0600)
- if err != nil {
- return nil, err
- }
-
- d.logf(INFO, "DISKQUEUE(%s): readOne() opened %s", d.name, curFileName)
-
- if d.readPos > 0 {
- _, err = d.readFile.Seek(d.readPos, 0)
- if err != nil {
- d.readFile.Close()
- d.readFile = nil
- return nil, err
- }
- }
-
- // for "complete" files (i.e. not the "current" file), maxBytesPerFileRead
- // should be initialized to the file's size, or default to maxBytesPerFile
- d.maxBytesPerFileRead = d.maxBytesPerFile
- if d.readFileNum < d.writeFileNum {
- stat, err := d.readFile.Stat()
- if err == nil {
- d.maxBytesPerFileRead = stat.Size()
- }
- }
-
- d.reader = bufio.NewReader(d.readFile)
- }
-
- err = binary.Read(d.reader, binary.BigEndian, &msgSize)
- if err != nil {
- d.readFile.Close()
- d.readFile = nil
- return nil, err
- }
-
- if msgSize < d.minMsgSize || msgSize > d.maxMsgSize {
- // this file is corrupt and we have no reasonable guarantee on
- // where a new message should begin
- d.readFile.Close()
- d.readFile = nil
- return nil, fmt.Errorf("invalid message read size (%d)", msgSize)
- }
-
- readBuf := make([]byte, msgSize)
- _, err = io.ReadFull(d.reader, readBuf)
- if err != nil {
- d.readFile.Close()
- d.readFile = nil
- return nil, err
- }
-
- totalBytes := int64(4 + msgSize)
-
- // we only advance next* because we have not yet sent this to consumers
- // (where readFileNum, readPos will actually be advanced)
- d.nextReadPos = d.readPos + totalBytes
- d.nextReadFileNum = d.readFileNum
-
- // we only consider rotating if we're reading a "complete" file
- // and since we cannot know the size at which it was rotated, we
- // rely on maxBytesPerFileRead rather than maxBytesPerFile
- if d.readFileNum < d.writeFileNum && d.nextReadPos >= d.maxBytesPerFileRead {
- if d.readFile != nil {
- d.readFile.Close()
- d.readFile = nil
- }
-
- d.nextReadFileNum++
- d.nextReadPos = 0
- }
-
- return readBuf, nil
- }
-
- // writeOne performs a low level filesystem write for a single []byte
- // while advancing write positions and rolling files, if necessary
- func (d *diskQueue) writeOne(data []byte) error {
- var err error
-
- dataLen := int32(len(data))
- totalBytes := int64(4 + dataLen)
-
- if dataLen < d.minMsgSize || dataLen > d.maxMsgSize {
- return fmt.Errorf("invalid message write size (%d) minMsgSize=%d maxMsgSize=%d", dataLen, d.minMsgSize, d.maxMsgSize)
- }
-
- // will not wrap-around if maxBytesPerFile + maxMsgSize < Int64Max
- if d.writePos > 0 && d.writePos+totalBytes > d.maxBytesPerFile {
- if d.readFileNum == d.writeFileNum {
- d.maxBytesPerFileRead = d.writePos
- }
-
- d.writeFileNum++
- d.writePos = 0
-
- // sync every time we start writing to a new file
- err = d.sync()
- if err != nil {
- d.logf(ERROR, "DISKQUEUE(%s) failed to sync - %s", d.name, err)
- }
-
- if d.writeFile != nil {
- d.writeFile.Close()
- d.writeFile = nil
- }
- }
- if d.writeFile == nil {
- curFileName := d.fileName(d.writeFileNum)
- d.writeFile, err = os.OpenFile(curFileName, os.O_RDWR|os.O_CREATE, 0600)
- if err != nil {
- return err
- }
-
- d.logf(INFO, "DISKQUEUE(%s): writeOne() opened %s", d.name, curFileName)
-
- if d.writePos > 0 {
- _, err = d.writeFile.Seek(d.writePos, 0)
- if err != nil {
- d.writeFile.Close()
- d.writeFile = nil
- return err
- }
- }
- }
-
- d.writeBuf.Reset()
- err = binary.Write(&d.writeBuf, binary.BigEndian, dataLen)
- if err != nil {
- return err
- }
-
- _, err = d.writeBuf.Write(data)
- if err != nil {
- return err
- }
-
- // only write to the file once
- _, err = d.writeFile.Write(d.writeBuf.Bytes())
- if err != nil {
- d.writeFile.Close()
- d.writeFile = nil
- return err
- }
-
- d.writePos += totalBytes
- d.depth += 1
-
- return err
- }
-
- // sync fsyncs the current writeFile and persists metadata
- func (d *diskQueue) sync() error {
- if d.writeFile != nil {
- err := d.writeFile.Sync()
- if err != nil {
- d.writeFile.Close()
- d.writeFile = nil
- return err
- }
- }
-
- err := d.persistMetaData()
- if err != nil {
- return err
- }
-
- d.needSync = false
- return nil
- }
-
- // retrieveMetaData initializes state from the filesystem
- func (d *diskQueue) retrieveMetaData() error {
- var f *os.File
- var err error
-
- fileName := d.metaDataFileName()
- f, err = os.OpenFile(fileName, os.O_RDONLY, 0600)
- if err != nil {
- return err
- }
- defer f.Close()
-
- var depth int64
- _, err = fmt.Fscanf(f, "%d\n%d,%d\n%d,%d\n",
- &depth,
- &d.readFileNum, &d.readPos,
- &d.writeFileNum, &d.writePos)
- if err != nil {
- return err
- }
- d.depth = depth
- d.nextReadFileNum = d.readFileNum
- d.nextReadPos = d.readPos
-
- // if the metadata was not sync'd at the last shutdown of nsqd
- // then the actual file size might actually be larger than the writePos,
- // in which case the safest thing to do is skip to the next file for
- // writes, and let the reader salvage what it can from the messages in the
- // diskqueue beyond the metadata's likely also stale readPos
- fileName = d.fileName(d.writeFileNum)
- fileInfo, err := os.Stat(fileName)
- if err != nil {
- return err
- }
- fileSize := fileInfo.Size()
- if d.writePos < fileSize {
- d.logf(WARN,
- "DISKQUEUE(%s) %s metadata writePos %d < file size of %d, skipping to new file",
- d.name, fileName, d.writePos, fileSize)
- d.writeFileNum += 1
- d.writePos = 0
- if d.writeFile != nil {
- d.writeFile.Close()
- d.writeFile = nil
- }
- }
-
- return nil
- }
-
- // persistMetaData atomically writes state to the filesystem
- func (d *diskQueue) persistMetaData() error {
- var f *os.File
- var err error
-
- fileName := d.metaDataFileName()
- tmpFileName := fmt.Sprintf("%s.%d.tmp", fileName, rand.Int())
-
- // write to tmp file
- f, err = os.OpenFile(tmpFileName, os.O_RDWR|os.O_CREATE, 0600)
- if err != nil {
- return err
- }
-
- _, err = fmt.Fprintf(f, "%d\n%d,%d\n%d,%d\n",
- d.depth,
- d.readFileNum, d.readPos,
- d.writeFileNum, d.writePos)
- if err != nil {
- f.Close()
- return err
- }
- f.Sync()
- f.Close()
-
- // atomically rename
- return os.Rename(tmpFileName, fileName)
- }
-
- func (d *diskQueue) metaDataFileName() string {
- return fmt.Sprintf(path.Join(d.dataPath, "%s.diskqueue.meta.dat"), d.name)
- }
-
- func (d *diskQueue) fileName(fileNum int64) string {
- return fmt.Sprintf(path.Join(d.dataPath, "%s.diskqueue.%06d.dat"), d.name, fileNum)
- }
-
- func (d *diskQueue) checkTailCorruption(depth int64) {
- if d.readFileNum < d.writeFileNum || d.readPos < d.writePos {
- return
- }
-
- // we've reached the end of the diskqueue
- // if depth isn't 0 something went wrong
- if depth != 0 {
- if depth < 0 {
- d.logf(ERROR,
- "DISKQUEUE(%s) negative depth at tail (%d), metadata corruption, resetting 0...",
- d.name, depth)
- } else if depth > 0 {
- d.logf(ERROR,
- "DISKQUEUE(%s) positive depth at tail (%d), data loss, resetting 0...",
- d.name, depth)
- }
- // force set depth 0
- d.depth = 0
- d.needSync = true
- }
-
- if d.readFileNum != d.writeFileNum || d.readPos != d.writePos {
- if d.readFileNum > d.writeFileNum {
- d.logf(ERROR,
- "DISKQUEUE(%s) readFileNum > writeFileNum (%d > %d), corruption, skipping to next writeFileNum and resetting 0...",
- d.name, d.readFileNum, d.writeFileNum)
- }
-
- if d.readPos > d.writePos {
- d.logf(ERROR,
- "DISKQUEUE(%s) readPos > writePos (%d > %d), corruption, skipping to next writeFileNum and resetting 0...",
- d.name, d.readPos, d.writePos)
- }
-
- d.skipToNextRWFile()
- d.needSync = true
- }
- }
-
- func (d *diskQueue) moveForward() {
- oldReadFileNum := d.readFileNum
- d.readFileNum = d.nextReadFileNum
- d.readPos = d.nextReadPos
- d.depth -= 1
-
- // see if we need to clean up the old file
- if oldReadFileNum != d.nextReadFileNum {
- // sync every time we start reading from a new file
- d.needSync = true
-
- fn := d.fileName(oldReadFileNum)
- err := os.Remove(fn)
- if err != nil {
- d.logf(ERROR, "DISKQUEUE(%s) failed to Remove(%s) - %s", d.name, fn, err)
- }
- }
-
- d.checkTailCorruption(d.depth)
- }
-
- func (d *diskQueue) handleReadError() {
- // jump to the next read file and rename the current (bad) file
- if d.readFileNum == d.writeFileNum {
- // if you can't properly read from the current write file it's safe to
- // assume that something is fucked and we should skip the current file too
- if d.writeFile != nil {
- d.writeFile.Close()
- d.writeFile = nil
- }
- d.writeFileNum++
- d.writePos = 0
- }
-
- badFn := d.fileName(d.readFileNum)
- badRenameFn := badFn + ".bad"
-
- d.logf(WARN,
- "DISKQUEUE(%s) jump to next file and saving bad file as %s",
- d.name, badRenameFn)
-
- err := os.Rename(badFn, badRenameFn)
- if err != nil {
- d.logf(ERROR,
- "DISKQUEUE(%s) failed to rename bad diskqueue file %s to %s",
- d.name, badFn, badRenameFn)
- }
-
- d.readFileNum++
- d.readPos = 0
- d.nextReadFileNum = d.readFileNum
- d.nextReadPos = 0
-
- // significant state change, schedule a sync on the next iteration
- d.needSync = true
-
- d.checkTailCorruption(d.depth)
- }
-
- // ioLoop provides the backend for exposing a go channel (via ReadChan())
- // in support of multiple concurrent queue consumers
- //
- // it works by looping and branching based on whether or not the queue has data
- // to read and blocking until data is either read or written over the appropriate
- // go channels
- //
- // conveniently this also means that we're asynchronously reading from the filesystem
- func (d *diskQueue) ioLoop() {
- var dataRead []byte
- var err error
- var count int64
- var r chan []byte
-
- syncTicker := time.NewTicker(d.syncTimeout)
-
- for {
- // dont sync all the time :)
- if count == d.syncEvery {
- d.needSync = true
- }
-
- if d.needSync {
- err = d.sync()
- if err != nil {
- d.logf(ERROR, "DISKQUEUE(%s) failed to sync - %s", d.name, err)
- }
- count = 0
- }
-
- if (d.readFileNum < d.writeFileNum) || (d.readPos < d.writePos) {
- if d.nextReadPos == d.readPos {
- dataRead, err = d.readOne()
- if err != nil {
- d.logf(ERROR, "DISKQUEUE(%s) reading at %d of %s - %s",
- d.name, d.readPos, d.fileName(d.readFileNum), err)
- d.handleReadError()
- continue
- }
- }
- r = d.readChan
- } else {
- r = nil
- }
-
- select {
- // the Go channel spec dictates that nil channel operations (read or write)
- // in a select are skipped, we set r to d.readChan only when there is data to read
- case r <- dataRead:
- count++
- // moveForward sets needSync flag if a file is removed
- d.moveForward()
- case d.depthChan <- d.depth:
- case <-d.emptyChan:
- d.emptyResponseChan <- d.deleteAllFiles()
- count = 0
- case dataWrite := <-d.writeChan:
- count++
- d.writeResponseChan <- d.writeOne(dataWrite)
- case <-syncTicker.C:
- if count == 0 {
- // avoid sync when there's no activity
- continue
- }
- d.needSync = true
- case <-d.exitChan:
- goto exit
- }
- }
-
- exit:
- d.logf(INFO, "DISKQUEUE(%s): closing ... ioLoop", d.name)
- syncTicker.Stop()
- d.exitSyncChan <- 1
- }
|