// Copyright 2010 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package pgzip

import (
"bytes"
"errors"
"fmt"
"hash"
"io"
"sync"
"time"
"github.com/klauspost/compress/flate"
"github.com/klauspost/crc32"
)
const (
defaultBlockSize = 256 << 10 // 256 KiB per block by default.
tailSize = 16384 // Bytes of the previous block reused as deflate dictionary.
defaultBlocks = 16 // Default number of blocks that may be compressed concurrently.
)
// These constants are copied from the flate package, so that code that imports
// "compress/gzip" does not also have to import "compress/flate".
const (
NoCompression = flate.NoCompression
BestSpeed = flate.BestSpeed
BestCompression = flate.BestCompression
DefaultCompression = flate.DefaultCompression
ConstantCompression = flate.ConstantCompression
HuffmanOnly = flate.HuffmanOnly
)
// A Writer is an io.WriteCloser.
// Writes to a Writer are compressed and written to w.
type Writer struct {
Header
w io.Writer
level int
wroteHeader bool
blockSize int // Uncompressed bytes per block.
blocks int // Maximum number of blocks in flight (capacity of results).
currentBuffer []byte // Uncompressed data not yet handed to a compressor.
prevTail []byte // Last tailSize bytes of the previous block, used as deflate dictionary.
digest hash.Hash32 // CRC-32 (IEEE) of the uncompressed data.
size int // Total number of uncompressed bytes written.
closed bool
buf [10]byte // Scratch space for the gzip header and trailer.
errMu sync.RWMutex // Guards err.
err error
pushedErr chan struct{} // Closed once an error has been set.
results chan result // Per-block results, in write order.
dictFlatePool sync.Pool // Reusable *flate.Writer compressors.
dstPool sync.Pool // Reusable output buffers.
wg sync.WaitGroup // Outstanding compressBlock goroutines.
}
// result describes the outcome of compressing one block: the compressed
// bytes are delivered on result, and notifyWritten is closed once they have
// been written to the underlying writer (or an error occurred).
type result struct {
result chan []byte
notifyWritten chan struct{}
}
// Use SetConcurrency to fine-tune the concurrency level if needed.
//
// With this you can control the approximate size of your blocks,
// as well as how many you want to be processing in parallel.
//
// The default is SetConcurrency(256<<10, 16),
// meaning blocks are split at 256 KiB and up to 16 blocks
// can be processing at once before the writer blocks.
func (z *Writer) SetConcurrency(blockSize, blocks int) error {
if blockSize <= tailSize {
return fmt.Errorf("gzip: block size cannot be less than or equal to %d", tailSize)
}
if blocks <= 0 {
return errors.New("gzip: blocks cannot be zero or less")
}
if blockSize == z.blockSize && blocks == z.blocks {
return nil
}
z.blockSize = blockSize
z.results = make(chan result, blocks)
z.blocks = blocks
z.dstPool = sync.Pool{New: func() interface{} { return make([]byte, 0, blockSize+(blockSize)>>4) }}
return nil
}
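
// Illustrative usage sketch (not part of the original source; "out" is a
// placeholder io.Writer): tune the block size and parallelism before the
// first Write, for example 1 MiB blocks with at most 8 blocks in flight:
//
//	w := pgzip.NewWriter(out)
//	if err := w.SetConcurrency(1<<20, 8); err != nil {
//		// The block size or block count was invalid.
//	}
//
// Larger blocks cost more memory per block; the block count bounds how much
// compressed output can queue up before Write starts blocking.
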
// NewWriter returns a new Writer.
// Writes to the returned writer are compressed and written to w.
//
// It is the caller's responsibility to call Close on the WriteCloser when done.
// Writes may be buffered and not flushed until Close.
//
// Callers that wish to set the fields in Writer.Header must do so before
// the first call to Write or Close. The Comment and Name header fields are
// UTF-8 strings in Go, but the underlying format requires NUL-terminated ISO
// 8859-1 (Latin-1). NUL or non-Latin-1 runes in those strings will lead to an
// error on Write.
func NewWriter(w io.Writer) *Writer {
z, _ := NewWriterLevel(w, DefaultCompression)
return z
}
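
// Illustrative usage sketch (not part of the original source; "f" and "data"
// are placeholders): header fields must be set before the first Write, and
// Close must be called to emit the gzip trailer:
//
//	w := pgzip.NewWriter(f)
//	w.Name = "data.txt"
//	w.ModTime = time.Now()
//	if _, err := w.Write(data); err != nil {
//		// Errors may originate from an earlier, asynchronously compressed block.
//	}
//	if err := w.Close(); err != nil {
//		// Close reports any pending compression or write error.
//	}
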
// NewWriterLevel is like NewWriter but specifies the compression level instead
// of assuming DefaultCompression.
//
// The compression level can be DefaultCompression, NoCompression,
// ConstantCompression (HuffmanOnly), or any integer value between BestSpeed
// and BestCompression inclusive. The error returned will be nil if the
// level is valid.
func NewWriterLevel(w io.Writer, level int) (*Writer, error) {
if level < ConstantCompression || level > BestCompression {
return nil, fmt.Errorf("gzip: invalid compression level: %d", level)
}
z := new(Writer)
z.SetConcurrency(defaultBlockSize, defaultBlocks)
z.init(w, level)
return z, nil
}
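
// Illustrative sketch (not part of the original source): choosing a level
// explicitly, here favouring speed over ratio, and checking the level error:
//
//	w, err := pgzip.NewWriterLevel(out, pgzip.BestSpeed)
//	if err != nil {
//		// The requested level was outside the accepted range.
//	}
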
// pushError must be used by goroutines to set an
// error condition, since direct access to z.err is restricted
// to the caller's goroutine.
func (z *Writer) pushError(err error) {
z.errMu.Lock()
if z.err != nil {
z.errMu.Unlock()
return
}
z.err = err
close(z.pushedErr)
z.errMu.Unlock()
}
func (z *Writer) init(w io.Writer, level int) {
z.wg.Wait()
digest := z.digest
if digest != nil {
digest.Reset()
} else {
digest = crc32.NewIEEE()
}
z.Header = Header{OS: 255}
z.w = w
z.level = level
z.digest = digest
z.pushedErr = make(chan struct{}, 0)
z.results = make(chan result, z.blocks)
z.err = nil
z.closed = false
z.Comment = ""
z.Extra = nil
z.ModTime = time.Time{}
z.wroteHeader = false
z.currentBuffer = nil
z.buf = [10]byte{}
z.prevTail = nil
z.size = 0
if z.dictFlatePool.New == nil {
z.dictFlatePool.New = func() interface{} {
f, _ := flate.NewWriterDict(w, level, nil)
return f
}
}
}
// Reset discards the Writer z's state and makes it equivalent to the
// result of its original state from NewWriter or NewWriterLevel, but
// writing to w instead. This permits reusing a Writer rather than
// allocating a new one.
func (z *Writer) Reset(w io.Writer) {
if z.results != nil && !z.closed {
close(z.results)
}
z.SetConcurrency(defaultBlockSize, defaultBlocks)
z.init(w, z.level)
}
// GZIP (RFC 1952) is little-endian, unlike ZLIB (RFC 1950).
func put2(p []byte, v uint16) {
p[0] = uint8(v >> 0)
p[1] = uint8(v >> 8)
}
func put4(p []byte, v uint32) {
p[0] = uint8(v >> 0)
p[1] = uint8(v >> 8)
p[2] = uint8(v >> 16)
p[3] = uint8(v >> 24)
}
// writeBytes writes a length-prefixed byte slice to z.w.
func (z *Writer) writeBytes(b []byte) error {
if len(b) > 0xffff {
return errors.New("gzip.Write: Extra data is too large")
}
put2(z.buf[0:2], uint16(len(b)))
_, err := z.w.Write(z.buf[0:2])
if err != nil {
return err
}
_, err = z.w.Write(b)
return err
}
// writeString writes a UTF-8 string s in GZIP's format to z.w.
// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1).
func (z *Writer) writeString(s string) (err error) {
// GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII.
needconv := false
for _, v := range s {
if v == 0 || v > 0xff {
return errors.New("gzip.Write: non-Latin-1 header string")
}
if v > 0x7f {
needconv = true
}
}
if needconv {
b := make([]byte, 0, len(s))
for _, v := range s {
b = append(b, byte(v))
}
_, err = z.w.Write(b)
} else {
_, err = io.WriteString(z.w, s)
}
if err != nil {
return err
}
// GZIP strings are NUL-terminated.
z.buf[0] = 0
_, err = z.w.Write(z.buf[0:1])
return err
}
// compressCurrent will compress the data currently buffered.
// This should only be called from the main writer/flush/closer.
func (z *Writer) compressCurrent(flush bool) {
r := result{}
r.result = make(chan []byte, 1)
r.notifyWritten = make(chan struct{}, 0)
select {
case z.results <- r:
case <-z.pushedErr:
return
}
// If the buffered block is more than twice the block size, split it.
c := z.currentBuffer
if len(c) > z.blockSize*2 {
c = c[:z.blockSize]
z.wg.Add(1)
go z.compressBlock(c, z.prevTail, r, false)
z.prevTail = c[len(c)-tailSize:]
z.currentBuffer = z.currentBuffer[z.blockSize:]
z.compressCurrent(flush)
// Last one flushes if needed
return
}
z.wg.Add(1)
go z.compressBlock(c, z.prevTail, r, z.closed)
if len(c) > tailSize {
z.prevTail = c[len(c)-tailSize:]
} else {
z.prevTail = nil
}
z.currentBuffer = z.dstPool.Get().([]byte)
z.currentBuffer = z.currentBuffer[:0]
// Wait if flushing
if flush {
<-r.notifyWritten
}
}
// checkError returns an error if one has been set.
// It must not be called from the internal (compression) goroutines.
func (z *Writer) checkError() error {
z.errMu.RLock()
err := z.err
z.errMu.RUnlock()
return err
}
// Write writes a compressed form of p to the underlying io.Writer. The
// compressed bytes are not necessarily flushed to output until
// the Writer is closed or Flush() is called.
//
// The function will return quickly if there are unused buffers available.
// The supplied slice (p) is copied, so the caller is free to re-use the buffer
// when the function returns.
//
// Errors that occur during compression will be reported later, and a nil error
// does not signify that the compression succeeded (since it is most likely still running).
// That means that the call that returns an error may not be the call that caused it.
// Only the Flush and Close functions are guaranteed to return all errors reported up to that point.
func (z *Writer) Write(p []byte) (int, error) {
if err := z.checkError(); err != nil {
return 0, err
}
// Write the GZIP header lazily.
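// The fixed part of the header is 10 bytes (RFC 1952): ID1 (0x1f), ID2 (0x8b),
// CM (8 = deflate), FLG, a 4-byte little-endian MTIME, XFL and OS. Optional
// extra/name/comment fields follow, selected by the bits set in FLG below.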
if !z.wroteHeader {
z.wroteHeader = true
z.buf[0] = gzipID1
z.buf[1] = gzipID2
z.buf[2] = gzipDeflate
z.buf[3] = 0
if z.Extra != nil {
z.buf[3] |= 0x04
}
if z.Name != "" {
z.buf[3] |= 0x08
}
if z.Comment != "" {
z.buf[3] |= 0x10
}
put4(z.buf[4:8], uint32(z.ModTime.Unix()))
if z.level == BestCompression {
z.buf[8] = 2
} else if z.level == BestSpeed {
z.buf[8] = 4
} else {
z.buf[8] = 0
}
z.buf[9] = z.OS
var n int
var err error
n, err = z.w.Write(z.buf[0:10])
if err != nil {
z.pushError(err)
return n, err
}
if z.Extra != nil {
err = z.writeBytes(z.Extra)
if err != nil {
z.pushError(err)
return n, err
}
}
if z.Name != "" {
err = z.writeString(z.Name)
if err != nil {
z.pushError(err)
return n, err
}
}
if z.Comment != "" {
err = z.writeString(z.Comment)
if err != nil {
z.pushError(err)
return n, err
}
}
// Start receiving data from compressors
go func() {
listen := z.results
for {
r, ok := <-listen
// If closed, we are finished.
if !ok {
return
}
buf := <-r.result
n, err := z.w.Write(buf)
if err != nil {
z.pushError(err)
close(r.notifyWritten)
return
}
if n != len(buf) {
z.pushError(fmt.Errorf("gzip: short write %d should be %d", n, len(buf)))
close(r.notifyWritten)
return
}
z.dstPool.Put(buf)
close(r.notifyWritten)
}
}()
z.currentBuffer = make([]byte, 0, z.blockSize)
}
q := p
for len(q) > 0 {
length := len(q)
if length+len(z.currentBuffer) > z.blockSize {
length = z.blockSize - len(z.currentBuffer)
}
z.digest.Write(q[:length])
z.currentBuffer = append(z.currentBuffer, q[:length]...)
if len(z.currentBuffer) >= z.blockSize {
z.compressCurrent(false)
if err := z.checkError(); err != nil {
return len(p) - len(q) - length, err
}
}
z.size += length
q = q[length:]
}
return len(p), z.checkError()
}
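
// Illustrative sketch (not part of the original source; "src" and "dst" are
// placeholders): since Write only copies p into an internal buffer and hands
// full blocks to background goroutines, the writer pairs naturally with
// io.Copy for large streams:
//
//	w := pgzip.NewWriter(dst)
//	if _, err := io.Copy(w, src); err != nil {
//		// The error may come from an earlier, asynchronously compressed block.
//	}
//	err := w.Close() // Close surfaces any remaining errors.
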
// compressBlock compresses a single block:
// Step 1: compress the buffer into a new buffer.
// Step 2: send the compressed output to the result channel.
// Step 3: close the result channel to indicate we are done.
func (z *Writer) compressBlock(p, prevTail []byte, r result, closed bool) {
defer func() {
close(r.result)
z.wg.Done()
}()
buf := z.dstPool.Get().([]byte)
dest := bytes.NewBuffer(buf[:0])
compressor := z.dictFlatePool.Get().(*flate.Writer)
compressor.ResetDict(dest, prevTail)
compressor.Write(p)
err := compressor.Flush()
if err != nil {
z.pushError(err)
return
}
if closed {
err = compressor.Close()
if err != nil {
z.pushError(err)
return
}
}
z.dictFlatePool.Put(compressor)
// Read back buffer
buf = dest.Bytes()
r.result <- buf
}
// Flush flushes any pending compressed data to the underlying writer.
//
// It is useful mainly in compressed network protocols, to ensure that
// a remote reader has enough data to reconstruct a packet. Flush does
// not return until the data has been written. If the underlying
// writer returns an error, Flush returns that error.
//
// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH.
func (z *Writer) Flush() error {
if err := z.checkError(); err != nil {
return err
}
if z.closed {
return nil
}
if !z.wroteHeader {
_, err := z.Write(nil)
if err != nil {
return err
}
}
// We send current block to compression
z.compressCurrent(true)
return z.checkError()
}
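
// Illustrative sketch (not part of the original source; "conn" and "msg" are
// placeholders): in a streaming protocol, Flush after each message so the
// remote side can decode it without waiting for Close:
//
//	w := pgzip.NewWriter(conn)
//	if _, err := w.Write(msg); err != nil {
//		// Handle the write error.
//	}
//	if err := w.Flush(); err != nil {
//		// All blocks buffered so far were compressed and written, or the
//		// first error encountered is returned here.
//	}
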
// UncompressedSize will return the number of uncompressed bytes written so far.
// pgzip only; this is not a function in the official gzip package.
func (z *Writer) UncompressedSize() int {
return z.size
}
// Close closes the Writer, flushing any unwritten data to the underlying
// io.Writer, but does not close the underlying io.Writer.
func (z *Writer) Close() error {
if err := z.checkError(); err != nil {
return err
}
if z.closed {
return nil
}
z.closed = true
if !z.wroteHeader {
z.Write(nil)
if err := z.checkError(); err != nil {
return err
}
}
z.compressCurrent(true)
if err := z.checkError(); err != nil {
return err
}
close(z.results)
put4(z.buf[0:4], z.digest.Sum32())
put4(z.buf[4:8], uint32(z.size))
_, err := z.w.Write(z.buf[0:8])
if err != nil {
z.pushError(err)
return err
}
return nil
}
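
// Illustrative round-trip sketch (not part of the original source): the
// output is a standard gzip stream, so it can be verified with the stock
// compress/gzip reader:
//
//	var buf bytes.Buffer
//	w := pgzip.NewWriter(&buf)
//	w.Write([]byte("hello"))
//	w.Close()
//	if r, err := gzip.NewReader(&buf); err == nil {
//		plain, _ := io.ReadAll(r)
//		_ = plain // "hello"
//		r.Close()
//	}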