Browse Source

feat: decompress from data stream

feature/assets
Dnomd343 10 months ago
parent
commit
e86b243c02
  1. 44
      assets/common.go
  2. 110
      assets/extract.go
  3. 200
      assets/extract_legacy.go
  4. 215
      assets/extract_test.go
  5. 12
      assets/update.go

44
assets/common.go

@ -0,0 +1,44 @@
package assets
import (
"XProxy/logger"
"io"
"time"
)
// upstream is the set of methods a data source must provide.
// NOTE(review): no implementation or caller is visible here, so the notes on
// each method are inferred from the signatures only — confirm against the
// implementing types.
type upstream interface {
// getTag returns a string tag (presumably the identifier used as the asset
// log prefix — TODO confirm).
getTag() string
// doRequest returns the data as an io.Reader stream.
doRequest() io.Reader
// lastModify returns a time.Time (presumably the last modification time of
// the source — TODO confirm).
lastModify() time.Time
}
// asset bundles a data stream with the tag used to label its log messages
// and an archive marker consulted when reporting read failures.
type asset struct {
// tag is inserted as the "[%s] " prefix of every message logged through
// the debug and error methods.
tag string
// stream is the underlying reader that Read delegates to.
stream io.Reader
// archive selects which failure message Read logs; Read compares it with
// notArchive, gzipArchive, bzip2Archive and xzArchive, which are declared
// outside this block.
archive int
}
// debug writes a debug-level log entry whose message is prefixed with the
// asset tag in square brackets. template and args follow Printf conventions.
func (a *asset) debug(template string, args ...interface{}) {
	// The tag must be the first format argument so it fills the "[%s] " prefix.
	params := make([]interface{}, 0, len(args)+1)
	params = append(params, a.tag)
	params = append(params, args...)
	logger.Debugf("[%s] "+template, params...)
}
// error writes an error-level log entry whose message is prefixed with the
// asset tag in square brackets. template and args follow Printf conventions.
func (a *asset) error(template string, args ...interface{}) {
	// The tag must be the first format argument so it fills the "[%s] " prefix.
	params := make([]interface{}, 0, len(args)+1)
	params = append(params, a.tag)
	params = append(params, args...)
	logger.Errorf("[%s] "+template, params...)
}
// Read implements io.Reader by delegating to the wrapped stream. The byte
// count and error from the underlying reader are returned unchanged; the
// only addition is that a failure other than io.EOF is logged with a message
// chosen by the asset's archive marker.
func (a *asset) Read(p []byte) (n int, err error) {
	n, err = a.stream.Read(p)
	if err == nil || err == io.EOF {
		// Success or a normal end of stream: nothing to report.
		return n, err
	}

	// The data stream is broken: pick the message matching the archive kind.
	var template string
	switch a.archive {
	case notArchive:
		template = "Failed to read data stream -> %v"
	case gzipArchive:
		template = "Failed to extract gzip archive -> %v"
	case bzip2Archive:
		template = "Failed to extract bzip2 archive -> %v"
	case xzArchive:
		template = "Failed to extract xz archive -> %v"
	}
	// An unrecognized archive marker leaves template empty and logs nothing.
	if template != "" {
		a.error(template, err)
	}
	return n, err
}

110
assets/extract.go

@ -1,7 +1,6 @@
package assets package assets
import ( import (
"XProxy/logger"
"bytes" "bytes"
"compress/bzip2" "compress/bzip2"
"github.com/gabriel-vasile/mimetype" "github.com/gabriel-vasile/mimetype"
@ -17,90 +16,65 @@ const (
xzArchive xzArchive
) )
// gzipExtract use to extract independent gzip archive data. // gzipExtract use to extract independent gzip format stream.
func gzipExtract(data []byte) ([]byte, error) { func gzipExtract(a *asset) error {
logger.Debugf("Start extracting gzip archive -> %d bytes", len(data)) a.debug("Start extracting gzip archive stream")
reader, err := gzip.NewReader(bytes.NewReader(data)) reader, err := gzip.NewReader(a.stream)
if err != nil { if err != nil {
logger.Errorf("Failed to extract gzip archive -> %v", err) a.error("Failed to extract gzip archive -> %v", err)
return nil, err return err
} }
defer reader.Close() a.stream, a.archive = reader, gzipArchive
return nil
var buffer bytes.Buffer
size, err := reader.WriteTo(&buffer)
if err != nil {
logger.Errorf("Failed to handle gzip archive -> %v", err)
return nil, err
}
logger.Debugf("Extracted gzip archive successfully -> %d bytes", size)
return buffer.Bytes(), nil
} }
// bzip2Extract use to extract independent bzip2 archive data. // bzip2Extract use to extract independent bzip2 format stream.
func bzip2Extract(data []byte) ([]byte, error) { func bzip2Extract(a *asset) error {
logger.Debugf("Start extracting bzip2 archive -> %d bytes", len(data)) a.debug("Start extracting bzip2 archive stream")
reader := bzip2.NewReader(bytes.NewReader(data)) a.stream = bzip2.NewReader(a.stream)
a.archive = bzip2Archive
return nil
}
var buffer bytes.Buffer // xzExtract use to extract independent xz format stream.
size, err := io.Copy(&buffer, reader) func xzExtract(a *asset) error {
a.debug("Start extracting xz archive stream")
reader, err := xz.NewReader(a.stream)
if err != nil { if err != nil {
logger.Errorf("Failed to extract bzip2 archive -> %v", err) a.error("Failed to extract xz archive -> %v", err)
return nil, err return err
} }
logger.Debugf("Extracted bzip2 archive successfully -> %d bytes", size) a.stream, a.archive = reader, xzArchive
return buffer.Bytes(), nil return nil
} }
// xzExtract use to extract independent xz archive data. // tryExtract try to extract the data stream as a compressed format, and will
func xzExtract(data []byte) ([]byte, error) { // return the original data if it cannot be determined.
logger.Debugf("Start extracting xz archive -> %d bytes", len(data)) func (a *asset) tryExtract() error {
reader, err := xz.NewReader(bytes.NewReader(data)) if a.archive != notArchive {
if err != nil { return nil // already extracted
logger.Errorf("Failed to extract xz archive -> %v", err)
return nil, err
} }
var buffer bytes.Buffer header := bytes.NewBuffer(nil)
size, err := io.Copy(&buffer, reader) mime, err := mimetype.DetectReader(io.TeeReader(a.stream, header))
if err != nil { if err != nil {
logger.Errorf("Failed to handle xz archive -> %v", err) a.error("Failed to detect data stream -> %v", err)
return nil, err return err
} }
logger.Debugf("Extracted xz archive successfully -> %d bytes", size) a.stream = io.MultiReader(header, a.stream) // recycle reader
return buffer.Bytes(), nil
}
// archiveType use to determine the type of archive file. switch mime.String() { // extract with detected mime type
func archiveType(data []byte) uint {
mime := mimetype.Detect(data)
switch mime.String() {
case "application/gzip": case "application/gzip":
logger.Debugf("Data detected as gzip format") a.debug("Data detected as gzip format")
return gzipArchive return gzipExtract(a)
case "application/x-bzip2": case "application/x-bzip2":
logger.Debugf("Data detected as bzip2 format") a.debug("Data detected as bzip2 format")
return bzip2Archive return bzip2Extract(a)
case "application/x-xz": case "application/x-xz":
logger.Debugf("Data detected as xz format") a.debug("Data detected as xz format")
return xzArchive return xzExtract(a)
default:
logger.Debugf("Data detected as non-archive format -> `%s`", mime)
return notArchive
}
}
// tryExtract will try to extract the data as a compressed format, and will
// return the original data if it cannot be determined.
func tryExtract(data []byte) ([]byte, error) {
switch archiveType(data) {
case gzipArchive:
return gzipExtract(data)
case bzip2Archive:
return bzip2Extract(data)
case xzArchive:
return xzExtract(data)
default: default:
return data, nil a.debug("Data detected as non-archive format -> `%s`", mime)
return nil
} }
} }

200
assets/extract_legacy.go

@ -0,0 +1,200 @@
package assets
//const (
// notArchive = iota
// gzipArchive
// bzip2Archive
// xzArchive
//)
//type archive struct {
// id string
// size uint64
// input io.Reader
//}
//
//func (a *archive) Read(p []byte) (n int, err error) {
// n, err = a.input.Read(p)
// if err == io.EOF {
// logger.Debugf("read %d bytes", n)
//
// logger.Debugf("reach ending")
// return n, err
// }
// if err != nil {
// logger.Errorf("Failed to extract archive -> %v", err)
//
// // TODO: do close process
//
// return n, err
// }
//
// logger.Debugf("read %d bytes", n)
//
// return n, err
//}
// gzipExtract is used to extract an independent gzip archive stream.
//func gzipExtract(stream io.Reader) (io.Reader, error) {
// logger.Debugf("Start extracting gzip archive")
// reader, err := gzip.NewReader(stream)
// if err != nil {
// logger.Errorf("Failed to extract gzip archive -> %v", err)
// return nil, err
// }
// //defer reader.Close()
// return reader, nil
//}
// bzip2Extract is used to extract an independent bzip2 archive stream.
//func bzip2Extract(stream io.Reader) (io.Reader, error) {
// logger.Debugf("Start extracting bzip2 archive")
// reader := bzip2.NewReader(stream)
// return reader, nil
//}
// xzExtract is used to extract an independent xz archive stream.
//func xzExtract(stream io.Reader) (io.Reader, error) {
// logger.Debugf("Start extracting xz archive")
// reader, err := xz.NewReader(stream)
// if err != nil {
// logger.Errorf("Failed to extract xz archive -> %v", err)
// return nil, err
// }
// return reader, nil
//}
// gzipExtract use to extract independent gzip archive data.
//func gzipExtract(data []byte) ([]byte, error) {
// logger.Debugf("Start extracting gzip archive -> %d bytes", len(data))
// reader, err := gzip.NewReader(bytes.NewReader(data))
// if err != nil {
// logger.Errorf("Failed to extract gzip archive -> %v", err)
// return nil, err
// }
// defer reader.Close()
//
// var buffer bytes.Buffer
// size, err := reader.WriteTo(&buffer)
// if err != nil {
// logger.Errorf("Failed to handle gzip archive -> %v", err)
// return nil, err
// }
// logger.Debugf("Extracted gzip archive successfully -> %d bytes", size)
// return buffer.Bytes(), nil
//}
// bzip2Extract use to extract independent bzip2 archive data.
//func bzip2Extract(data []byte) ([]byte, error) {
// logger.Debugf("Start extracting bzip2 archive -> %d bytes", len(data))
// reader := bzip2.NewReader(bytes.NewReader(data))
//
// var buffer bytes.Buffer
// size, err := io.Copy(&buffer, reader)
// if err != nil {
// logger.Errorf("Failed to extract bzip2 archive -> %v", err)
// return nil, err
// }
// logger.Debugf("Extracted bzip2 archive successfully -> %d bytes", size)
// return buffer.Bytes(), nil
//}
// xzExtract use to extract independent xz archive data.
//func xzExtract(data []byte) ([]byte, error) {
// logger.Debugf("Start extracting xz archive -> %d bytes", len(data))
// reader, err := xz.NewReader(bytes.NewReader(data))
// if err != nil {
// logger.Errorf("Failed to extract xz archive -> %v", err)
// return nil, err
// }
//
// var buffer bytes.Buffer
// size, err := io.Copy(&buffer, reader)
// if err != nil {
// logger.Errorf("Failed to handle xz archive -> %v", err)
// return nil, err
// }
// logger.Debugf("Extracted xz archive successfully -> %d bytes", size)
// return buffer.Bytes(), nil
//}
//func recycleReader(input io.Reader) (mimeType string, recycled io.Reader, err error) {
// // header will store the bytes mimetype uses for detection.
// header := bytes.NewBuffer(nil)
//
// // After DetectReader, the data read from input is copied into header.
// mtype, err := mimetype.DetectReader(io.TeeReader(input, header))
// if err != nil {
// fmt.Printf("error -> %v\n", err)
// return
// }
//
// // Concatenate back the header to the rest of the file.
// // recycled now contains the complete, original data.
// recycled = io.MultiReader(header, input)
//
// fmt.Printf("mime-type -> %v\n", mtype)
//
// return mtype.String(), recycled, err
//}
// archiveType use to determine the type of archive file.
//func archiveType(stream io.Reader) uint {
//
// fmt.Println("start")
//
// mime, stream, _ := recycleReader(stream)
//
// fmt.Println("end")
//
// //mime := mimetype.Detect(data)
// switch mime {
// case "application/gzip":
// logger.Debugf("Data detected as gzip format")
// return gzipArchive
// case "application/x-bzip2":
// logger.Debugf("Data detected as bzip2 format")
// return bzip2Archive
// case "application/x-xz":
// logger.Debugf("Data detected as xz format")
// return xzArchive
// default:
// logger.Debugf("Data detected as non-archive format -> `%s`", mime)
// return notArchive
// }
//}
// archiveType use to determine the type of archive file.
//func archiveType(data []byte) uint {
// mime := mimetype.Detect(data)
// switch mime.String() {
// case "application/gzip":
// logger.Debugf("Data detected as gzip format")
// return gzipArchive
// case "application/x-bzip2":
// logger.Debugf("Data detected as bzip2 format")
// return bzip2Archive
// case "application/x-xz":
// logger.Debugf("Data detected as xz format")
// return xzArchive
// default:
// logger.Debugf("Data detected as non-archive format -> `%s`", mime)
// return notArchive
// }
//}
// tryExtract will try to extract the data as a compressed format, and will
// return the original data if it cannot be determined.
//func tryExtract(data []byte) ([]byte, error) {
// //switch archiveType(data) {
// //case gzipArchive:
// // //return gzipExtract(data)
// //case bzip2Archive:
// // //return bzip2Extract(data)
// //case xzArchive:
// // //return xzExtract(data)
// //default:
// // return data, nil
// //}
// return nil, nil
//}

215
assets/extract_test.go

@ -1,12 +1,14 @@
package assets package assets
import ( import (
"XProxy/logger"
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"crypto/rand" "crypto/rand"
"fmt"
"github.com/dsnet/compress/bzip2" "github.com/dsnet/compress/bzip2"
"github.com/stretchr/testify/assert"
"github.com/ulikunitz/xz" "github.com/ulikunitz/xz"
"io"
mrand "math/rand" mrand "math/rand"
"testing" "testing"
) )
@ -27,7 +29,8 @@ func randInt(min int, max int) int {
func randData() []byte { func randData() []byte {
raw := randBytes(1024) raw := randBytes(1024)
size := randInt(testMinSize, testMaxSize) //size := randInt(testMinSize, testMaxSize)
size := 257
var buffer bytes.Buffer var buffer bytes.Buffer
for i := 0; i < size; i++ { for i := 0; i < size; i++ {
buffer.Write(raw) buffer.Write(raw)
@ -61,58 +64,178 @@ func xzCompress(data []byte) []byte {
return buf.Bytes() return buf.Bytes()
} }
func TestGzipExtract(t *testing.T) { //func TestGzipExtract(t *testing.T) {
raw := randData() // raw := randData()
gzOk := gzipCompress(raw) // gzOk := gzipCompress(raw)
gzErr := append(gzOk, randBytes(randInt(1, 16))...) // gzErr := append(gzOk, randBytes(randInt(1, 16))...)
//
ret, err := gzipExtract(gzOk) // ret, err := gzipExtract(gzOk)
assert.Nil(t, err) // assert.Nil(t, err)
assert.Equal(t, raw, ret) // assert.Equal(t, raw, ret)
_, err = gzipExtract(gzErr) // _, err = gzipExtract(gzErr)
assert.NotNil(t, err) // assert.NotNil(t, err)
//}
//func TestBzip2Extract(t *testing.T) {
// raw := randData()
// bz2Ok := bzip2Compress(raw)
// bz2Err := append(bz2Ok, randBytes(randInt(1, 16))...)
//
// ret, err := bzip2Extract(bz2Ok)
// assert.Nil(t, err)
// assert.Equal(t, raw, ret)
// _, err = bzip2Extract(bz2Err)
// assert.NotNil(t, err)
//}
//func TestXzExtract(t *testing.T) {
// raw := randData()
// xzOk := xzCompress(raw)
// xzErr := append(xzOk, randBytes(randInt(1, 16))...)
//
// ret, err := xzExtract(xzOk)
// assert.Nil(t, err)
// assert.Equal(t, raw, ret)
// _, err = xzExtract(xzErr)
// assert.NotNil(t, err)
//}
//func TestExtract(t *testing.T) {
// raw := randData()
//
// ret, err := tryExtract(raw)
// assert.Nil(t, err)
// assert.Equal(t, raw, ret)
//
// ret, err = tryExtract(gzipCompress(raw))
// assert.Nil(t, err)
// assert.Equal(t, raw, ret)
//
// ret, err = tryExtract(bzip2Compress(raw))
// assert.Nil(t, err)
// assert.Equal(t, raw, ret)
//
// ret, err = tryExtract(xzCompress(raw))
// assert.Nil(t, err)
// assert.Equal(t, raw, ret)
//}
func Test_demo(t *testing.T) {
//data := gzipCompress(randData())
//data := randData()
//data = append(data, randBytes(randInt(1, 16))...)
//fmt.Printf("origin gzip size -> %d\n", len(data))
//data := randData()
//data := bzip2Compress(randData())
//data = append(data, randBytes(randInt(1, 16))...)
//fmt.Printf("origin bzip2 size -> %d\n", len(data))
//data := randData()
data := xzCompress(randData())
data = append(data, randBytes(randInt(1, 16))...)
fmt.Printf("origin xz size -> %d\n", len(data))
//buffer := bytes.NewReader(data)
//reader, err := gzipExtract(buffer)
//reader, err := bzip2Extract(buffer)
//reader, err := xzExtract(buffer)
//archiveType(buffer)
//fmt.Printf("%v\n", err)
//
//buf := make([]byte, 1024*1024*4)
//for {
// n, err := reader.Read(buf)
//
// if err == io.EOF {
// fmt.Println("reach stream ending")
// break
// }
// if err != nil {
// fmt.Printf("stream error -> %v", err)
// return
// }
//
// fmt.Printf("get %d bytes\n", n)
//
//}
} }
func TestBzip2Extract(t *testing.T) { func init() {
raw := randData() logger.SetLevel(logger.DebugLevel)
bz2Ok := bzip2Compress(raw)
bz2Err := append(bz2Ok, randBytes(randInt(1, 16))...)
ret, err := bzip2Extract(bz2Ok)
assert.Nil(t, err)
assert.Equal(t, raw, ret)
_, err = bzip2Extract(bz2Err)
assert.NotNil(t, err)
} }
func TestXzExtract(t *testing.T) { //func Test_archive(t *testing.T) {
raw := randData() // data := gzipCompress(randData())
xzOk := xzCompress(raw) // fmt.Printf("origin gzip size -> %d\n", len(data))
xzErr := append(xzOk, randBytes(randInt(1, 16))...) //
// kk := asset{
ret, err := xzExtract(xzOk) // tag: "A7B932FD11",
assert.Nil(t, err) // stream: bytes.NewReader(data),
assert.Equal(t, raw, ret) // }
_, err = xzExtract(xzErr) //
assert.NotNil(t, err) // kk.gzipExtract()
//
// buf := make([]byte, 4*1024*1024)
// for {
// n, err := kk.stream.Read(buf)
//
// if err == io.EOF {
// fmt.Printf("get %d bytes\n", n)
// fmt.Printf("reach stream ending\n")
// return
// }
//
// if err != nil {
// fmt.Printf("stream error -> %v\n", err)
// return
// }
// fmt.Printf("get %d bytes\n", n)
// }
//
//}
type brokenReader struct {
time int
} }
func TestExtract(t *testing.T) { func (b *brokenReader) Read(p []byte) (n int, err error) {
raw := randData() b.time += 1
fmt.Printf("Read time = %d\n", b.time)
ret, err := tryExtract(raw) if b.time < 16 {
assert.Nil(t, err) return 1024, nil
assert.Equal(t, raw, ret) } else {
return 0, io.ErrShortWrite
}
}
ret, err = tryExtract(gzipCompress(raw)) func Test_extract(t *testing.T) {
assert.Nil(t, err)
assert.Equal(t, raw, ret)
ret, err = tryExtract(bzip2Compress(raw)) raw := randData()
assert.Nil(t, err) data := gzipCompress(raw)
assert.Equal(t, raw, ret) data = append(data, randBytes(3)...)
fmt.Printf("origin data size -> %d\n", len(data))
as := asset{
tag: "DEMO",
//stream: &brokenReader{time: 0},
stream: bytes.NewReader(data),
}
if err := as.tryExtract(); err != nil {
fmt.Printf("try extract error -> %v\n", err)
} else {
if n, err := io.Copy(io.Discard, &as); err != nil {
fmt.Printf("data stream error -> %v\n", err)
} else {
fmt.Printf("data stream complete -> %d bytes\n", n)
}
}
ret, err = tryExtract(xzCompress(raw))
assert.Nil(t, err)
assert.Equal(t, raw, ret)
} }

12
assets/update.go

@ -49,9 +49,9 @@ func updateRemoteAsset(file string, url string, proxy *urlpkg.URL) error {
logger.Errorf("Failed to download remote asset `%s`", url) logger.Errorf("Failed to download remote asset `%s`", url)
return err return err
} }
if asset, err = tryExtract(asset); err != nil { //if asset, err = tryExtract(asset); err != nil {
return err // return err
} //}
if err := saveAsset(file, asset, date); err != nil { if err := saveAsset(file, asset, date); err != nil {
return err return err
} }
@ -79,9 +79,9 @@ func updateLocalAsset(file string, src string) error {
logger.Errorf("Failed to read local asset -> %v", err) logger.Errorf("Failed to read local asset -> %v", err)
return err return err
} }
if asset, err = tryExtract(asset); err != nil { //if asset, err = tryExtract(asset); err != nil {
return err // return err
} //}
if err := saveAsset(file, asset, &date); err != nil { if err := saveAsset(file, asset, &date); err != nil {
return err return err
} }

Loading…
Cancel
Save