From e86b243c022bf7d3423be2bb2873d40b29020f0a Mon Sep 17 00:00:00 2001 From: Dnomd343 Date: Fri, 16 Feb 2024 17:44:23 +0800 Subject: [PATCH] feat: decompress from data stream --- assets/common.go | 44 ++++++++ assets/extract.go | 110 ++++++++------------ assets/extract_legacy.go | 200 ++++++++++++++++++++++++++++++++++++ assets/extract_test.go | 215 ++++++++++++++++++++++++++++++--------- assets/update.go | 12 +-- 5 files changed, 461 insertions(+), 120 deletions(-) create mode 100644 assets/common.go create mode 100644 assets/extract_legacy.go diff --git a/assets/common.go b/assets/common.go new file mode 100644 index 0000000..d435c3e --- /dev/null +++ b/assets/common.go @@ -0,0 +1,44 @@ +package assets + +import ( + "XProxy/logger" + "io" + "time" +) + +type upstream interface { + getTag() string + doRequest() io.Reader + lastModify() time.Time +} + +type asset struct { + tag string + stream io.Reader + archive int +} + +func (a *asset) debug(template string, args ...interface{}) { + logger.Debugf("[%s] "+template, append([]interface{}{a.tag}, args...)...) +} + +func (a *asset) error(template string, args ...interface{}) { + logger.Errorf("[%s] "+template, append([]interface{}{a.tag}, args...)...) 
+} + +func (a *asset) Read(p []byte) (n int, err error) { + n, err = a.stream.Read(p) + if err != nil && err != io.EOF { // data stream broken + switch a.archive { + case notArchive: + a.error("Failed to read data stream -> %v", err) + case gzipArchive: + a.error("Failed to extract gzip archive -> %v", err) + case bzip2Archive: + a.error("Failed to extract bzip2 archive -> %v", err) + case xzArchive: + a.error("Failed to extract xz archive -> %v", err) + } + } + return n, err +} diff --git a/assets/extract.go b/assets/extract.go index 8c83400..94eebe6 100644 --- a/assets/extract.go +++ b/assets/extract.go @@ -1,7 +1,6 @@ package assets import ( - "XProxy/logger" "bytes" "compress/bzip2" "github.com/gabriel-vasile/mimetype" @@ -17,90 +16,65 @@ const ( xzArchive ) -// gzipExtract use to extract independent gzip archive data. -func gzipExtract(data []byte) ([]byte, error) { - logger.Debugf("Start extracting gzip archive -> %d bytes", len(data)) - reader, err := gzip.NewReader(bytes.NewReader(data)) +// gzipExtract is used to extract an independent gzip format stream. +func gzipExtract(a *asset) error { + a.debug("Start extracting gzip archive stream") + reader, err := gzip.NewReader(a.stream) if err != nil { - logger.Errorf("Failed to extract gzip archive -> %v", err) - return nil, err + a.error("Failed to extract gzip archive -> %v", err) + return err } - defer reader.Close() - - var buffer bytes.Buffer - size, err := reader.WriteTo(&buffer) - if err != nil { - logger.Errorf("Failed to handle gzip archive -> %v", err) - return nil, err - } - logger.Debugf("Extracted gzip archive successfully -> %d bytes", size) - return buffer.Bytes(), nil + a.stream, a.archive = reader, gzipArchive + return nil } -// bzip2Extract use to extract independent bzip2 archive data.
-func bzip2Extract(data []byte) ([]byte, error) { - logger.Debugf("Start extracting bzip2 archive -> %d bytes", len(data)) - reader := bzip2.NewReader(bytes.NewReader(data)) +// bzip2Extract is used to extract an independent bzip2 format stream. +func bzip2Extract(a *asset) error { + a.debug("Start extracting bzip2 archive stream") + a.stream = bzip2.NewReader(a.stream) + a.archive = bzip2Archive + return nil +} - var buffer bytes.Buffer - size, err := io.Copy(&buffer, reader) +// xzExtract is used to extract an independent xz format stream. +func xzExtract(a *asset) error { + a.debug("Start extracting xz archive stream") + reader, err := xz.NewReader(a.stream) if err != nil { - logger.Errorf("Failed to extract bzip2 archive -> %v", err) - return nil, err + a.error("Failed to extract xz archive -> %v", err) + return err } - logger.Debugf("Extracted bzip2 archive successfully -> %d bytes", size) - return buffer.Bytes(), nil + a.stream, a.archive = reader, xzArchive + return nil } -// xzExtract use to extract independent xz archive data. -func xzExtract(data []byte) ([]byte, error) { - logger.Debugf("Start extracting xz archive -> %d bytes", len(data)) - reader, err := xz.NewReader(bytes.NewReader(data)) - if err != nil { - logger.Errorf("Failed to extract xz archive -> %v", err) - return nil, err +// tryExtract tries to extract the data stream as a compressed format, and will +// yield the original data if the format cannot be determined.
+func (a *asset) tryExtract() error { + if a.archive != notArchive { + return nil // already extracted } - var buffer bytes.Buffer - size, err := io.Copy(&buffer, reader) + header := bytes.NewBuffer(nil) + mime, err := mimetype.DetectReader(io.TeeReader(a.stream, header)) if err != nil { - logger.Errorf("Failed to handle xz archive -> %v", err) - return nil, err + a.error("Failed to detect data stream -> %v", err) + return err } - logger.Debugf("Extracted xz archive successfully -> %d bytes", size) - return buffer.Bytes(), nil -} + a.stream = io.MultiReader(header, a.stream) // recycle reader -// archiveType use to determine the type of archive file. -func archiveType(data []byte) uint { - mime := mimetype.Detect(data) - switch mime.String() { + switch mime.String() { // extract with detected mime type case "application/gzip": - logger.Debugf("Data detected as gzip format") - return gzipArchive + a.debug("Data detected as gzip format") + return gzipExtract(a) case "application/x-bzip2": - logger.Debugf("Data detected as bzip2 format") - return bzip2Archive + a.debug("Data detected as bzip2 format") + return bzip2Extract(a) case "application/x-xz": - logger.Debugf("Data detected as xz format") - return xzArchive - default: - logger.Debugf("Data detected as non-archive format -> `%s`", mime) - return notArchive - } -} - -// tryExtract will try to extract the data as a compressed format, and will -// return the original data if it cannot be determined. 
-func tryExtract(data []byte) ([]byte, error) { - switch archiveType(data) { - case gzipArchive: - return gzipExtract(data) - case bzip2Archive: - return bzip2Extract(data) - case xzArchive: - return xzExtract(data) + a.debug("Data detected as xz format") + return xzExtract(a) default: - return data, nil + a.debug("Data detected as non-archive format -> `%s`", mime) + return nil } } diff --git a/assets/extract_legacy.go b/assets/extract_legacy.go new file mode 100644 index 0000000..1f47532 --- /dev/null +++ b/assets/extract_legacy.go @@ -0,0 +1,200 @@ +package assets + +//const ( +// notArchive = iota +// gzipArchive +// bzip2Archive +// xzArchive +//) + +//type archive struct { +// id string +// size uint64 +// input io.Reader +//} +// +//func (a *archive) Read(p []byte) (n int, err error) { +// n, err = a.input.Read(p) +// if err == io.EOF { +// logger.Debugf("read %d bytes", n) +// +// logger.Debugf("reach ending") +// return n, err +// } +// if err != nil { +// logger.Errorf("Failed to extract archive -> %v", err) +// +// // TODO: do close process +// +// return n, err +// } +// +// logger.Debugf("read %d bytes", n) +// +// return n, err +//} + +// gzipExtract use to extract independent gzip archive stream. +//func gzipExtract(stream io.Reader) (io.Reader, error) { +// logger.Debugf("Start extracting gzip archive") +// reader, err := gzip.NewReader(stream) +// if err != nil { +// logger.Errorf("Failed to extract gzip archive -> %v", err) +// return nil, err +// } +// //defer reader.Close() +// return reader, nil +//} + +// bzip2Extract use to extract independent bzip2 archive stream. +//func bzip2Extract(stream io.Reader) (io.Reader, error) { +// logger.Debugf("Start extracting bzip2 archive") +// reader := bzip2.NewReader(stream) +// return reader, nil +//} + +// xzExtract use to extract independent xz archive stream. 
+//func xzExtract(stream io.Reader) (io.Reader, error) { +// logger.Debugf("Start extracting xz archive") +// reader, err := xz.NewReader(stream) +// if err != nil { +// logger.Errorf("Failed to extract xz archive -> %v", err) +// return nil, err +// } +// return reader, nil +//} + +// gzipExtract use to extract independent gzip archive data. +//func gzipExtract(data []byte) ([]byte, error) { +// logger.Debugf("Start extracting gzip archive -> %d bytes", len(data)) +// reader, err := gzip.NewReader(bytes.NewReader(data)) +// if err != nil { +// logger.Errorf("Failed to extract gzip archive -> %v", err) +// return nil, err +// } +// defer reader.Close() +// +// var buffer bytes.Buffer +// size, err := reader.WriteTo(&buffer) +// if err != nil { +// logger.Errorf("Failed to handle gzip archive -> %v", err) +// return nil, err +// } +// logger.Debugf("Extracted gzip archive successfully -> %d bytes", size) +// return buffer.Bytes(), nil +//} + +// bzip2Extract use to extract independent bzip2 archive data. +//func bzip2Extract(data []byte) ([]byte, error) { +// logger.Debugf("Start extracting bzip2 archive -> %d bytes", len(data)) +// reader := bzip2.NewReader(bytes.NewReader(data)) +// +// var buffer bytes.Buffer +// size, err := io.Copy(&buffer, reader) +// if err != nil { +// logger.Errorf("Failed to extract bzip2 archive -> %v", err) +// return nil, err +// } +// logger.Debugf("Extracted bzip2 archive successfully -> %d bytes", size) +// return buffer.Bytes(), nil +//} + +// xzExtract use to extract independent xz archive data. 
+//func xzExtract(data []byte) ([]byte, error) { +// logger.Debugf("Start extracting xz archive -> %d bytes", len(data)) +// reader, err := xz.NewReader(bytes.NewReader(data)) +// if err != nil { +// logger.Errorf("Failed to extract xz archive -> %v", err) +// return nil, err +// } +// +// var buffer bytes.Buffer +// size, err := io.Copy(&buffer, reader) +// if err != nil { +// logger.Errorf("Failed to handle xz archive -> %v", err) +// return nil, err +// } +// logger.Debugf("Extracted xz archive successfully -> %d bytes", size) +// return buffer.Bytes(), nil +//} + +//func recycleReader(input io.Reader) (mimeType string, recycled io.Reader, err error) { +// // header will store the bytes mimetype uses for detection. +// header := bytes.NewBuffer(nil) +// +// // After DetectReader, the data read from input is copied into header. +// mtype, err := mimetype.DetectReader(io.TeeReader(input, header)) +// if err != nil { +// fmt.Printf("error -> %v\n", err) +// return +// } +// +// // Concatenate back the header to the rest of the file. +// // recycled now contains the complete, original data. +// recycled = io.MultiReader(header, input) +// +// fmt.Printf("mime-type -> %v\n", mtype) +// +// return mtype.String(), recycled, err +//} + +// archiveType use to determine the type of archive file. 
+//func archiveType(stream io.Reader) uint { +// +// fmt.Println("start") +// +// mime, stream, _ := recycleReader(stream) +// +// fmt.Println("end") +// +// //mime := mimetype.Detect(data) +// switch mime { +// case "application/gzip": +// logger.Debugf("Data detected as gzip format") +// return gzipArchive +// case "application/x-bzip2": +// logger.Debugf("Data detected as bzip2 format") +// return bzip2Archive +// case "application/x-xz": +// logger.Debugf("Data detected as xz format") +// return xzArchive +// default: +// logger.Debugf("Data detected as non-archive format -> `%s`", mime) +// return notArchive +// } +//} + +// archiveType use to determine the type of archive file. +//func archiveType(data []byte) uint { +// mime := mimetype.Detect(data) +// switch mime.String() { +// case "application/gzip": +// logger.Debugf("Data detected as gzip format") +// return gzipArchive +// case "application/x-bzip2": +// logger.Debugf("Data detected as bzip2 format") +// return bzip2Archive +// case "application/x-xz": +// logger.Debugf("Data detected as xz format") +// return xzArchive +// default: +// logger.Debugf("Data detected as non-archive format -> `%s`", mime) +// return notArchive +// } +//} + +// tryExtract will try to extract the data as a compressed format, and will +// return the original data if it cannot be determined. 
+//func tryExtract(data []byte) ([]byte, error) { +// //switch archiveType(data) { +// //case gzipArchive: +// // //return gzipExtract(data) +// //case bzip2Archive: +// // //return bzip2Extract(data) +// //case xzArchive: +// // //return xzExtract(data) +// //default: +// // return data, nil +// //} +// return nil, nil +//} diff --git a/assets/extract_test.go b/assets/extract_test.go index e025af3..e231a84 100644 --- a/assets/extract_test.go +++ b/assets/extract_test.go @@ -1,12 +1,14 @@ package assets import ( + "XProxy/logger" "bytes" "compress/gzip" "crypto/rand" + "fmt" "github.com/dsnet/compress/bzip2" - "github.com/stretchr/testify/assert" "github.com/ulikunitz/xz" + "io" mrand "math/rand" "testing" ) @@ -27,7 +29,8 @@ func randInt(min int, max int) int { func randData() []byte { raw := randBytes(1024) - size := randInt(testMinSize, testMaxSize) + //size := randInt(testMinSize, testMaxSize) + size := 257 var buffer bytes.Buffer for i := 0; i < size; i++ { buffer.Write(raw) @@ -61,58 +64,178 @@ func xzCompress(data []byte) []byte { return buf.Bytes() } -func TestGzipExtract(t *testing.T) { - raw := randData() - gzOk := gzipCompress(raw) - gzErr := append(gzOk, randBytes(randInt(1, 16))...) - - ret, err := gzipExtract(gzOk) - assert.Nil(t, err) - assert.Equal(t, raw, ret) - _, err = gzipExtract(gzErr) - assert.NotNil(t, err) +//func TestGzipExtract(t *testing.T) { +// raw := randData() +// gzOk := gzipCompress(raw) +// gzErr := append(gzOk, randBytes(randInt(1, 16))...) +// +// ret, err := gzipExtract(gzOk) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +// _, err = gzipExtract(gzErr) +// assert.NotNil(t, err) +//} + +//func TestBzip2Extract(t *testing.T) { +// raw := randData() +// bz2Ok := bzip2Compress(raw) +// bz2Err := append(bz2Ok, randBytes(randInt(1, 16))...) 
+// +// ret, err := bzip2Extract(bz2Ok) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +// _, err = bzip2Extract(bz2Err) +// assert.NotNil(t, err) +//} + +//func TestXzExtract(t *testing.T) { +// raw := randData() +// xzOk := xzCompress(raw) +// xzErr := append(xzOk, randBytes(randInt(1, 16))...) +// +// ret, err := xzExtract(xzOk) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +// _, err = xzExtract(xzErr) +// assert.NotNil(t, err) +//} + +//func TestExtract(t *testing.T) { +// raw := randData() +// +// ret, err := tryExtract(raw) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +// +// ret, err = tryExtract(gzipCompress(raw)) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +// +// ret, err = tryExtract(bzip2Compress(raw)) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +// +// ret, err = tryExtract(xzCompress(raw)) +// assert.Nil(t, err) +// assert.Equal(t, raw, ret) +//} + +func Test_demo(t *testing.T) { + //data := gzipCompress(randData()) + //data := randData() + //data = append(data, randBytes(randInt(1, 16))...) + //fmt.Printf("origin gzip size -> %d\n", len(data)) + + //data := randData() + //data := bzip2Compress(randData()) + //data = append(data, randBytes(randInt(1, 16))...) + //fmt.Printf("origin bzip2 size -> %d\n", len(data)) + + //data := randData() + data := xzCompress(randData()) + data = append(data, randBytes(randInt(1, 16))...) 
+ fmt.Printf("origin xz size -> %d\n", len(data)) + + //buffer := bytes.NewReader(data) + + //reader, err := gzipExtract(buffer) + //reader, err := bzip2Extract(buffer) + //reader, err := xzExtract(buffer) + + //archiveType(buffer) + + //fmt.Printf("%v\n", err) + // + //buf := make([]byte, 1024*1024*4) + //for { + // n, err := reader.Read(buf) + // + // if err == io.EOF { + // fmt.Println("reach stream ending") + // break + // } + // if err != nil { + // fmt.Printf("stream error -> %v", err) + // return + // } + // + // fmt.Printf("get %d bytes\n", n) + // + //} + } -func TestBzip2Extract(t *testing.T) { - raw := randData() - bz2Ok := bzip2Compress(raw) - bz2Err := append(bz2Ok, randBytes(randInt(1, 16))...) - - ret, err := bzip2Extract(bz2Ok) - assert.Nil(t, err) - assert.Equal(t, raw, ret) - _, err = bzip2Extract(bz2Err) - assert.NotNil(t, err) +func init() { + logger.SetLevel(logger.DebugLevel) } -func TestXzExtract(t *testing.T) { - raw := randData() - xzOk := xzCompress(raw) - xzErr := append(xzOk, randBytes(randInt(1, 16))...) 
- - ret, err := xzExtract(xzOk) - assert.Nil(t, err) - assert.Equal(t, raw, ret) - _, err = xzExtract(xzErr) - assert.NotNil(t, err) +//func Test_archive(t *testing.T) { +// data := gzipCompress(randData()) +// fmt.Printf("origin gzip size -> %d\n", len(data)) +// +// kk := asset{ +// tag: "A7B932FD11", +// stream: bytes.NewReader(data), +// } +// +// kk.gzipExtract() +// +// buf := make([]byte, 4*1024*1024) +// for { +// n, err := kk.stream.Read(buf) +// +// if err == io.EOF { +// fmt.Printf("get %d bytes\n", n) +// fmt.Printf("reach stream ending\n") +// return +// } +// +// if err != nil { +// fmt.Printf("stream error -> %v\n", err) +// return +// } +// fmt.Printf("get %d bytes\n", n) +// } +// +//} + +type brokenReader struct { + time int } -func TestExtract(t *testing.T) { - raw := randData() +func (b *brokenReader) Read(p []byte) (n int, err error) { + b.time += 1 + + fmt.Printf("Read time = %d\n", b.time) - ret, err := tryExtract(raw) - assert.Nil(t, err) - assert.Equal(t, raw, ret) + if b.time < 16 { + return 1024, nil + } else { + return 0, io.ErrShortWrite + } + +} - ret, err = tryExtract(gzipCompress(raw)) - assert.Nil(t, err) - assert.Equal(t, raw, ret) +func Test_extract(t *testing.T) { - ret, err = tryExtract(bzip2Compress(raw)) - assert.Nil(t, err) - assert.Equal(t, raw, ret) + raw := randData() + data := gzipCompress(raw) + data = append(data, randBytes(3)...) 
+ fmt.Printf("origin data size -> %d\n", len(data)) + + as := asset{ + tag: "DEMO", + //stream: &brokenReader{time: 0}, + stream: bytes.NewReader(data), + } + if err := as.tryExtract(); err != nil { + fmt.Printf("try extract error -> %v\n", err) + } else { + if n, err := io.Copy(io.Discard, &as); err != nil { + fmt.Printf("data stream error -> %v\n", err) + } else { + fmt.Printf("data stream complete -> %d bytes\n", n) + } + } - ret, err = tryExtract(xzCompress(raw)) - assert.Nil(t, err) - assert.Equal(t, raw, ret) } diff --git a/assets/update.go b/assets/update.go index 799e60b..8adcbca 100644 --- a/assets/update.go +++ b/assets/update.go @@ -49,9 +49,9 @@ func updateRemoteAsset(file string, url string, proxy *urlpkg.URL) error { logger.Errorf("Failed to download remote asset `%s`", url) return err } - if asset, err = tryExtract(asset); err != nil { - return err - } + //if asset, err = tryExtract(asset); err != nil { + // return err + //} if err := saveAsset(file, asset, date); err != nil { return err } @@ -79,9 +79,9 @@ func updateLocalAsset(file string, src string) error { logger.Errorf("Failed to read local asset -> %v", err) return err } - if asset, err = tryExtract(asset); err != nil { - return err - } + //if asset, err = tryExtract(asset); err != nil { + // return err + //} if err := saveAsset(file, asset, &date); err != nil { return err }