From 5727056ea109eb04ee535b981349cdfb44df9fae Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Tue, 28 Mar 2023 23:10:24 +0800 Subject: [PATCH] Make minio package support legacy MD5 checksum (#23768) A feedback from discord: https://discord.com/channels/322538954119184384/561007778139734027/1090185427115319386 Some storages like: * https://developers.cloudflare.com/r2/api/s3/api/ * https://www.backblaze.com/b2/docs/s3_compatible_api.html They do not support "x-amz-checksum-algorithm" header But minio recently uses that header with CRC32C by default. So we have to tell minio to use legacy MD5 checksum. I guess this needs to be backported because IIRC we 1.19 and 1.20 are using similar minio package. The minio package code for SendContentMD5 looks like this:
image
--- cmd/migrate_storage.go | 27 ++++++++---- custom/conf/app.example.ini | 9 ++-- .../config-cheat-sheet.en-us.md | 1 + modules/lfs/content_store.go | 42 +++++++++++++------ modules/setting/storage.go | 1 + modules/storage/minio.go | 21 ++++++++-- tests/pgsql.ini.tmpl | 1 + 7 files changed, 76 insertions(+), 26 deletions(-) diff --git a/cmd/migrate_storage.go b/cmd/migrate_storage.go index dfa7212e5b..9798913705 100644 --- a/cmd/migrate_storage.go +++ b/cmd/migrate_storage.go @@ -72,12 +72,21 @@ var CmdMigrateStorage = cli.Command{ cli.StringFlag{ Name: "minio-base-path", Value: "", - Usage: "Minio storage basepath on the bucket", + Usage: "Minio storage base path on the bucket", }, cli.BoolFlag{ Name: "minio-use-ssl", Usage: "Enable SSL for minio", }, + cli.BoolFlag{ + Name: "minio-insecure-skip-verify", + Usage: "Skip SSL verification", + }, + cli.StringFlag{ + Name: "minio-checksum-algorithm", + Value: "", + Usage: "Minio checksum algorithm (default/md5)", + }, }, } @@ -168,13 +177,15 @@ func runMigrateStorage(ctx *cli.Context) error { dstStorage, err = storage.NewMinioStorage( stdCtx, storage.MinioStorageConfig{ - Endpoint: ctx.String("minio-endpoint"), - AccessKeyID: ctx.String("minio-access-key-id"), - SecretAccessKey: ctx.String("minio-secret-access-key"), - Bucket: ctx.String("minio-bucket"), - Location: ctx.String("minio-location"), - BasePath: ctx.String("minio-base-path"), - UseSSL: ctx.Bool("minio-use-ssl"), + Endpoint: ctx.String("minio-endpoint"), + AccessKeyID: ctx.String("minio-access-key-id"), + SecretAccessKey: ctx.String("minio-secret-access-key"), + Bucket: ctx.String("minio-bucket"), + Location: ctx.String("minio-location"), + BasePath: ctx.String("minio-base-path"), + UseSSL: ctx.Bool("minio-use-ssl"), + InsecureSkipVerify: ctx.Bool("minio-insecure-skip-verify"), + ChecksumAlgorithm: ctx.String("minio-checksum-algorithm"), }) default: return fmt.Errorf("unsupported storage type: %s", ctx.String("storage")) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index d1cfcd70e5..a299255d17 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -583,15 +583,15 @@ ROUTER = console ;; * In request Header: X-Request-ID: test-id-123 ;; * Configuration in app.ini: REQUEST_ID_HEADERS = X-Request-ID ;; * Print in log: 127.0.0.1:58384 - - [14/Feb/2023:16:33:51 +0800] "test-id-123" -;; -;; If you configure more than one in the .ini file, it will match in the order of configuration, +;; +;; If you configure more than one in the .ini file, it will match in the order of configuration, ;; and the first match will be finally printed in the log. ;; * E.g: ;; * In reuqest Header: X-Trace-ID: trace-id-1q2w3e4r ;; * Configuration in app.ini: REQUEST_ID_HEADERS = X-Request-ID, X-Trace-ID, X-Req-ID ;; * Print in log: 127.0.0.1:58384 - - [14/Feb/2023:16:33:51 +0800] "trace-id-1q2w3e4r" ;; -;; REQUEST_ID_HEADERS = +;; REQUEST_ID_HEADERS = ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; @@ -1886,6 +1886,9 @@ ROUTER = console ;; ;; Minio skip SSL verification available when STORAGE_TYPE is `minio` ;MINIO_INSECURE_SKIP_VERIFY = false +;; +;; Minio checksum algorithm: default (for MinIO or AWS S3) or md5 (for Cloudflare or Backblaze) +;MINIO_CHECKSUM_ALGORITHM = default ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/docs/content/doc/administration/config-cheat-sheet.en-us.md b/docs/content/doc/administration/config-cheat-sheet.en-us.md index 7f31a27f15..e12d96147f 100644 --- a/docs/content/doc/administration/config-cheat-sheet.en-us.md +++ b/docs/content/doc/administration/config-cheat-sheet.en-us.md @@ -855,6 +855,7 @@ Default templates for project boards: - `MINIO_BASE_PATH`: **attachments/**: Minio base path on the bucket only available when STORAGE_TYPE is `minio` - `MINIO_USE_SSL`: **false**: Minio enabled ssl only available when STORAGE_TYPE is `minio` - `MINIO_INSECURE_SKIP_VERIFY`: **false**: Minio skip SSL verification available when STORAGE_TYPE is `minio` +- `MINIO_CHECKSUM_ALGORITHM`: **default**: Minio checksum algorithm: `default` (for MinIO or AWS S3) or `md5` (for Cloudflare or Backblaze) ## Log (`log`) diff --git a/modules/lfs/content_store.go b/modules/lfs/content_store.go index a4ae21bfd6..53fac4ab85 100644 --- a/modules/lfs/content_store.go +++ b/modules/lfs/content_store.go @@ -60,15 +60,22 @@ func (s *ContentStore) Put(pointer Pointer, r io.Reader) error { return err } - // This shouldn't happen but it is sensible to test - if written != pointer.Size { - if err := s.Delete(p); err != nil { - log.Error("Cleaning the LFS OID[%s] failed: %v", pointer.Oid, err) - } - return ErrSizeMismatch + // check again whether there is any error during the Save operation + // because some errors might be ignored by the Reader's caller + if wrappedRd.lastError != nil && !errors.Is(wrappedRd.lastError, io.EOF) { + err = wrappedRd.lastError + } else if written != pointer.Size { + err = ErrSizeMismatch } - return nil + // if the upload failed, try to delete the file + if err != nil { + if errDel := s.Delete(p); errDel != nil { + log.Error("Cleaning the LFS OID[%s] failed: %v", pointer.Oid, errDel) + } + } + + return err } // Exists returns true if the object exists in the content store. @@ -109,6 +116,17 @@ type hashingReader struct { expectedSize int64 hash hash.Hash expectedHash string + lastError error +} + +// recordError records the last error during the Save operation +// Some callers of the Reader doesn't respect the returned "err" +// For example, MinIO's Put will ignore errors if the written size could equal to expected size +// So we must remember the error by ourselves, +// and later check again whether ErrSizeMismatch or ErrHashMismatch occurs during the Save operation +func (r *hashingReader) recordError(err error) error { + r.lastError = err + return err } func (r *hashingReader) Read(b []byte) (int, error) { @@ -118,22 +136,22 @@ func (r *hashingReader) Read(b []byte) (int, error) { r.currentSize += int64(n) wn, werr := r.hash.Write(b[:n]) if wn != n || werr != nil { - return n, werr + return n, r.recordError(werr) } } - if err != nil && err == io.EOF { + if errors.Is(err, io.EOF) || r.currentSize >= r.expectedSize { if r.currentSize != r.expectedSize { - return n, ErrSizeMismatch + return n, r.recordError(ErrSizeMismatch) } shaStr := hex.EncodeToString(r.hash.Sum(nil)) if shaStr != r.expectedHash { - return n, ErrHashMismatch + return n, r.recordError(ErrHashMismatch) } } - return n, err + return n, r.recordError(err) } func newHashingReader(expectedSize int64, expectedHash string, reader io.Reader) *hashingReader { diff --git a/modules/setting/storage.go b/modules/setting/storage.go index 4d401614e4..50d4c8439e 100644 --- a/modules/setting/storage.go +++ b/modules/setting/storage.go @@ -42,6 +42,7 @@ func getStorage(rootCfg ConfigProvider, name, typ string, targetSec *ini.Section sec.Key("MINIO_LOCATION").MustString("us-east-1") sec.Key("MINIO_USE_SSL").MustBool(false) sec.Key("MINIO_INSECURE_SKIP_VERIFY").MustBool(false) + sec.Key("MINIO_CHECKSUM_ALGORITHM").MustString("default") if targetSec == nil { targetSec, _ = rootCfg.NewSection(name) diff --git a/modules/storage/minio.go b/modules/storage/minio.go index 5c67dbf26a..250f17827f 100644 --- a/modules/storage/minio.go +++ b/modules/storage/minio.go @@ -6,6 +6,7 @@ package storage import ( "context" "crypto/tls" + "fmt" "io" "net/http" "net/url" @@ -53,10 +54,12 @@ type MinioStorageConfig struct { BasePath string `ini:"MINIO_BASE_PATH"` UseSSL bool `ini:"MINIO_USE_SSL"` InsecureSkipVerify bool `ini:"MINIO_INSECURE_SKIP_VERIFY"` + ChecksumAlgorithm string `ini:"MINIO_CHECKSUM_ALGORITHM"` } // MinioStorage returns a minio bucket storage type MinioStorage struct { + cfg *MinioStorageConfig ctx context.Context client *minio.Client bucket string @@ -91,6 +94,10 @@ func NewMinioStorage(ctx context.Context, cfg interface{}) (ObjectStorage, error } config := configInterface.(MinioStorageConfig) + if config.ChecksumAlgorithm != "" && config.ChecksumAlgorithm != "default" && config.ChecksumAlgorithm != "md5" { + return nil, fmt.Errorf("invalid minio checksum algorithm: %s", config.ChecksumAlgorithm) + } + log.Info("Creating Minio storage at %s:%s with base path %s", config.Endpoint, config.Bucket, config.BasePath) minioClient, err := minio.New(config.Endpoint, &minio.Options{ @@ -113,6 +120,7 @@ func NewMinioStorage(ctx context.Context, cfg interface{}) (ObjectStorage, error } return &MinioStorage{ + cfg: &config, ctx: ctx, client: minioClient, bucket: config.Bucket, @@ -124,7 +132,7 @@ func (m *MinioStorage) buildMinioPath(p string) string { return util.PathJoinRelX(m.basePath, p) } -// Open open a file +// Open opens a file func (m *MinioStorage) Open(path string) (Object, error) { opts := minio.GetObjectOptions{} object, err := m.client.GetObject(m.ctx, m.bucket, m.buildMinioPath(path), opts) @@ -134,7 +142,7 @@ func (m *MinioStorage) Open(path string) (Object, error) { return &minioObject{object}, nil } -// Save save a file to minio +// Save saves a file to minio func (m *MinioStorage) Save(path string, r io.Reader, size int64) (int64, error) { uploadInfo, err := m.client.PutObject( m.ctx, @@ -142,7 +150,14 @@ func (m *MinioStorage) Save(path string, r io.Reader, size int64) (int64, error) m.buildMinioPath(path), r, size, - minio.PutObjectOptions{ContentType: "application/octet-stream"}, + minio.PutObjectOptions{ + ContentType: "application/octet-stream", + // some storages like: + // * https://developers.cloudflare.com/r2/api/s3/api/ + // * https://www.backblaze.com/b2/docs/s3_compatible_api.html + // do not support "x-amz-checksum-algorithm" header, so use legacy MD5 checksum + SendContentMd5: m.cfg.ChecksumAlgorithm == "md5", + }, ) if err != nil { return 0, convertMinioErr(err) diff --git a/tests/pgsql.ini.tmpl b/tests/pgsql.ini.tmpl index fbfbae7c68..15349aa4c2 100644 --- a/tests/pgsql.ini.tmpl +++ b/tests/pgsql.ini.tmpl @@ -125,6 +125,7 @@ MINIO_SECRET_ACCESS_KEY = 12345678 MINIO_BUCKET = gitea MINIO_LOCATION = us-east-1 MINIO_USE_SSL = false +MINIO_CHECKSUM_ALGORITHM = md5 [packages] ENABLED = true