Diffstat (limited to 'modules/lfs')
-rw-r--r-- | modules/lfs/LICENSE                 |  20
-rw-r--r-- | modules/lfs/client.go               |  32
-rw-r--r-- | modules/lfs/client_test.go          |  21
-rw-r--r-- | modules/lfs/content_store.go        | 163
-rw-r--r-- | modules/lfs/endpoint.go             | 107
-rw-r--r-- | modules/lfs/endpoint_test.go        |  74
-rw-r--r-- | modules/lfs/filesystem_client.go    |  88
-rw-r--r-- | modules/lfs/http_client.go          | 259
-rw-r--r-- | modules/lfs/http_client_test.go     | 377
-rw-r--r-- | modules/lfs/pointer.go              | 129
-rw-r--r-- | modules/lfs/pointer_scanner.go      | 109
-rw-r--r-- | modules/lfs/pointer_test.go         | 103
-rw-r--r-- | modules/lfs/shared.go               | 115
-rw-r--r-- | modules/lfs/transferadapter.go      |  89
-rw-r--r-- | modules/lfs/transferadapter_test.go | 172
15 files changed, 1858 insertions, 0 deletions
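The new modules/lfs package introduced by this diff is driven through the Client interface declared in client.go. As a rough usage sketch (not part of the change itself; the endpoint URL and OID below are made up for illustration), a caller obtains a client via NewClient and streams objects through the Download callback:

package main

import (
	"context"
	"fmt"
	"io"
	"net/url"

	"code.gitea.io/gitea/modules/lfs"
)

func main() {
	// Hypothetical LFS endpoint; NewClient picks the filesystem client for
	// file:// URLs and the HTTP client otherwise (see client.go below).
	endpoint, _ := url.Parse("https://git.example.com/owner/repo.git/info/lfs")
	client := lfs.NewClient(endpoint, nil) // nil transport: a default proxy-aware one is created

	pointers := []lfs.Pointer{
		{Oid: "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", Size: 1234},
	}

	err := client.Download(context.Background(), pointers, func(p lfs.Pointer, content io.ReadCloser, objectError error) error {
		if objectError != nil {
			return objectError // per-object errors from the batch response arrive here
		}
		defer content.Close()
		n, copyErr := io.Copy(io.Discard, content) // a real caller would persist the content
		fmt.Printf("downloaded %s (%d bytes)\n", p.Oid, n)
		return copyErr
	})
	if err != nil {
		fmt.Println("download failed:", err)
	}
}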
diff --git a/modules/lfs/LICENSE b/modules/lfs/LICENSE new file mode 100644 index 0000000..0a94a80 --- /dev/null +++ b/modules/lfs/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2016 The Gitea Authors +Copyright (c) GitHub, Inc. and LFS Test Server contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/modules/lfs/client.go b/modules/lfs/client.go new file mode 100644 index 0000000..f810e5c --- /dev/null +++ b/modules/lfs/client.go @@ -0,0 +1,32 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "context" + "io" + "net/http" + "net/url" +) + +// DownloadCallback gets called for every requested LFS object to process its content +type DownloadCallback func(p Pointer, content io.ReadCloser, objectError error) error + +// UploadCallback gets called for every requested LFS object to provide its content +type UploadCallback func(p Pointer, objectError error) (io.ReadCloser, error) + +// Client is used to communicate with a LFS source +type Client interface { + BatchSize() int + Download(ctx context.Context, objects []Pointer, callback DownloadCallback) error + Upload(ctx context.Context, objects []Pointer, callback UploadCallback) error +} + +// NewClient creates a LFS client +func NewClient(endpoint *url.URL, httpTransport *http.Transport) Client { + if endpoint.Scheme == "file" { + return newFilesystemClient(endpoint) + } + return newHTTPClient(endpoint, httpTransport) +} diff --git a/modules/lfs/client_test.go b/modules/lfs/client_test.go new file mode 100644 index 0000000..a136930 --- /dev/null +++ b/modules/lfs/client_test.go @@ -0,0 +1,21 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewClient(t *testing.T) { + u, _ := url.Parse("file:///test") + c := NewClient(u, nil) + assert.IsType(t, &FilesystemClient{}, c) + + u, _ = url.Parse("https://test.com/lfs") + c = NewClient(u, nil) + assert.IsType(t, &HTTPClient{}, c) +} diff --git a/modules/lfs/content_store.go b/modules/lfs/content_store.go new file mode 100644 index 0000000..0d9c0c9 --- /dev/null +++ b/modules/lfs/content_store.go @@ -0,0 +1,163 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package lfs + +import ( + "crypto/sha256" + "encoding/hex" + "errors" + "hash" + "io" + "os" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/storage" +) + +var ( + // ErrHashMismatch occurs if the content has does not match OID + ErrHashMismatch = errors.New("content hash does not match OID") + // ErrSizeMismatch occurs if the content size does not match + ErrSizeMismatch = errors.New("content size does not match") +) + +// ContentStore provides a simple file system based storage. +type ContentStore struct { + storage.ObjectStorage +} + +// NewContentStore creates the default ContentStore +func NewContentStore() *ContentStore { + contentStore := &ContentStore{ObjectStorage: storage.LFS} + return contentStore +} + +// Get takes a Meta object and retrieves the content from the store, returning +// it as an io.ReadSeekCloser. +func (s *ContentStore) Get(pointer Pointer) (storage.Object, error) { + f, err := s.Open(pointer.RelativePath()) + if err != nil { + log.Error("Whilst trying to read LFS OID[%s]: Unable to open Error: %v", pointer.Oid, err) + return nil, err + } + return f, err +} + +// Put takes a Meta object and an io.Reader and writes the content to the store. +func (s *ContentStore) Put(pointer Pointer, r io.Reader) error { + p := pointer.RelativePath() + + // Wrap the provided reader with an inline hashing and size checker + wrappedRd := newHashingReader(pointer.Size, pointer.Oid, r) + + // now pass the wrapped reader to Save - if there is a size mismatch or hash mismatch then + // the errors returned by the newHashingReader should percolate up to here + written, err := s.Save(p, wrappedRd, pointer.Size) + if err != nil { + log.Error("Whilst putting LFS OID[%s]: Failed to copy to tmpPath: %s Error: %v", pointer.Oid, p, err) + return err + } + + // check again whether there is any error during the Save operation + // because some errors might be ignored by the Reader's caller + if wrappedRd.lastError != nil && !errors.Is(wrappedRd.lastError, io.EOF) { + err = wrappedRd.lastError + } else if written != pointer.Size { + err = ErrSizeMismatch + } + + // if the upload failed, try to delete the file + if err != nil { + if errDel := s.Delete(p); errDel != nil { + log.Error("Cleaning the LFS OID[%s] failed: %v", pointer.Oid, errDel) + } + } + + return err +} + +// Exists returns true if the object exists in the content store. +func (s *ContentStore) Exists(pointer Pointer) (bool, error) { + _, err := s.ObjectStorage.Stat(pointer.RelativePath()) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + return true, nil +} + +// Verify returns true if the object exists in the content store and size is correct. 
+func (s *ContentStore) Verify(pointer Pointer) (bool, error) { + p := pointer.RelativePath() + fi, err := s.ObjectStorage.Stat(p) + if os.IsNotExist(err) || (err == nil && fi.Size() != pointer.Size) { + return false, nil + } else if err != nil { + log.Error("Unable stat file: %s for LFS OID[%s] Error: %v", p, pointer.Oid, err) + return false, err + } + + return true, nil +} + +// ReadMetaObject will read a git_model.LFSMetaObject and return a reader +func ReadMetaObject(pointer Pointer) (io.ReadSeekCloser, error) { + contentStore := NewContentStore() + return contentStore.Get(pointer) +} + +type hashingReader struct { + internal io.Reader + currentSize int64 + expectedSize int64 + hash hash.Hash + expectedHash string + lastError error +} + +// recordError records the last error during the Save operation +// Some callers of the Reader doesn't respect the returned "err" +// For example, MinIO's Put will ignore errors if the written size could equal to expected size +// So we must remember the error by ourselves, +// and later check again whether ErrSizeMismatch or ErrHashMismatch occurs during the Save operation +func (r *hashingReader) recordError(err error) error { + r.lastError = err + return err +} + +func (r *hashingReader) Read(b []byte) (int, error) { + n, err := r.internal.Read(b) + + if n > 0 { + r.currentSize += int64(n) + wn, werr := r.hash.Write(b[:n]) + if wn != n || werr != nil { + return n, r.recordError(werr) + } + } + + if errors.Is(err, io.EOF) || r.currentSize >= r.expectedSize { + if r.currentSize != r.expectedSize { + return n, r.recordError(ErrSizeMismatch) + } + + shaStr := hex.EncodeToString(r.hash.Sum(nil)) + if shaStr != r.expectedHash { + return n, r.recordError(ErrHashMismatch) + } + } + + return n, r.recordError(err) +} + +func newHashingReader(expectedSize int64, expectedHash string, reader io.Reader) *hashingReader { + return &hashingReader{ + internal: reader, + expectedSize: expectedSize, + expectedHash: expectedHash, + hash: sha256.New(), + } +} diff --git a/modules/lfs/endpoint.go b/modules/lfs/endpoint.go new file mode 100644 index 0000000..97bd7d4 --- /dev/null +++ b/modules/lfs/endpoint.go @@ -0,0 +1,107 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "net/url" + "os" + "path" + "path/filepath" + "strings" + + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" +) + +// DetermineEndpoint determines an endpoint from the clone url or uses the specified LFS url. 
+func DetermineEndpoint(cloneurl, lfsurl string) *url.URL { + if len(lfsurl) > 0 { + return endpointFromURL(lfsurl) + } + return endpointFromCloneURL(cloneurl) +} + +func endpointFromCloneURL(rawurl string) *url.URL { + ep := endpointFromURL(rawurl) + if ep == nil { + return ep + } + + ep.Path = strings.TrimSuffix(ep.Path, "/") + + if ep.Scheme == "file" { + return ep + } + + if path.Ext(ep.Path) == ".git" { + ep.Path += "/info/lfs" + } else { + ep.Path += ".git/info/lfs" + } + + return ep +} + +func endpointFromURL(rawurl string) *url.URL { + if strings.HasPrefix(rawurl, "/") { + return endpointFromLocalPath(rawurl) + } + + u, err := url.Parse(rawurl) + if err != nil { + log.Error("lfs.endpointFromUrl: %v", err) + return nil + } + + switch u.Scheme { + case "http", "https": + return u + case "git": + u.Scheme = "https" + return u + case "ssh": + u.Scheme = "https" + u.User = nil + return u + case "file": + return u + default: + if _, err := os.Stat(rawurl); err == nil { + return endpointFromLocalPath(rawurl) + } + + log.Error("lfs.endpointFromUrl: unknown url") + return nil + } +} + +func endpointFromLocalPath(path string) *url.URL { + var slash string + if abs, err := filepath.Abs(path); err == nil { + if !strings.HasPrefix(abs, "/") { + slash = "/" + } + path = abs + } + + var gitpath string + if filepath.Base(path) == ".git" { + gitpath = path + path = filepath.Dir(path) + } else { + gitpath = filepath.Join(path, ".git") + } + + if _, err := os.Stat(gitpath); err == nil { + path = gitpath + } else if _, err := os.Stat(path); err != nil { + return nil + } + + path = "file://" + slash + util.PathEscapeSegments(filepath.ToSlash(path)) + + u, _ := url.Parse(path) + + return u +} diff --git a/modules/lfs/endpoint_test.go b/modules/lfs/endpoint_test.go new file mode 100644 index 0000000..118abe2 --- /dev/null +++ b/modules/lfs/endpoint_test.go @@ -0,0 +1,74 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "net/url" + "testing" + + "github.com/stretchr/testify/assert" +) + +func str2url(raw string) *url.URL { + u, _ := url.Parse(raw) + return u +} + +func TestDetermineEndpoint(t *testing.T) { + // Test cases + cases := []struct { + cloneurl string + lfsurl string + expected *url.URL + }{ + // case 0 + { + cloneurl: "", + lfsurl: "", + expected: nil, + }, + // case 1 + { + cloneurl: "https://git.com/repo", + lfsurl: "", + expected: str2url("https://git.com/repo.git/info/lfs"), + }, + // case 2 + { + cloneurl: "https://git.com/repo.git", + lfsurl: "", + expected: str2url("https://git.com/repo.git/info/lfs"), + }, + // case 3 + { + cloneurl: "", + lfsurl: "https://gitlfs.com/repo", + expected: str2url("https://gitlfs.com/repo"), + }, + // case 4 + { + cloneurl: "https://git.com/repo.git", + lfsurl: "https://gitlfs.com/repo", + expected: str2url("https://gitlfs.com/repo"), + }, + // case 5 + { + cloneurl: "git://git.com/repo.git", + lfsurl: "", + expected: str2url("https://git.com/repo.git/info/lfs"), + }, + // case 6 + { + cloneurl: "", + lfsurl: "git://gitlfs.com/repo", + expected: str2url("https://gitlfs.com/repo"), + }, + } + + for n, c := range cases { + ep := DetermineEndpoint(c.cloneurl, c.lfsurl) + + assert.Equal(t, c.expected, ep, "case %d: error should match", n) + } +} diff --git a/modules/lfs/filesystem_client.go b/modules/lfs/filesystem_client.go new file mode 100644 index 0000000..71bef5c --- /dev/null +++ b/modules/lfs/filesystem_client.go @@ -0,0 +1,88 @@ +// Copyright 2021 The Gitea Authors. 
All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "context" + "io" + "net/url" + "os" + "path/filepath" + + "code.gitea.io/gitea/modules/util" +) + +// FilesystemClient is used to read LFS data from a filesystem path +type FilesystemClient struct { + lfsDir string +} + +// BatchSize returns the preferred size of batchs to process +func (c *FilesystemClient) BatchSize() int { + return 1 +} + +func newFilesystemClient(endpoint *url.URL) *FilesystemClient { + path, _ := util.FileURLToPath(endpoint) + lfsDir := filepath.Join(path, "lfs", "objects") + return &FilesystemClient{lfsDir} +} + +func (c *FilesystemClient) objectPath(oid string) string { + return filepath.Join(c.lfsDir, oid[0:2], oid[2:4], oid) +} + +// Download reads the specific LFS object from the target path +func (c *FilesystemClient) Download(ctx context.Context, objects []Pointer, callback DownloadCallback) error { + for _, object := range objects { + p := Pointer{object.Oid, object.Size} + + objectPath := c.objectPath(p.Oid) + + f, err := os.Open(objectPath) + if err != nil { + return err + } + defer f.Close() + if err := callback(p, f, nil); err != nil { + return err + } + } + return nil +} + +// Upload writes the specific LFS object to the target path +func (c *FilesystemClient) Upload(ctx context.Context, objects []Pointer, callback UploadCallback) error { + for _, object := range objects { + p := Pointer{object.Oid, object.Size} + + objectPath := c.objectPath(p.Oid) + + if err := os.MkdirAll(filepath.Dir(objectPath), os.ModePerm); err != nil { + return err + } + + content, err := callback(p, nil) + if err != nil { + return err + } + + err = func() error { + defer content.Close() + + f, err := os.Create(objectPath) + if err != nil { + return err + } + defer f.Close() + _, err = io.Copy(f, content) + + return err + }() + if err != nil { + return err + } + } + return nil +} diff --git a/modules/lfs/http_client.go b/modules/lfs/http_client.go new file mode 100644 index 0000000..4859fe6 --- /dev/null +++ b/modules/lfs/http_client.go @@ -0,0 +1,259 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package lfs + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "net/http" + "net/url" + "strings" + + "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/proxy" +) + +const httpBatchSize = 20 + +// HTTPClient is used to communicate with the LFS server +// https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md +type HTTPClient struct { + client *http.Client + endpoint string + transfers map[string]TransferAdapter +} + +// BatchSize returns the preferred size of batchs to process +func (c *HTTPClient) BatchSize() int { + return httpBatchSize +} + +func newHTTPClient(endpoint *url.URL, httpTransport *http.Transport) *HTTPClient { + if httpTransport == nil { + httpTransport = &http.Transport{ + Proxy: proxy.Proxy(), + } + } + + hc := &http.Client{ + Transport: httpTransport, + } + + basic := &BasicTransferAdapter{hc} + client := &HTTPClient{ + client: hc, + endpoint: strings.TrimSuffix(endpoint.String(), "/"), + transfers: map[string]TransferAdapter{ + basic.Name(): basic, + }, + } + + return client +} + +func (c *HTTPClient) transferNames() []string { + keys := make([]string, len(c.transfers)) + i := 0 + for k := range c.transfers { + keys[i] = k + i++ + } + return keys +} + +func (c *HTTPClient) batch(ctx context.Context, operation string, objects []Pointer) (*BatchResponse, error) { + log.Trace("BATCH operation with objects: %v", objects) + + url := fmt.Sprintf("%s/objects/batch", c.endpoint) + + request := &BatchRequest{operation, c.transferNames(), nil, objects} + payload := new(bytes.Buffer) + err := json.NewEncoder(payload).Encode(request) + if err != nil { + log.Error("Error encoding json: %v", err) + return nil, err + } + + req, err := createRequest(ctx, http.MethodPost, url, map[string]string{"Content-Type": MediaType}, payload) + if err != nil { + return nil, err + } + + res, err := performRequest(ctx, c.client, req) + if err != nil { + return nil, err + } + defer res.Body.Close() + + var response BatchResponse + err = json.NewDecoder(res.Body).Decode(&response) + if err != nil { + log.Error("Error decoding json: %v", err) + return nil, err + } + + if len(response.Transfer) == 0 { + response.Transfer = "basic" + } + + return &response, nil +} + +// Download reads the specific LFS object from the LFS server +func (c *HTTPClient) Download(ctx context.Context, objects []Pointer, callback DownloadCallback) error { + return c.performOperation(ctx, objects, callback, nil) +} + +// Upload sends the specific LFS object to the LFS server +func (c *HTTPClient) Upload(ctx context.Context, objects []Pointer, callback UploadCallback) error { + return c.performOperation(ctx, objects, nil, callback) +} + +func (c *HTTPClient) performOperation(ctx context.Context, objects []Pointer, dc DownloadCallback, uc UploadCallback) error { + if len(objects) == 0 { + return nil + } + + operation := "download" + if uc != nil { + operation = "upload" + } + + result, err := c.batch(ctx, operation, objects) + if err != nil { + return err + } + + transferAdapter, ok := c.transfers[result.Transfer] + if !ok { + return fmt.Errorf("TransferAdapter not found: %s", result.Transfer) + } + + for _, object := range result.Objects { + if object.Error != nil { + log.Trace("Error on object %v: %v", object.Pointer, object.Error) + if uc != nil { + if _, err := uc(object.Pointer, object.Error); err != nil { + return err + } + } else { + if err := dc(object.Pointer, nil, object.Error); err != nil { + return err + } + } + 
continue + } + + if uc != nil { + if len(object.Actions) == 0 { + log.Trace("%v already present on server", object.Pointer) + continue + } + + link, ok := object.Actions["upload"] + if !ok { + log.Debug("%+v", object) + return errors.New("missing action 'upload'") + } + + content, err := uc(object.Pointer, nil) + if err != nil { + return err + } + + err = transferAdapter.Upload(ctx, link, object.Pointer, content) + if err != nil { + return err + } + + link, ok = object.Actions["verify"] + if ok { + if err := transferAdapter.Verify(ctx, link, object.Pointer); err != nil { + return err + } + } + } else { + link, ok := object.Actions["download"] + if !ok { + // no actions block in response, try legacy response schema + link, ok = object.Links["download"] + } + if !ok { + log.Debug("%+v", object) + return errors.New("missing action 'download'") + } + + content, err := transferAdapter.Download(ctx, link) + if err != nil { + return err + } + + if err := dc(object.Pointer, content, nil); err != nil { + return err + } + } + } + + return nil +} + +// createRequest creates a new request, and sets the headers. +func createRequest(ctx context.Context, method, url string, headers map[string]string, body io.Reader) (*http.Request, error) { + log.Trace("createRequest: %s", url) + req, err := http.NewRequestWithContext(ctx, method, url, body) + if err != nil { + log.Error("Error creating request: %v", err) + return nil, err + } + + for key, value := range headers { + req.Header.Set(key, value) + } + req.Header.Set("Accept", AcceptHeader) + + return req, nil +} + +// performRequest sends a request, optionally performs a callback on the request and returns the response. +// If the status code is 200, the response is returned, and it will contain a non-nil Body. +// Otherwise, it will return an error, and the Body will be nil or closed. +func performRequest(ctx context.Context, client *http.Client, req *http.Request) (*http.Response, error) { + log.Trace("performRequest: %s", req.URL) + res, err := client.Do(req) + if err != nil { + select { + case <-ctx.Done(): + return res, ctx.Err() + default: + } + log.Error("Error while processing request: %v", err) + return res, err + } + + if res.StatusCode != http.StatusOK { + defer res.Body.Close() + return res, handleErrorResponse(res) + } + + return res, nil +} + +func handleErrorResponse(resp *http.Response) error { + var er ErrorResponse + err := json.NewDecoder(resp.Body).Decode(&er) + if err != nil { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + log.Error("Error decoding json: %v", err) + return err + } + + log.Trace("ErrorResponse(%v): %v", resp.Status, er) + return errors.New(er.Message) +} diff --git a/modules/lfs/http_client_test.go b/modules/lfs/http_client_test.go new file mode 100644 index 0000000..534a445 --- /dev/null +++ b/modules/lfs/http_client_test.go @@ -0,0 +1,377 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package lfs + +import ( + "bytes" + "context" + "io" + "net/http" + "strings" + "testing" + + "code.gitea.io/gitea/modules/json" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type RoundTripFunc func(req *http.Request) *http.Response + +func (f RoundTripFunc) RoundTrip(req *http.Request) (*http.Response, error) { + return f(req), nil +} + +type DummyTransferAdapter struct{} + +func (a *DummyTransferAdapter) Name() string { + return "dummy" +} + +func (a *DummyTransferAdapter) Download(ctx context.Context, l *Link) (io.ReadCloser, error) { + return io.NopCloser(bytes.NewBufferString("dummy")), nil +} + +func (a *DummyTransferAdapter) Upload(ctx context.Context, l *Link, p Pointer, r io.Reader) error { + return nil +} + +func (a *DummyTransferAdapter) Verify(ctx context.Context, l *Link, p Pointer) error { + return nil +} + +func lfsTestRoundtripHandler(req *http.Request) *http.Response { + var batchResponse *BatchResponse + url := req.URL.String() + + if strings.Contains(url, "status-not-ok") { + return &http.Response{StatusCode: http.StatusBadRequest} + } else if strings.Contains(url, "invalid-json-response") { + return &http.Response{StatusCode: http.StatusOK, Body: io.NopCloser(bytes.NewBufferString("invalid json"))} + } else if strings.Contains(url, "valid-batch-request-download") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{ + "download": {}, + }, + }, + }, + } + } else if strings.Contains(url, "legacy-batch-request-download") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Links: map[string]*Link{ + "download": {}, + }, + }, + }, + } + } else if strings.Contains(url, "valid-batch-request-upload") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{ + "upload": {}, + }, + }, + }, + } + } else if strings.Contains(url, "response-no-objects") { + batchResponse = &BatchResponse{Transfer: "dummy"} + } else if strings.Contains(url, "unknown-transfer-adapter") { + batchResponse = &BatchResponse{Transfer: "unknown_adapter"} + } else if strings.Contains(url, "error-in-response-objects") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Error: &ObjectError{ + Code: http.StatusNotFound, + Message: "Object not found", + }, + }, + }, + } + } else if strings.Contains(url, "empty-actions-map") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{}, + }, + }, + } + } else if strings.Contains(url, "download-actions-map") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{ + "download": {}, + }, + }, + }, + } + } else if strings.Contains(url, "upload-actions-map") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{ + "upload": {}, + }, + }, + }, + } + } else if strings.Contains(url, "verify-actions-map") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{ + "verify": {}, + }, + }, + }, + } + } else if strings.Contains(url, "unknown-actions-map") { + batchResponse = &BatchResponse{ + Transfer: "dummy", + Objects: []*ObjectResponse{ + { + Actions: map[string]*Link{ + "unknown": {}, + }, + }, + }, + } + } else { + return nil + } + + payload 
:= new(bytes.Buffer) + json.NewEncoder(payload).Encode(batchResponse) + + return &http.Response{StatusCode: http.StatusOK, Body: io.NopCloser(payload)} +} + +func TestHTTPClientDownload(t *testing.T) { + p := Pointer{Oid: "fb8f7d8435968c4f82a726a92395be4d16f2f63116caf36c8ad35c60831ab041", Size: 6} + + hc := &http.Client{Transport: RoundTripFunc(func(req *http.Request) *http.Response { + assert.Equal(t, "POST", req.Method) + assert.Equal(t, MediaType, req.Header.Get("Content-type")) + assert.Equal(t, AcceptHeader, req.Header.Get("Accept")) + + var batchRequest BatchRequest + err := json.NewDecoder(req.Body).Decode(&batchRequest) + require.NoError(t, err) + + assert.Equal(t, "download", batchRequest.Operation) + assert.Len(t, batchRequest.Objects, 1) + assert.Equal(t, p.Oid, batchRequest.Objects[0].Oid) + assert.Equal(t, p.Size, batchRequest.Objects[0].Size) + + return lfsTestRoundtripHandler(req) + })} + dummy := &DummyTransferAdapter{} + + cases := []struct { + endpoint string + expectederror string + }{ + // case 0 + { + endpoint: "https://status-not-ok.io", + expectederror: io.ErrUnexpectedEOF.Error(), + }, + // case 1 + { + endpoint: "https://invalid-json-response.io", + expectederror: "invalid json", + }, + // case 2 + { + endpoint: "https://valid-batch-request-download.io", + expectederror: "", + }, + // case 3 + { + endpoint: "https://response-no-objects.io", + expectederror: "", + }, + // case 4 + { + endpoint: "https://unknown-transfer-adapter.io", + expectederror: "TransferAdapter not found: ", + }, + // case 5 + { + endpoint: "https://error-in-response-objects.io", + expectederror: "Object not found", + }, + // case 6 + { + endpoint: "https://empty-actions-map.io", + expectederror: "missing action 'download'", + }, + // case 7 + { + endpoint: "https://download-actions-map.io", + expectederror: "", + }, + // case 8 + { + endpoint: "https://upload-actions-map.io", + expectederror: "missing action 'download'", + }, + // case 9 + { + endpoint: "https://verify-actions-map.io", + expectederror: "missing action 'download'", + }, + // case 10 + { + endpoint: "https://unknown-actions-map.io", + expectederror: "missing action 'download'", + }, + // case 11 + { + endpoint: "https://legacy-batch-request-download.io", + expectederror: "", + }, + } + + for n, c := range cases { + client := &HTTPClient{ + client: hc, + endpoint: c.endpoint, + transfers: map[string]TransferAdapter{ + "dummy": dummy, + }, + } + + err := client.Download(context.Background(), []Pointer{p}, func(p Pointer, content io.ReadCloser, objectError error) error { + if objectError != nil { + return objectError + } + b, err := io.ReadAll(content) + require.NoError(t, err) + assert.Equal(t, []byte("dummy"), b) + return nil + }) + if len(c.expectederror) > 0 { + assert.True(t, strings.Contains(err.Error(), c.expectederror), "case %d: '%s' should contain '%s'", n, err.Error(), c.expectederror) + } else { + require.NoError(t, err, "case %d", n) + } + } +} + +func TestHTTPClientUpload(t *testing.T) { + p := Pointer{Oid: "fb8f7d8435968c4f82a726a92395be4d16f2f63116caf36c8ad35c60831ab041", Size: 6} + + hc := &http.Client{Transport: RoundTripFunc(func(req *http.Request) *http.Response { + assert.Equal(t, "POST", req.Method) + assert.Equal(t, MediaType, req.Header.Get("Content-type")) + assert.Equal(t, AcceptHeader, req.Header.Get("Accept")) + + var batchRequest BatchRequest + err := json.NewDecoder(req.Body).Decode(&batchRequest) + require.NoError(t, err) + + assert.Equal(t, "upload", batchRequest.Operation) + assert.Len(t, 
batchRequest.Objects, 1) + assert.Equal(t, p.Oid, batchRequest.Objects[0].Oid) + assert.Equal(t, p.Size, batchRequest.Objects[0].Size) + + return lfsTestRoundtripHandler(req) + })} + dummy := &DummyTransferAdapter{} + + cases := []struct { + endpoint string + expectederror string + }{ + // case 0 + { + endpoint: "https://status-not-ok.io", + expectederror: io.ErrUnexpectedEOF.Error(), + }, + // case 1 + { + endpoint: "https://invalid-json-response.io", + expectederror: "invalid json", + }, + // case 2 + { + endpoint: "https://valid-batch-request-upload.io", + expectederror: "", + }, + // case 3 + { + endpoint: "https://response-no-objects.io", + expectederror: "", + }, + // case 4 + { + endpoint: "https://unknown-transfer-adapter.io", + expectederror: "TransferAdapter not found: ", + }, + // case 5 + { + endpoint: "https://error-in-response-objects.io", + expectederror: "Object not found", + }, + // case 6 + { + endpoint: "https://empty-actions-map.io", + expectederror: "", + }, + // case 7 + { + endpoint: "https://download-actions-map.io", + expectederror: "missing action 'upload'", + }, + // case 8 + { + endpoint: "https://upload-actions-map.io", + expectederror: "", + }, + // case 9 + { + endpoint: "https://verify-actions-map.io", + expectederror: "missing action 'upload'", + }, + // case 10 + { + endpoint: "https://unknown-actions-map.io", + expectederror: "missing action 'upload'", + }, + } + + for n, c := range cases { + client := &HTTPClient{ + client: hc, + endpoint: c.endpoint, + transfers: map[string]TransferAdapter{ + "dummy": dummy, + }, + } + + err := client.Upload(context.Background(), []Pointer{p}, func(p Pointer, objectError error) (io.ReadCloser, error) { + return io.NopCloser(new(bytes.Buffer)), objectError + }) + if len(c.expectederror) > 0 { + assert.True(t, strings.Contains(err.Error(), c.expectederror), "case %d: '%s' should contain '%s'", n, err.Error(), c.expectederror) + } else { + require.NoError(t, err, "case %d", n) + } + } +} diff --git a/modules/lfs/pointer.go b/modules/lfs/pointer.go new file mode 100644 index 0000000..ebde20f --- /dev/null +++ b/modules/lfs/pointer.go @@ -0,0 +1,129 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "crypto/sha256" + "encoding/hex" + "errors" + "fmt" + "io" + "path" + "regexp" + "strconv" + "strings" +) + +const ( + blobSizeCutoff = 1024 + + // MetaFileIdentifier is the string appearing at the first line of LFS pointer files. + // https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md + MetaFileIdentifier = "version https://git-lfs.github.com/spec/v1" + + // MetaFileOidPrefix appears in LFS pointer files on a line before the sha256 hash. 
+ MetaFileOidPrefix = "oid sha256:" +) + +var ( + // ErrMissingPrefix occurs if the content lacks the LFS prefix + ErrMissingPrefix = errors.New("content lacks the LFS prefix") + + // ErrInvalidStructure occurs if the content has an invalid structure + ErrInvalidStructure = errors.New("content has an invalid structure") + + // ErrInvalidOIDFormat occurs if the oid has an invalid format + ErrInvalidOIDFormat = errors.New("OID has an invalid format") +) + +// ReadPointer tries to read LFS pointer data from the reader +func ReadPointer(reader io.Reader) (Pointer, error) { + buf := make([]byte, blobSizeCutoff) + n, err := io.ReadFull(reader, buf) + if err != nil && err != io.ErrUnexpectedEOF { + return Pointer{}, err + } + buf = buf[:n] + + return ReadPointerFromBuffer(buf) +} + +var oidPattern = regexp.MustCompile(`^[a-f\d]{64}$`) + +// ReadPointerFromBuffer will return a pointer if the provided byte slice is a pointer file or an error otherwise. +func ReadPointerFromBuffer(buf []byte) (Pointer, error) { + var p Pointer + + headString := string(buf) + if !strings.HasPrefix(headString, MetaFileIdentifier) { + return p, ErrMissingPrefix + } + + splitLines := strings.Split(headString, "\n") + if len(splitLines) < 3 { + return p, ErrInvalidStructure + } + + oid := strings.TrimPrefix(splitLines[1], MetaFileOidPrefix) + if len(oid) != 64 || !oidPattern.MatchString(oid) { + return p, ErrInvalidOIDFormat + } + size, err := strconv.ParseInt(strings.TrimPrefix(splitLines[2], "size "), 10, 64) + if err != nil { + return p, err + } + + p.Oid = oid + p.Size = size + + return p, nil +} + +// IsValid checks if the pointer has a valid structure. +// It doesn't check if the pointed-to-content exists. +func (p Pointer) IsValid() bool { + if len(p.Oid) != 64 { + return false + } + if !oidPattern.MatchString(p.Oid) { + return false + } + if p.Size < 0 { + return false + } + return true +} + +// StringContent returns the string representation of the pointer +// https://github.com/git-lfs/git-lfs/blob/main/docs/spec.md#the-pointer +func (p Pointer) StringContent() string { + return fmt.Sprintf("%s\n%s%s\nsize %d\n", MetaFileIdentifier, MetaFileOidPrefix, p.Oid, p.Size) +} + +// RelativePath returns the relative storage path of the pointer +func (p Pointer) RelativePath() string { + if len(p.Oid) < 5 { + return p.Oid + } + + return path.Join(p.Oid[0:2], p.Oid[2:4], p.Oid[4:]) +} + +func (p Pointer) LogString() string { + if p.Oid == "" && p.Size == 0 { + return "<LFSPointer empty>" + } + return fmt.Sprintf("<LFSPointer %s:%d>", p.Oid, p.Size) +} + +// GeneratePointer generates a pointer for arbitrary content +func GeneratePointer(content io.Reader) (Pointer, error) { + h := sha256.New() + c, err := io.Copy(h, content) + if err != nil { + return Pointer{}, err + } + sum := h.Sum(nil) + return Pointer{Oid: hex.EncodeToString(sum), Size: c}, nil +} diff --git a/modules/lfs/pointer_scanner.go b/modules/lfs/pointer_scanner.go new file mode 100644 index 0000000..8bbf7a8 --- /dev/null +++ b/modules/lfs/pointer_scanner.go @@ -0,0 +1,109 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package lfs + +import ( + "bufio" + "context" + "io" + "strconv" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/git/pipeline" +) + +// SearchPointerBlobs scans the whole repository for LFS pointer files +func SearchPointerBlobs(ctx context.Context, repo *git.Repository, pointerChan chan<- PointerBlob, errChan chan<- error) { + basePath := repo.Path + + catFileCheckReader, catFileCheckWriter := io.Pipe() + shasToBatchReader, shasToBatchWriter := io.Pipe() + catFileBatchReader, catFileBatchWriter := io.Pipe() + + wg := sync.WaitGroup{} + wg.Add(4) + + // Create the go-routines in reverse order. + + // 4. Take the output of cat-file --batch and check if each file in turn + // to see if they're pointers to files in the LFS store + go createPointerResultsFromCatFileBatch(ctx, catFileBatchReader, &wg, pointerChan) + + // 3. Take the shas of the blobs and batch read them + go pipeline.CatFileBatch(ctx, shasToBatchReader, catFileBatchWriter, &wg, basePath) + + // 2. From the provided objects restrict to blobs <=1k + go pipeline.BlobsLessThan1024FromCatFileBatchCheck(catFileCheckReader, shasToBatchWriter, &wg) + + // 1. Run batch-check on all objects in the repository + if git.CheckGitVersionAtLeast("2.6.0") != nil { + revListReader, revListWriter := io.Pipe() + shasToCheckReader, shasToCheckWriter := io.Pipe() + wg.Add(2) + go pipeline.CatFileBatchCheck(ctx, shasToCheckReader, catFileCheckWriter, &wg, basePath) + go pipeline.BlobsFromRevListObjects(revListReader, shasToCheckWriter, &wg) + go pipeline.RevListAllObjects(ctx, revListWriter, &wg, basePath, errChan) + } else { + go pipeline.CatFileBatchCheckAllObjects(ctx, catFileCheckWriter, &wg, basePath, errChan) + } + wg.Wait() + + close(pointerChan) + close(errChan) +} + +func createPointerResultsFromCatFileBatch(ctx context.Context, catFileBatchReader *io.PipeReader, wg *sync.WaitGroup, pointerChan chan<- PointerBlob) { + defer wg.Done() + defer catFileBatchReader.Close() + + bufferedReader := bufio.NewReader(catFileBatchReader) + buf := make([]byte, 1025) + +loop: + for { + select { + case <-ctx.Done(): + break loop + default: + } + + // File descriptor line: sha + sha, err := bufferedReader.ReadString(' ') + if err != nil { + _ = catFileBatchReader.CloseWithError(err) + break + } + sha = strings.TrimSpace(sha) + // Throw away the blob + if _, err := bufferedReader.ReadString(' '); err != nil { + _ = catFileBatchReader.CloseWithError(err) + break + } + sizeStr, err := bufferedReader.ReadString('\n') + if err != nil { + _ = catFileBatchReader.CloseWithError(err) + break + } + size, err := strconv.Atoi(sizeStr[:len(sizeStr)-1]) + if err != nil { + _ = catFileBatchReader.CloseWithError(err) + break + } + pointerBuf := buf[:size+1] + if _, err := io.ReadFull(bufferedReader, pointerBuf); err != nil { + _ = catFileBatchReader.CloseWithError(err) + break + } + pointerBuf = pointerBuf[:size] + // Now we need to check if the pointerBuf is an LFS pointer + pointer, _ := ReadPointerFromBuffer(pointerBuf) + if !pointer.IsValid() { + continue + } + + pointerChan <- PointerBlob{Hash: sha, Pointer: pointer} + } +} diff --git a/modules/lfs/pointer_test.go b/modules/lfs/pointer_test.go new file mode 100644 index 0000000..9299a8a --- /dev/null +++ b/modules/lfs/pointer_test.go @@ -0,0 +1,103 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package lfs + +import ( + "path" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestStringContent(t *testing.T) { + p := Pointer{Oid: "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", Size: 1234} + expected := "version https://git-lfs.github.com/spec/v1\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize 1234\n" + assert.Equal(t, expected, p.StringContent()) +} + +func TestRelativePath(t *testing.T) { + p := Pointer{Oid: "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"} + expected := path.Join("4d", "7a", "214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393") + assert.Equal(t, expected, p.RelativePath()) + + p2 := Pointer{Oid: "4d7a"} + assert.Equal(t, "4d7a", p2.RelativePath()) +} + +func TestIsValid(t *testing.T) { + p := Pointer{} + assert.False(t, p.IsValid()) + + p = Pointer{Oid: "123"} + assert.False(t, p.IsValid()) + + p = Pointer{Oid: "z4cb57646c54a297c9807697e80a30946f79a4b82cb079d2606847825b1812cc"} + assert.False(t, p.IsValid()) + + p = Pointer{Oid: "94cb57646c54a297c9807697e80a30946f79a4b82cb079d2606847825b1812cc"} + assert.True(t, p.IsValid()) + + p = Pointer{Oid: "94cb57646c54a297c9807697e80a30946f79a4b82cb079d2606847825b1812cc", Size: -1} + assert.False(t, p.IsValid()) +} + +func TestGeneratePointer(t *testing.T) { + p, err := GeneratePointer(strings.NewReader("Gitea")) + require.NoError(t, err) + assert.True(t, p.IsValid()) + assert.Equal(t, "94cb57646c54a297c9807697e80a30946f79a4b82cb079d2606847825b1812cc", p.Oid) + assert.Equal(t, int64(5), p.Size) +} + +func TestReadPointerFromBuffer(t *testing.T) { + p, err := ReadPointerFromBuffer([]byte{}) + require.ErrorIs(t, err, ErrMissingPrefix) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("test")) + require.ErrorIs(t, err, ErrMissingPrefix) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\n")) + require.ErrorIs(t, err, ErrInvalidStructure) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\noid sha256:4d7a\nsize 1234\n")) + require.ErrorIs(t, err, ErrInvalidOIDFormat) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\noid sha256:4d7a2146z4ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize 1234\n")) + require.ErrorIs(t, err, ErrInvalidOIDFormat) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\ntest 1234\n")) + require.Error(t, err) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize test\n")) + require.Error(t, err) + assert.False(t, p.IsValid()) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize 1234\n")) + require.NoError(t, err) + assert.True(t, p.IsValid()) + assert.Equal(t, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) + assert.Equal(t, int64(1234), p.Size) + + p, err = ReadPointerFromBuffer([]byte("version https://git-lfs.github.com/spec/v1\noid 
sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize 1234\ntest")) + require.NoError(t, err) + assert.True(t, p.IsValid()) + assert.Equal(t, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) + assert.Equal(t, int64(1234), p.Size) +} + +func TestReadPointer(t *testing.T) { + p, err := ReadPointer(strings.NewReader("version https://git-lfs.github.com/spec/v1\noid sha256:4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393\nsize 1234\n")) + require.NoError(t, err) + assert.True(t, p.IsValid()) + assert.Equal(t, "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393", p.Oid) + assert.Equal(t, int64(1234), p.Size) +} diff --git a/modules/lfs/shared.go b/modules/lfs/shared.go new file mode 100644 index 0000000..a4326b5 --- /dev/null +++ b/modules/lfs/shared.go @@ -0,0 +1,115 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "errors" + "fmt" + "time" + + "code.gitea.io/gitea/modules/util" +) + +const ( + // MediaType contains the media type for LFS server requests + MediaType = "application/vnd.git-lfs+json" + // Some LFS servers offer content with other types, so fallback to '*/*' if application/vnd.git-lfs+json cannot be served + AcceptHeader = "application/vnd.git-lfs+json;q=0.9, */*;q=0.8" +) + +// BatchRequest contains multiple requests processed in one batch operation. +// https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md#requests +type BatchRequest struct { + Operation string `json:"operation"` + Transfers []string `json:"transfers,omitempty"` + Ref *Reference `json:"ref,omitempty"` + Objects []Pointer `json:"objects"` +} + +// Reference contains a git reference. +// https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md#ref-property +type Reference struct { + Name string `json:"name"` +} + +// Pointer contains LFS pointer data +type Pointer struct { + Oid string `json:"oid" xorm:"UNIQUE(s) INDEX NOT NULL"` + Size int64 `json:"size" xorm:"NOT NULL"` +} + +// BatchResponse contains multiple object metadata Representation structures +// for use with the batch API. +// https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md#successful-responses +type BatchResponse struct { + Transfer string `json:"transfer,omitempty"` + Objects []*ObjectResponse `json:"objects"` +} + +// ObjectResponse is object metadata as seen by clients of the LFS server. +type ObjectResponse struct { + Pointer + Actions map[string]*Link `json:"actions,omitempty"` + Links map[string]*Link `json:"_links,omitempty"` + Error *ObjectError `json:"error,omitempty"` +} + +// Link provides a structure with information about how to access a object. +type Link struct { + Href string `json:"href"` + Header map[string]string `json:"header,omitempty"` + ExpiresAt *time.Time `json:"expires_at,omitempty"` +} + +// ObjectError defines the JSON structure returned to the client in case of an error. +type ObjectError struct { + Code int `json:"code"` + Message string `json:"message"` +} + +var ( + // See https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md#successful-responses + // LFS object error codes should match HTTP status codes where possible: + // 404 - The object does not exist on the server. + // 409 - The specified hash algorithm disagrees with the server's acceptable options. + // 410 - The object was removed by the owner. + // 422 - Validation error. 
+ + ErrObjectNotExist = util.ErrNotExist // the object does not exist on the server + ErrObjectHashMismatch = errors.New("the specified hash algorithm disagrees with the server's acceptable options") + ErrObjectRemoved = errors.New("the object was removed by the owner") + ErrObjectValidation = errors.New("validation error") +) + +func (e *ObjectError) Error() string { + return fmt.Sprintf("[%d] %s", e.Code, e.Message) +} + +func (e *ObjectError) Unwrap() error { + switch e.Code { + case 404: + return ErrObjectNotExist + case 409: + return ErrObjectHashMismatch + case 410: + return ErrObjectRemoved + case 422: + return ErrObjectValidation + default: + return errors.New(e.Message) + } +} + +// PointerBlob associates a Git blob with a Pointer. +type PointerBlob struct { + Hash string + Pointer +} + +// ErrorResponse describes the error to the client. +type ErrorResponse struct { + Message string + DocumentationURL string `json:"documentation_url,omitempty"` + RequestID string `json:"request_id,omitempty"` +} diff --git a/modules/lfs/transferadapter.go b/modules/lfs/transferadapter.go new file mode 100644 index 0000000..fbc3a3a --- /dev/null +++ b/modules/lfs/transferadapter.go @@ -0,0 +1,89 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "bytes" + "context" + "io" + "net/http" + + "code.gitea.io/gitea/modules/json" + "code.gitea.io/gitea/modules/log" +) + +// TransferAdapter represents an adapter for downloading/uploading LFS objects. +type TransferAdapter interface { + Name() string + Download(ctx context.Context, l *Link) (io.ReadCloser, error) + Upload(ctx context.Context, l *Link, p Pointer, r io.Reader) error + Verify(ctx context.Context, l *Link, p Pointer) error +} + +// BasicTransferAdapter implements the "basic" adapter. +type BasicTransferAdapter struct { + client *http.Client +} + +// Name returns the name of the adapter. +func (a *BasicTransferAdapter) Name() string { + return "basic" +} + +// Download reads the download location and downloads the data. +func (a *BasicTransferAdapter) Download(ctx context.Context, l *Link) (io.ReadCloser, error) { + req, err := createRequest(ctx, http.MethodGet, l.Href, l.Header, nil) + if err != nil { + return nil, err + } + log.Debug("Download Request: %+v", req) + resp, err := performRequest(ctx, a.client, req) + if err != nil { + return nil, err + } + return resp.Body, nil +} + +// Upload sends the content to the LFS server. 
+func (a *BasicTransferAdapter) Upload(ctx context.Context, l *Link, p Pointer, r io.Reader) error { + req, err := createRequest(ctx, http.MethodPut, l.Href, l.Header, r) + if err != nil { + return err + } + if req.Header.Get("Content-Type") == "" { + req.Header.Set("Content-Type", "application/octet-stream") + } + if req.Header.Get("Transfer-Encoding") == "chunked" { + req.TransferEncoding = []string{"chunked"} + } + req.ContentLength = p.Size + + res, err := performRequest(ctx, a.client, req) + if err != nil { + return err + } + defer res.Body.Close() + return nil +} + +// Verify calls the verify handler on the LFS server +func (a *BasicTransferAdapter) Verify(ctx context.Context, l *Link, p Pointer) error { + b, err := json.Marshal(p) + if err != nil { + log.Error("Error encoding json: %v", err) + return err + } + + req, err := createRequest(ctx, http.MethodPost, l.Href, l.Header, bytes.NewReader(b)) + if err != nil { + return err + } + req.Header.Set("Content-Type", MediaType) + res, err := performRequest(ctx, a.client, req) + if err != nil { + return err + } + defer res.Body.Close() + return nil +} diff --git a/modules/lfs/transferadapter_test.go b/modules/lfs/transferadapter_test.go new file mode 100644 index 0000000..0766e4a --- /dev/null +++ b/modules/lfs/transferadapter_test.go @@ -0,0 +1,172 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package lfs + +import ( + "bytes" + "context" + "io" + "net/http" + "strings" + "testing" + + "code.gitea.io/gitea/modules/json" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBasicTransferAdapterName(t *testing.T) { + a := &BasicTransferAdapter{} + + assert.Equal(t, "basic", a.Name()) +} + +func TestBasicTransferAdapter(t *testing.T) { + p := Pointer{Oid: "b5a2c96250612366ea272ffac6d9744aaf4b45aacd96aa7cfcb931ee3b558259", Size: 5} + + roundTripHandler := func(req *http.Request) *http.Response { + assert.Equal(t, AcceptHeader, req.Header.Get("Accept")) + assert.Equal(t, "test-value", req.Header.Get("test-header")) + + url := req.URL.String() + if strings.Contains(url, "download-request") { + assert.Equal(t, "GET", req.Method) + + return &http.Response{StatusCode: http.StatusOK, Body: io.NopCloser(bytes.NewBufferString("dummy"))} + } else if strings.Contains(url, "upload-request") { + assert.Equal(t, "PUT", req.Method) + assert.Equal(t, "application/octet-stream", req.Header.Get("Content-Type")) + + b, err := io.ReadAll(req.Body) + require.NoError(t, err) + assert.Equal(t, "dummy", string(b)) + + return &http.Response{StatusCode: http.StatusOK} + } else if strings.Contains(url, "verify-request") { + assert.Equal(t, "POST", req.Method) + assert.Equal(t, MediaType, req.Header.Get("Content-Type")) + + var vp Pointer + err := json.NewDecoder(req.Body).Decode(&vp) + require.NoError(t, err) + assert.Equal(t, p.Oid, vp.Oid) + assert.Equal(t, p.Size, vp.Size) + + return &http.Response{StatusCode: http.StatusOK} + } else if strings.Contains(url, "error-response") { + er := &ErrorResponse{ + Message: "Object not found", + } + payload := new(bytes.Buffer) + json.NewEncoder(payload).Encode(er) + + return &http.Response{StatusCode: http.StatusNotFound, Body: io.NopCloser(payload)} + } + t.Errorf("Unknown test case: %s", url) + return nil + } + + hc := &http.Client{Transport: RoundTripFunc(roundTripHandler)} + a := &BasicTransferAdapter{hc} + + t.Run("Download", func(t *testing.T) { + cases := []struct { + link *Link + expectederror string + }{ + // case 0 + { + 
link: &Link{ + Href: "https://download-request.io", + Header: map[string]string{"test-header": "test-value"}, + }, + expectederror: "", + }, + // case 1 + { + link: &Link{ + Href: "https://error-response.io", + Header: map[string]string{"test-header": "test-value"}, + }, + expectederror: "Object not found", + }, + } + + for n, c := range cases { + _, err := a.Download(context.Background(), c.link) + if len(c.expectederror) > 0 { + assert.True(t, strings.Contains(err.Error(), c.expectederror), "case %d: '%s' should contain '%s'", n, err.Error(), c.expectederror) + } else { + require.NoError(t, err, "case %d", n) + } + } + }) + + t.Run("Upload", func(t *testing.T) { + cases := []struct { + link *Link + expectederror string + }{ + // case 0 + { + link: &Link{ + Href: "https://upload-request.io", + Header: map[string]string{"test-header": "test-value"}, + }, + expectederror: "", + }, + // case 1 + { + link: &Link{ + Href: "https://error-response.io", + Header: map[string]string{"test-header": "test-value"}, + }, + expectederror: "Object not found", + }, + } + + for n, c := range cases { + err := a.Upload(context.Background(), c.link, p, bytes.NewBufferString("dummy")) + if len(c.expectederror) > 0 { + assert.True(t, strings.Contains(err.Error(), c.expectederror), "case %d: '%s' should contain '%s'", n, err.Error(), c.expectederror) + } else { + require.NoError(t, err, "case %d", n) + } + } + }) + + t.Run("Verify", func(t *testing.T) { + cases := []struct { + link *Link + expectederror string + }{ + // case 0 + { + link: &Link{ + Href: "https://verify-request.io", + Header: map[string]string{"test-header": "test-value"}, + }, + expectederror: "", + }, + // case 1 + { + link: &Link{ + Href: "https://error-response.io", + Header: map[string]string{"test-header": "test-value"}, + }, + expectederror: "Object not found", + }, + } + + for n, c := range cases { + err := a.Verify(context.Background(), c.link, p) + if len(c.expectederror) > 0 { + assert.True(t, strings.Contains(err.Error(), c.expectederror), "case %d: '%s' should contain '%s'", n, err.Error(), c.expectederror) + } else { + require.NoError(t, err, "case %d", n) + } + } + }) +} |
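For reference, the pointer helpers added in pointer.go round-trip with the values exercised in pointer_test.go above; a minimal sketch:

package main

import (
	"fmt"
	"strings"

	"code.gitea.io/gitea/modules/lfs"
)

func main() {
	// GeneratePointer hashes arbitrary content into a Pointer (oid + size).
	p, err := lfs.GeneratePointer(strings.NewReader("Gitea"))
	if err != nil {
		panic(err)
	}
	fmt.Print(p.StringContent())
	// version https://git-lfs.github.com/spec/v1
	// oid sha256:94cb57646c54a297c9807697e80a30946f79a4b82cb079d2606847825b1812cc
	// size 5

	// ReadPointer parses pointer file content back into a Pointer.
	parsed, err := lfs.ReadPointer(strings.NewReader(p.StringContent()))
	if err != nil {
		panic(err)
	}
	fmt.Println(parsed.IsValid())      // true
	fmt.Println(parsed.RelativePath()) // 94/cb/57646c54a297c9807697e80a30946f79a4b82cb079d2606847825b1812cc
}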