1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
|
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"crypto/md5"
"crypto/sha256"
"encoding/base64"
"encoding/hex"
"errors"
"fmt"
"hash"
"io"
"path/filepath"
"sort"
"strings"
"time"
"code.gitea.io/gitea/models/actions"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
)
func saveUploadChunkBase(st storage.ObjectStorage, ctx *ArtifactContext,
artifact *actions.ActionArtifact,
contentSize, runID, start, end, length int64, checkMd5 bool,
) (int64, error) {
// build chunk store path
storagePath := fmt.Sprintf("tmp%d/%d-%d-%d-%d.chunk", runID, runID, artifact.ID, start, end)
var r io.Reader = ctx.Req.Body
var hasher hash.Hash
if checkMd5 {
// use io.TeeReader to avoid reading all body to md5 sum.
// it writes data to hasher after reading end
// if hash is not matched, delete the read-end result
hasher = md5.New()
r = io.TeeReader(r, hasher)
}
// save chunk to storage
writtenSize, err := st.Save(storagePath, r, contentSize)
if err != nil {
return -1, fmt.Errorf("save chunk to storage error: %v", err)
}
var checkErr error
if checkMd5 {
// check md5
reqMd5String := ctx.Req.Header.Get(artifactXActionsResultsMD5Header)
chunkMd5String := base64.StdEncoding.EncodeToString(hasher.Sum(nil))
log.Info("[artifact] check chunk md5, sum: %s, header: %s", chunkMd5String, reqMd5String)
// if md5 not match, delete the chunk
if reqMd5String != chunkMd5String {
checkErr = fmt.Errorf("md5 not match")
}
}
if writtenSize != contentSize {
checkErr = errors.Join(checkErr, fmt.Errorf("contentSize not match body size"))
}
if checkErr != nil {
if err := st.Delete(storagePath); err != nil {
log.Error("Error deleting chunk: %s, %v", storagePath, err)
}
return -1, checkErr
}
log.Info("[artifact] save chunk %s, size: %d, artifact id: %d, start: %d, end: %d",
storagePath, contentSize, artifact.ID, start, end)
// return chunk total size
return length, nil
}
func saveUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
artifact *actions.ActionArtifact,
contentSize, runID int64,
) (int64, error) {
// parse content-range header, format: bytes 0-1023/146515
contentRange := ctx.Req.Header.Get("Content-Range")
start, end, length := int64(0), int64(0), int64(0)
if _, err := fmt.Sscanf(contentRange, "bytes %d-%d/%d", &start, &end, &length); err != nil {
log.Warn("parse content range error: %v, content-range: %s", err, contentRange)
return -1, fmt.Errorf("parse content range error: %v", err)
}
return saveUploadChunkBase(st, ctx, artifact, contentSize, runID, start, end, length, true)
}
func appendUploadChunk(st storage.ObjectStorage, ctx *ArtifactContext,
artifact *actions.ActionArtifact,
start, contentSize, runID int64,
) (int64, error) {
end := start + contentSize - 1
return saveUploadChunkBase(st, ctx, artifact, contentSize, runID, start, end, contentSize, false)
}
type chunkFileItem struct {
RunID int64
ArtifactID int64
Start int64
End int64
Path string
}
func listChunksByRunID(st storage.ObjectStorage, runID int64) (map[int64][]*chunkFileItem, error) {
storageDir := fmt.Sprintf("tmp%d", runID)
var chunks []*chunkFileItem
if err := st.IterateObjects(storageDir, func(fpath string, obj storage.Object) error {
baseName := filepath.Base(fpath)
// when read chunks from storage, it only contains storage dir and basename,
// no matter the subdirectory setting in storage config
item := chunkFileItem{Path: storageDir + "/" + baseName}
if _, err := fmt.Sscanf(baseName, "%d-%d-%d-%d.chunk", &item.RunID, &item.ArtifactID, &item.Start, &item.End); err != nil {
return fmt.Errorf("parse content range error: %v", err)
}
chunks = append(chunks, &item)
return nil
}); err != nil {
return nil, err
}
// chunks group by artifact id
chunksMap := make(map[int64][]*chunkFileItem)
for _, c := range chunks {
chunksMap[c.ArtifactID] = append(chunksMap[c.ArtifactID], c)
}
return chunksMap, nil
}
func listChunksByRunIDV4(st storage.ObjectStorage, runID, artifactID int64, blist *BlockList) ([]*chunkFileItem, error) {
storageDir := fmt.Sprintf("tmpv4%d", runID)
var chunks []*chunkFileItem
chunkMap := map[string]*chunkFileItem{}
dummy := &chunkFileItem{}
for _, name := range blist.Latest {
chunkMap[name] = dummy
}
if err := st.IterateObjects(storageDir, func(fpath string, obj storage.Object) error {
baseName := filepath.Base(fpath)
if !strings.HasPrefix(baseName, "block-") {
return nil
}
// when read chunks from storage, it only contains storage dir and basename,
// no matter the subdirectory setting in storage config
item := chunkFileItem{Path: storageDir + "/" + baseName, ArtifactID: artifactID}
var size int64
var b64chunkName string
if _, err := fmt.Sscanf(baseName, "block-%d-%d-%s", &item.RunID, &size, &b64chunkName); err != nil {
return fmt.Errorf("parse content range error: %v", err)
}
rchunkName, err := base64.URLEncoding.DecodeString(b64chunkName)
if err != nil {
return fmt.Errorf("failed to parse chunkName: %v", err)
}
chunkName := string(rchunkName)
item.End = item.Start + size - 1
if _, ok := chunkMap[chunkName]; ok {
chunkMap[chunkName] = &item
}
return nil
}); err != nil {
return nil, err
}
for i, name := range blist.Latest {
chunk, ok := chunkMap[name]
if !ok || chunk.Path == "" {
return nil, fmt.Errorf("missing Chunk (%d/%d): %s", i, len(blist.Latest), name)
}
chunks = append(chunks, chunk)
if i > 0 {
chunk.Start = chunkMap[blist.Latest[i-1]].End + 1
chunk.End += chunk.Start
}
}
return chunks, nil
}
func mergeChunksForRun(ctx *ArtifactContext, st storage.ObjectStorage, runID int64, artifactName string) error {
// read all db artifacts by name
artifacts, err := db.Find[actions.ActionArtifact](ctx, actions.FindArtifactsOptions{
RunID: runID,
ArtifactName: artifactName,
})
if err != nil {
return err
}
// read all uploading chunks from storage
chunksMap, err := listChunksByRunID(st, runID)
if err != nil {
return err
}
// range db artifacts to merge chunks
for _, art := range artifacts {
chunks, ok := chunksMap[art.ID]
if !ok {
log.Debug("artifact %d chunks not found", art.ID)
continue
}
if err := mergeChunksForArtifact(ctx, chunks, st, art, ""); err != nil {
return err
}
}
return nil
}
func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st storage.ObjectStorage, artifact *actions.ActionArtifact, checksum string) error {
sort.Slice(chunks, func(i, j int) bool {
return chunks[i].Start < chunks[j].Start
})
allChunks := make([]*chunkFileItem, 0)
startAt := int64(-1)
// check if all chunks are uploaded and in order and clean repeated chunks
for _, c := range chunks {
// startAt is -1 means this is the first chunk
// previous c.ChunkEnd + 1 == c.ChunkStart means this chunk is in order
// StartAt is not -1 and c.ChunkStart is not startAt + 1 means there is a chunk missing
if c.Start == (startAt + 1) {
allChunks = append(allChunks, c)
startAt = c.End
}
}
// if the last chunk.End + 1 is not equal to chunk.ChunkLength, means chunks are not uploaded completely
if startAt+1 != artifact.FileCompressedSize {
log.Debug("[artifact] chunks are not uploaded completely, artifact_id: %d", artifact.ID)
return nil
}
// use multiReader
readers := make([]io.Reader, 0, len(allChunks))
closeReaders := func() {
for _, r := range readers {
_ = r.(io.Closer).Close() // it guarantees to be io.Closer by the following loop's Open function
}
readers = nil
}
defer closeReaders()
for _, c := range allChunks {
var readCloser io.ReadCloser
var err error
if readCloser, err = st.Open(c.Path); err != nil {
return fmt.Errorf("open chunk error: %v, %s", err, c.Path)
}
readers = append(readers, readCloser)
}
mergedReader := io.MultiReader(readers...)
shaPrefix := "sha256:"
var hash hash.Hash
if strings.HasPrefix(checksum, shaPrefix) {
hash = sha256.New()
}
if hash != nil {
mergedReader = io.TeeReader(mergedReader, hash)
}
// if chunk is gzip, use gz as extension
// download-artifact action will use content-encoding header to decide if it should decompress the file
extension := "chunk"
if artifact.ContentEncoding == "gzip" {
extension = "chunk.gz"
}
// save merged file
storagePath := fmt.Sprintf("%d/%d/%d.%s", artifact.RunID%255, artifact.ID%255, time.Now().UnixNano(), extension)
written, err := st.Save(storagePath, mergedReader, artifact.FileCompressedSize)
if err != nil {
return fmt.Errorf("save merged file error: %v", err)
}
if written != artifact.FileCompressedSize {
return fmt.Errorf("merged file size is not equal to chunk length")
}
defer func() {
closeReaders() // close before delete
// drop chunks
for _, c := range chunks {
if err := st.Delete(c.Path); err != nil {
log.Warn("Error deleting chunk: %s, %v", c.Path, err)
}
}
}()
if hash != nil {
rawChecksum := hash.Sum(nil)
actualChecksum := hex.EncodeToString(rawChecksum)
if !strings.HasSuffix(checksum, actualChecksum) {
return fmt.Errorf("update artifact error checksum is invalid %v vs %v", checksum, actualChecksum)
}
}
// save storage path to artifact
log.Debug("[artifact] merge chunks to artifact: %d, %s, old:%s", artifact.ID, storagePath, artifact.StoragePath)
// if artifact is already uploaded, delete the old file
if artifact.StoragePath != "" {
if err := st.Delete(artifact.StoragePath); err != nil {
log.Warn("Error deleting old artifact: %s, %v", artifact.StoragePath, err)
}
}
artifact.StoragePath = storagePath
artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
return fmt.Errorf("update artifact error: %v", err)
}
return nil
}
|