From dd136858f1ea40ad3c94191d647487fa4f31926c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 18 Oct 2024 20:33:49 +0200 Subject: Adding upstream version 9.0.0. Signed-off-by: Daniel Baumann --- modules/repository/repo.go | 388 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) create mode 100644 modules/repository/repo.go (limited to 'modules/repository/repo.go') diff --git a/modules/repository/repo.go b/modules/repository/repo.go new file mode 100644 index 0000000..e08bc37 --- /dev/null +++ b/modules/repository/repo.go @@ -0,0 +1,388 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Copyright 2024 The Forgejo Authors c/o Codeberg e.V.. All rights reserved. +// SPDX-License-Identifier: MIT + +package repository + +import ( + "context" + "errors" + "fmt" + "io" + "strings" + "time" + + "code.gitea.io/gitea/models/db" + git_model "code.gitea.io/gitea/models/git" + repo_model "code.gitea.io/gitea/models/repo" + user_model "code.gitea.io/gitea/models/user" + "code.gitea.io/gitea/modules/container" + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/lfs" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/timeutil" +) + +/* +GitHub, GitLab, Gogs: *.wiki.git +BitBucket: *.git/wiki +*/ +var commonWikiURLSuffixes = []string{".wiki.git", ".git/wiki"} + +// WikiRemoteURL returns accessible repository URL for wiki if exists. +// Otherwise, it returns an empty string. +func WikiRemoteURL(ctx context.Context, remote string) string { + remote = strings.TrimSuffix(remote, ".git") + for _, suffix := range commonWikiURLSuffixes { + wikiURL := remote + suffix + if git.IsRepoURLAccessible(ctx, wikiURL) { + return wikiURL + } + } + return "" +} + +// SyncRepoTags synchronizes releases table with repository tags +func SyncRepoTags(ctx context.Context, repoID int64) error { + repo, err := repo_model.GetRepositoryByID(ctx, repoID) + if err != nil { + return err + } + + gitRepo, err := gitrepo.OpenRepository(ctx, repo) + if err != nil { + return err + } + defer gitRepo.Close() + + return SyncReleasesWithTags(ctx, repo, gitRepo) +} + +// SyncReleasesWithTags synchronizes release table with repository tags +func SyncReleasesWithTags(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Debug("SyncReleasesWithTags: in Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + + // optimized procedure for pull-mirrors which saves a lot of time (in + // particular for repos with many tags). + if repo.IsMirror { + return pullMirrorReleaseSync(ctx, repo, gitRepo) + } + + existingRelTags := make(container.Set[string]) + opts := repo_model.FindReleasesOptions{ + IncludeDrafts: true, + IncludeTags: true, + ListOptions: db.ListOptions{PageSize: 50}, + RepoID: repo.ID, + } + for page := 1; ; page++ { + opts.Page = page + rels, err := db.Find[repo_model.Release](gitRepo.Ctx, opts) + if err != nil { + return fmt.Errorf("unable to GetReleasesByRepoID in Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + if len(rels) == 0 { + break + } + for _, rel := range rels { + if rel.IsDraft { + continue + } + commitID, err := gitRepo.GetTagCommitID(rel.TagName) + if err != nil && !git.IsErrNotExist(err) { + return fmt.Errorf("unable to GetTagCommitID for %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) + } + if git.IsErrNotExist(err) || commitID != rel.Sha1 { + if err := repo_model.PushUpdateDeleteTag(ctx, repo, rel.TagName); err != nil { + return fmt.Errorf("unable to PushUpdateDeleteTag: %q in Repo[%d:%s/%s]: %w", rel.TagName, repo.ID, repo.OwnerName, repo.Name, err) + } + } else { + existingRelTags.Add(strings.ToLower(rel.TagName)) + } + } + } + + _, err := gitRepo.WalkReferences(git.ObjectTag, 0, 0, func(sha1, refname string) error { + tagName := strings.TrimPrefix(refname, git.TagPrefix) + if existingRelTags.Contains(strings.ToLower(tagName)) { + return nil + } + + if err := PushUpdateAddTag(ctx, repo, gitRepo, tagName, sha1, refname); err != nil { + // sometimes, some tags will be sync failed. i.e. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tag/?h=v2.6.11 + // this is a tree object, not a tag object which created before git + log.Error("unable to PushUpdateAddTag: %q to Repo[%d:%s/%s]: %v", tagName, repo.ID, repo.OwnerName, repo.Name, err) + } + + return nil + }) + return err +} + +// PushUpdateAddTag must be called for any push actions to add tag +func PushUpdateAddTag(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, tagName, sha1, refname string) error { + tag, err := gitRepo.GetTagWithID(sha1, tagName) + if err != nil { + return fmt.Errorf("unable to GetTag: %w", err) + } + commit, err := tag.Commit(gitRepo) + if err != nil { + return fmt.Errorf("unable to get tag Commit: %w", err) + } + + sig := tag.Tagger + if sig == nil { + sig = commit.Author + } + if sig == nil { + sig = commit.Committer + } + + var author *user_model.User + createdAt := time.Unix(1, 0) + + if sig != nil { + author, err = user_model.GetUserByEmail(ctx, sig.Email) + if err != nil && !user_model.IsErrUserNotExist(err) { + return fmt.Errorf("unable to GetUserByEmail for %q: %w", sig.Email, err) + } + createdAt = sig.When + } + + commitsCount, err := commit.CommitsCount() + if err != nil { + return fmt.Errorf("unable to get CommitsCount: %w", err) + } + + rel := repo_model.Release{ + RepoID: repo.ID, + TagName: tagName, + LowerTagName: strings.ToLower(tagName), + Sha1: commit.ID.String(), + NumCommits: commitsCount, + CreatedUnix: timeutil.TimeStamp(createdAt.Unix()), + IsTag: true, + } + if author != nil { + rel.PublisherID = author.ID + } + + return repo_model.SaveOrUpdateTag(ctx, repo, &rel) +} + +// StoreMissingLfsObjectsInRepository downloads missing LFS objects +func StoreMissingLfsObjectsInRepository(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository, lfsClient lfs.Client) error { + contentStore := lfs.NewContentStore() + + pointerChan := make(chan lfs.PointerBlob) + errChan := make(chan error, 1) + go lfs.SearchPointerBlobs(ctx, gitRepo, pointerChan, errChan) + + downloadObjects := func(pointers []lfs.Pointer) error { + err := lfsClient.Download(ctx, pointers, func(p lfs.Pointer, content io.ReadCloser, objectError error) error { + if objectError != nil { + if errors.Is(objectError, lfs.ErrObjectNotExist) { + log.Warn("Repo[%-v]: Ignore missing LFS object %-v: %v", repo, p, objectError) + return nil + } + return objectError + } + + defer content.Close() + + _, err := git_model.NewLFSMetaObject(ctx, repo.ID, p) + if err != nil { + log.Error("Repo[%-v]: Error creating LFS meta object %-v: %v", repo, p, err) + return err + } + + if err := contentStore.Put(p, content); err != nil { + log.Error("Repo[%-v]: Error storing content for LFS meta object %-v: %v", repo, p, err) + if _, err2 := git_model.RemoveLFSMetaObjectByOid(ctx, repo.ID, p.Oid); err2 != nil { + log.Error("Repo[%-v]: Error removing LFS meta object %-v: %v", repo, p, err2) + } + return err + } + return nil + }) + if err != nil { + select { + case <-ctx.Done(): + return nil + default: + } + } + return err + } + + var batch []lfs.Pointer + for pointerBlob := range pointerChan { + meta, err := git_model.GetLFSMetaObjectByOid(ctx, repo.ID, pointerBlob.Oid) + if err != nil && err != git_model.ErrLFSObjectNotExist { + log.Error("Repo[%-v]: Error querying LFS meta object %-v: %v", repo, pointerBlob.Pointer, err) + return err + } + if meta != nil { + log.Trace("Repo[%-v]: Skipping unknown LFS meta object %-v", repo, pointerBlob.Pointer) + continue + } + + log.Trace("Repo[%-v]: LFS object %-v not present in repository", repo, pointerBlob.Pointer) + + exist, err := contentStore.Exists(pointerBlob.Pointer) + if err != nil { + log.Error("Repo[%-v]: Error checking if LFS object %-v exists: %v", repo, pointerBlob.Pointer, err) + return err + } + + if exist { + log.Trace("Repo[%-v]: LFS object %-v already present; creating meta object", repo, pointerBlob.Pointer) + _, err := git_model.NewLFSMetaObject(ctx, repo.ID, pointerBlob.Pointer) + if err != nil { + log.Error("Repo[%-v]: Error creating LFS meta object %-v: %v", repo, pointerBlob.Pointer, err) + return err + } + } else { + if setting.LFS.MaxFileSize > 0 && pointerBlob.Size > setting.LFS.MaxFileSize { + log.Info("Repo[%-v]: LFS object %-v download denied because of LFS_MAX_FILE_SIZE=%d < size %d", repo, pointerBlob.Pointer, setting.LFS.MaxFileSize, pointerBlob.Size) + continue + } + + batch = append(batch, pointerBlob.Pointer) + if len(batch) >= lfsClient.BatchSize() { + if err := downloadObjects(batch); err != nil { + return err + } + batch = nil + } + } + } + if len(batch) > 0 { + if err := downloadObjects(batch); err != nil { + return err + } + } + + err, has := <-errChan + if has { + log.Error("Repo[%-v]: Error enumerating LFS objects for repository: %v", repo, err) + return err + } + + return nil +} + +// shortRelease to reduce load memory, this struct can replace repo_model.Release +type shortRelease struct { + ID int64 + TagName string + Sha1 string + IsTag bool +} + +func (shortRelease) TableName() string { + return "release" +} + +// pullMirrorReleaseSync is a pull-mirror specific tag<->release table +// synchronization which overwrites all Releases from the repository tags. This +// can be relied on since a pull-mirror is always identical to its +// upstream. Hence, after each sync we want the pull-mirror release set to be +// identical to the upstream tag set. This is much more efficient for +// repositories like https://github.com/vim/vim (with over 13000 tags). +func pullMirrorReleaseSync(ctx context.Context, repo *repo_model.Repository, gitRepo *git.Repository) error { + log.Trace("pullMirrorReleaseSync: rebuilding releases for pull-mirror Repo[%d:%s/%s]", repo.ID, repo.OwnerName, repo.Name) + tags, numTags, err := gitRepo.GetTagInfos(0, 0) + if err != nil { + return fmt.Errorf("unable to GetTagInfos in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + err = db.WithTx(ctx, func(ctx context.Context) error { + dbReleases, err := db.Find[shortRelease](ctx, repo_model.FindReleasesOptions{ + RepoID: repo.ID, + IncludeDrafts: true, + IncludeTags: true, + }) + if err != nil { + return fmt.Errorf("unable to FindReleases in pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + inserts, deletes, updates := calcSync(tags, dbReleases) + // + // make release set identical to upstream tags + // + for _, tag := range inserts { + release := repo_model.Release{ + RepoID: repo.ID, + TagName: tag.Name, + LowerTagName: strings.ToLower(tag.Name), + Sha1: tag.Object.String(), + // NOTE: ignored, since NumCommits are unused + // for pull-mirrors (only relevant when + // displaying releases, IsTag: false) + NumCommits: -1, + CreatedUnix: timeutil.TimeStamp(tag.Tagger.When.Unix()), + IsTag: true, + } + if err := db.Insert(ctx, release); err != nil { + return fmt.Errorf("unable insert tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) + } + } + + // only delete tags releases + if len(deletes) > 0 { + if _, err := db.GetEngine(ctx).Where("repo_id=?", repo.ID). + In("id", deletes). + Delete(&repo_model.Release{}); err != nil { + return fmt.Errorf("unable to delete tags for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + } + + for _, tag := range updates { + if _, err := db.GetEngine(ctx).Where("repo_id = ? AND lower_tag_name = ?", repo.ID, strings.ToLower(tag.Name)). + Cols("sha1"). + Update(&repo_model.Release{ + Sha1: tag.Object.String(), + }); err != nil { + return fmt.Errorf("unable to update tag %s for pull-mirror Repo[%d:%s/%s]: %w", tag.Name, repo.ID, repo.OwnerName, repo.Name, err) + } + } + return nil + }) + if err != nil { + return fmt.Errorf("unable to rebuild release table for pull-mirror Repo[%d:%s/%s]: %w", repo.ID, repo.OwnerName, repo.Name, err) + } + + log.Trace("pullMirrorReleaseSync: done rebuilding %d releases", numTags) + return nil +} + +func calcSync(destTags []*git.Tag, dbTags []*shortRelease) ([]*git.Tag, []int64, []*git.Tag) { + destTagMap := make(map[string]*git.Tag) + for _, tag := range destTags { + destTagMap[tag.Name] = tag + } + dbTagMap := make(map[string]*shortRelease) + for _, rel := range dbTags { + dbTagMap[rel.TagName] = rel + } + + inserted := make([]*git.Tag, 0, 10) + updated := make([]*git.Tag, 0, 10) + for _, tag := range destTags { + rel := dbTagMap[tag.Name] + if rel == nil { + inserted = append(inserted, tag) + } else if rel.Sha1 != tag.Object.String() { + updated = append(updated, tag) + } + } + deleted := make([]int64, 0, 10) + for _, tag := range dbTags { + if destTagMap[tag.TagName] == nil && tag.IsTag { + deleted = append(deleted, tag.ID) + } + } + return inserted, deleted, updated +} -- cgit v1.2.3