Merge pull request '[Port] Sync branches to DB immediately when handle git hook calling gitea#29493' (#2684) from oliverpool/forgejo:sync_branch into forgejo

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/2684
Reviewed-by: Otto <otto@codeberg.org>
Reviewed-by: Earl Warren <earl-warren@noreply.codeberg.org>
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
This commit is contained in:
oliverpool 2024-03-19 08:37:30 +00:00
commit ce6db31efe
5 changed files with 280 additions and 44 deletions

View file

@ -162,6 +162,11 @@ func GetBranch(ctx context.Context, repoID int64, branchName string) (*Branch, e
return &branch, nil
}
func GetBranches(ctx context.Context, repoID int64, branchNames []string) ([]*Branch, error) {
branches := make([]*Branch, 0, len(branchNames))
return branches, db.GetEngine(ctx).Where("repo_id=?", repoID).In("name", branchNames).Find(&branches)
}
func AddBranches(ctx context.Context, branches []*Branch) error {
for _, branch := range branches {
if _, err := db.GetEngine(ctx).Insert(branch); err != nil {

View file

@ -8,9 +8,11 @@ import (
"net/http"
"strconv"
git_model "code.gitea.io/gitea/models/git"
issues_model "code.gitea.io/gitea/models/issues"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/gitrepo"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/private"
repo_module "code.gitea.io/gitea/modules/repository"
@ -27,6 +29,7 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
// We don't rely on RepoAssignment here because:
// a) we don't need the git repo in this function
// OUT OF DATE: we do need the git repo to sync the branch to the db now.
// b) our update function will likely change the repository in the db so we will need to refresh it
// c) we don't always need the repo
@ -34,7 +37,11 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
repoName := ctx.Params(":repo")
// defer getting the repository at this point - as we should only retrieve it if we're going to call update
var repo *repo_model.Repository
var (
repo *repo_model.Repository
gitRepo *git.Repository
)
defer gitRepo.Close() // it's safe to call Close on a nil pointer
updates := make([]*repo_module.PushUpdateOptions, 0, len(opts.OldCommitIDs))
wasEmpty := false
@ -75,6 +82,61 @@ func HookPostReceive(ctx *gitea_context.PrivateContext) {
}
if repo != nil && len(updates) > 0 {
branchesToSync := make([]*repo_module.PushUpdateOptions, 0, len(updates))
for _, update := range updates {
if !update.RefFullName.IsBranch() {
continue
}
if repo == nil {
repo = loadRepository(ctx, ownerName, repoName)
if ctx.Written() {
return
}
wasEmpty = repo.IsEmpty
}
if update.IsDelRef() {
if err := git_model.AddDeletedBranch(ctx, repo.ID, update.RefFullName.BranchName(), update.PusherID); err != nil {
log.Error("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err)
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to add deleted branch: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
} else {
branchesToSync = append(branchesToSync, update)
}
}
if len(branchesToSync) > 0 {
if gitRepo == nil {
var err error
gitRepo, err = gitrepo.OpenRepository(ctx, repo)
if err != nil {
log.Error("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err)
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to open repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}
var (
branchNames = make([]string, 0, len(branchesToSync))
commitIDs = make([]string, 0, len(branchesToSync))
)
for _, update := range branchesToSync {
branchNames = append(branchNames, update.RefFullName.BranchName())
commitIDs = append(commitIDs, update.NewCommitID)
}
if err := repo_service.SyncBranchesToDB(ctx, repo.ID, opts.UserID, branchNames, commitIDs, gitRepo.GetCommit); err != nil {
ctx.JSON(http.StatusInternalServerError, private.HookPostReceiveResult{
Err: fmt.Sprintf("Failed to sync branch to DB in repository: %s/%s Error: %v", ownerName, repoName, err),
})
return
}
}
if err := repo_service.PushUpdates(updates); err != nil {
log.Error("Failed to Update: %s/%s Total Updates: %d", ownerName, repoName, len(updates))
for i, update := range updates {

View file

@ -225,20 +225,33 @@ func checkBranchName(ctx context.Context, repo *repo_model.Repository, name stri
return err
}
// syncBranchToDB sync the branch information in the database. It will try to update the branch first,
// if updated success with affect records > 0, then all are done. Because that means the branch has been in the database.
// If no record is affected, that means the branch does not exist in database. So there are two possibilities.
// One is this is a new branch, then we just need to insert the record. Another is the branches haven't been synced,
// then we need to sync all the branches into database.
func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName string, commit *git.Commit) error {
cnt, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit)
if err != nil {
return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err)
}
if cnt > 0 { // This means branch does exist, so it's a normal update. It also means the branch has been synced.
return nil
// SyncBranchesToDB sync the branch information in the database.
// It will check whether the branches of the repository have never been synced before.
// If so, it will sync all branches of the repository.
// Otherwise, it will sync the branches that need to be updated.
func SyncBranchesToDB(ctx context.Context, repoID, pusherID int64, branchNames, commitIDs []string, getCommit func(commitID string) (*git.Commit, error)) error {
// Some designs that make the code look strange but are made for performance optimization purposes:
// 1. Sync branches in a batch to reduce the number of DB queries.
// 2. Lazy load commit information since it may be not necessary.
// 3. Exit early if synced all branches of git repo when there's no branch in DB.
// 4. Check the branches in DB if they are already synced.
//
// If the user pushes many branches at once, the Git hook will call the internal API in batches, rather than all at once.
// See https://github.com/go-gitea/gitea/blob/cb52b17f92e2d2293f7c003649743464492bca48/cmd/hook.go#L27
// For the first batch, it will hit optimization 3.
// For other batches, it will hit optimization 4.
if len(branchNames) != len(commitIDs) {
return fmt.Errorf("branchNames and commitIDs length not match")
}
return db.WithTx(ctx, func(ctx context.Context) error {
branches, err := git_model.GetBranches(ctx, repoID, branchNames)
if err != nil {
return fmt.Errorf("git_model.GetBranches: %v", err)
}
if len(branches) == 0 {
// if user haven't visit UI but directly push to a branch after upgrading from 1.20 -> 1.21,
// we cannot simply insert the branch but need to check we have branches or not
hasBranch, err := db.Exist[git_model.Branch](ctx, git_model.FindBranchOptions{
@ -250,13 +263,40 @@ func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName stri
}
if !hasBranch {
if _, err = repo_module.SyncRepoBranches(ctx, repoID, pusherID); err != nil {
return fmt.Errorf("repo_module.SyncRepoBranches %d:%s failed: %v", repoID, branchName, err)
return fmt.Errorf("repo_module.SyncRepoBranches %d failed: %v", repoID, err)
}
return nil
}
}
branchMap := make(map[string]*git_model.Branch, len(branches))
for _, branch := range branches {
branchMap[branch.Name] = branch
}
newBranches := make([]*git_model.Branch, 0, len(branchNames))
for i, branchName := range branchNames {
commitID := commitIDs[i]
branch, exist := branchMap[branchName]
if exist && branch.CommitID == commitID && !branch.IsDeleted {
continue
}
commit, err := getCommit(commitID)
if err != nil {
return fmt.Errorf("get commit of %s failed: %v", branchName, err)
}
if exist {
if _, err := git_model.UpdateBranch(ctx, repoID, pusherID, branchName, commit); err != nil {
return fmt.Errorf("git_model.UpdateBranch %d:%s failed: %v", repoID, branchName, err)
}
return nil
}
// if database have branches but not this branch, it means this is a new branch
return db.Insert(ctx, &git_model.Branch{
newBranches = append(newBranches, &git_model.Branch{
RepoID: repoID,
Name: branchName,
CommitID: commit.ID.String(),
@ -264,6 +304,13 @@ func syncBranchToDB(ctx context.Context, repoID, pusherID int64, branchName stri
PusherID: pusherID,
CommitTime: timeutil.TimeStamp(commit.Committer.When.Unix()),
})
}
if len(newBranches) > 0 {
return db.Insert(ctx, newBranches)
}
return nil
})
}
// CreateNewBranchFromCommit creates a new repository branch

View file

@ -11,7 +11,6 @@ import (
"time"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
repo_model "code.gitea.io/gitea/models/repo"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/cache"
@ -259,10 +258,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error {
commits.Commits = commits.Commits[:setting.UI.FeedMaxCommitNum]
}
if err = syncBranchToDB(ctx, repo.ID, opts.PusherID, branch, newCommit); err != nil {
return fmt.Errorf("git_model.UpdateBranch %s:%s failed: %v", repo.FullName(), branch, err)
}
notify_service.PushCommits(ctx, pusher, repo, opts, commits)
// Cache for big repository
@ -275,10 +270,6 @@ func pushUpdates(optsList []*repo_module.PushUpdateOptions) error {
// close all related pulls
log.Error("close related pull request failed: %v", err)
}
if err := git_model.AddDeletedBranch(ctx, repo.ID, branch, pusher.ID); err != nil {
return fmt.Errorf("AddDeletedBranch %s:%s failed: %v", repo.FullName(), branch, err)
}
}
// Even if user delete a branch on a repository which he didn't watch, he will be watch that.

View file

@ -0,0 +1,131 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package integration
import (
"fmt"
"net/url"
"testing"
"code.gitea.io/gitea/models/db"
git_model "code.gitea.io/gitea/models/git"
"code.gitea.io/gitea/models/unittest"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
repo_service "code.gitea.io/gitea/services/repository"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestGitPush(t *testing.T) {
onGiteaRun(t, testGitPush)
}
func testGitPush(t *testing.T, u *url.URL) {
t.Run("Push branches at once", func(t *testing.T) {
runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) {
for i := 0; i < 100; i++ {
branchName := fmt.Sprintf("branch-%d", i)
pushed = append(pushed, branchName)
doGitCreateBranch(gitPath, branchName)(t)
}
pushed = append(pushed, "master")
doGitPushTestRepository(gitPath, "origin", "--all")(t)
return pushed, deleted
})
})
t.Run("Push branches one by one", func(t *testing.T) {
runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) {
for i := 0; i < 100; i++ {
branchName := fmt.Sprintf("branch-%d", i)
doGitCreateBranch(gitPath, branchName)(t)
doGitPushTestRepository(gitPath, "origin", branchName)(t)
pushed = append(pushed, branchName)
}
return pushed, deleted
})
})
t.Run("Delete branches", func(t *testing.T) {
runTestGitPush(t, u, func(t *testing.T, gitPath string) (pushed, deleted []string) {
doGitPushTestRepository(gitPath, "origin", "master")(t) // make sure master is the default branch instead of a branch we are going to delete
pushed = append(pushed, "master")
for i := 0; i < 100; i++ {
branchName := fmt.Sprintf("branch-%d", i)
pushed = append(pushed, branchName)
doGitCreateBranch(gitPath, branchName)(t)
}
doGitPushTestRepository(gitPath, "origin", "--all")(t)
for i := 0; i < 10; i++ {
branchName := fmt.Sprintf("branch-%d", i)
doGitPushTestRepository(gitPath, "origin", "--delete", branchName)(t)
deleted = append(deleted, branchName)
}
return pushed, deleted
})
})
}
func runTestGitPush(t *testing.T, u *url.URL, gitOperation func(t *testing.T, gitPath string) (pushed, deleted []string)) {
user := unittest.AssertExistsAndLoadBean(t, &user_model.User{ID: 2})
repo, err := repo_service.CreateRepository(db.DefaultContext, user, user, repo_service.CreateRepoOptions{
Name: "repo-to-push",
Description: "test git push",
AutoInit: false,
DefaultBranch: "main",
IsPrivate: false,
})
require.NoError(t, err)
require.NotEmpty(t, repo)
gitPath := t.TempDir()
doGitInitTestRepository(gitPath)(t)
oldPath := u.Path
oldUser := u.User
defer func() {
u.Path = oldPath
u.User = oldUser
}()
u.Path = repo.FullName() + ".git"
u.User = url.UserPassword(user.LowerName, userPassword)
doGitAddRemote(gitPath, "origin", u)(t)
gitRepo, err := git.OpenRepository(git.DefaultContext, gitPath)
require.NoError(t, err)
defer gitRepo.Close()
pushedBranches, deletedBranches := gitOperation(t, gitPath)
dbBranches := make([]*git_model.Branch, 0)
require.NoError(t, db.GetEngine(db.DefaultContext).Where("repo_id=?", repo.ID).Find(&dbBranches))
assert.Equalf(t, len(pushedBranches), len(dbBranches), "mismatched number of branches in db")
dbBranchesMap := make(map[string]*git_model.Branch, len(dbBranches))
for _, branch := range dbBranches {
dbBranchesMap[branch.Name] = branch
}
deletedBranchesMap := make(map[string]bool, len(deletedBranches))
for _, branchName := range deletedBranches {
deletedBranchesMap[branchName] = true
}
for _, branchName := range pushedBranches {
branch, ok := dbBranchesMap[branchName]
deleted := deletedBranchesMap[branchName]
assert.True(t, ok, "branch %s not found in database", branchName)
assert.Equal(t, deleted, branch.IsDeleted, "IsDeleted of %s is %v, but it's expected to be %v", branchName, branch.IsDeleted, deleted)
commitID, err := gitRepo.GetBranchCommitID(branchName)
require.NoError(t, err)
assert.Equal(t, commitID, branch.CommitID)
}
require.NoError(t, repo_service.DeleteRepositoryDirectly(db.DefaultContext, user, repo.ID))
}