Mirror of https://github.com/go-gitea/gitea.git (synced 2025-10-26 12:27:06 +00:00)
			
		
		
		
	Fix synchronization bug in repo indexer (#3455)
This commit is contained in:
		| @@ -5,9 +5,7 @@ | |||||||
| package models | package models | ||||||
|  |  | ||||||
| import ( | import ( | ||||||
| 	"io/ioutil" | 	"fmt" | ||||||
| 	"os" |  | ||||||
| 	"path" |  | ||||||
| 	"strconv" | 	"strconv" | ||||||
| 	"strings" | 	"strings" | ||||||
|  |  | ||||||
| @@ -16,8 +14,6 @@ import ( | |||||||
| 	"code.gitea.io/gitea/modules/indexer" | 	"code.gitea.io/gitea/modules/indexer" | ||||||
| 	"code.gitea.io/gitea/modules/log" | 	"code.gitea.io/gitea/modules/log" | ||||||
| 	"code.gitea.io/gitea/modules/setting" | 	"code.gitea.io/gitea/modules/setting" | ||||||
|  |  | ||||||
| 	"github.com/Unknwon/com" |  | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // RepoIndexerStatus status of a repo's entry in the repo indexer | // RepoIndexerStatus status of a repo's entry in the repo indexer | ||||||
| @@ -132,7 +128,11 @@ func populateRepoIndexer(maxRepoID int64) { | |||||||
| } | } | ||||||
|  |  | ||||||
| func updateRepoIndexer(repo *Repository) error { | func updateRepoIndexer(repo *Repository) error { | ||||||
| 	changes, err := getRepoChanges(repo) | 	sha, err := getDefaultBranchSha(repo) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return err | ||||||
|  | 	} | ||||||
|  | 	changes, err := getRepoChanges(repo, sha) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} else if changes == nil { | 	} else if changes == nil { | ||||||
| @@ -140,12 +140,12 @@ func updateRepoIndexer(repo *Repository) error { | |||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	batch := indexer.RepoIndexerBatch() | 	batch := indexer.RepoIndexerBatch() | ||||||
| 	for _, filename := range changes.UpdatedFiles { | 	for _, update := range changes.Updates { | ||||||
| 		if err := addUpdate(filename, repo, batch); err != nil { | 		if err := addUpdate(update, repo, batch); err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	for _, filename := range changes.RemovedFiles { | 	for _, filename := range changes.RemovedFilenames { | ||||||
| 		if err := addDelete(filename, repo, batch); err != nil { | 		if err := addDelete(filename, repo, batch); err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| @@ -153,56 +153,61 @@ func updateRepoIndexer(repo *Repository) error { | |||||||
| 	if err = batch.Flush(); err != nil { | 	if err = batch.Flush(); err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} | 	} | ||||||
| 	return updateLastIndexSync(repo) | 	return repo.updateIndexerStatus(sha) | ||||||
| } | } | ||||||
|  |  | ||||||
| // repoChanges changes (file additions/updates/removals) to a repo | // repoChanges changes (file additions/updates/removals) to a repo | ||||||
| type repoChanges struct { | type repoChanges struct { | ||||||
| 	UpdatedFiles []string | 	Updates          []fileUpdate | ||||||
| 	RemovedFiles []string | 	RemovedFilenames []string | ||||||
|  | } | ||||||
|  |  | ||||||
|  | type fileUpdate struct { | ||||||
|  | 	Filename string | ||||||
|  | 	BlobSha  string | ||||||
|  | } | ||||||
|  |  | ||||||
|  | func getDefaultBranchSha(repo *Repository) (string, error) { | ||||||
|  | 	stdout, err := git.NewCommand("show-ref", "-s", repo.DefaultBranch).RunInDir(repo.RepoPath()) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return "", err | ||||||
|  | 	} | ||||||
|  | 	return strings.TrimSpace(stdout), nil | ||||||
| } | } | ||||||
|  |  | ||||||
| // getRepoChanges returns changes to repo since last indexer update | // getRepoChanges returns changes to repo since last indexer update | ||||||
| func getRepoChanges(repo *Repository) (*repoChanges, error) { | func getRepoChanges(repo *Repository, revision string) (*repoChanges, error) { | ||||||
| 	repoWorkingPool.CheckIn(com.ToStr(repo.ID)) | 	if err := repo.getIndexerStatus(); err != nil { | ||||||
| 	defer repoWorkingPool.CheckOut(com.ToStr(repo.ID)) |  | ||||||
|  |  | ||||||
| 	if err := repo.UpdateLocalCopyBranch(""); err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} else if !git.IsBranchExist(repo.LocalCopyPath(), repo.DefaultBranch) { |  | ||||||
| 		// repo does not have any commits yet, so nothing to update |  | ||||||
| 		return nil, nil |  | ||||||
| 	} else if err = repo.UpdateLocalCopyBranch(repo.DefaultBranch); err != nil { |  | ||||||
| 		return nil, err |  | ||||||
| 	} else if err = repo.getIndexerStatus(); err != nil { |  | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if len(repo.IndexerStatus.CommitSha) == 0 { | 	if len(repo.IndexerStatus.CommitSha) == 0 { | ||||||
| 		return genesisChanges(repo) | 		return genesisChanges(repo, revision) | ||||||
| 	} | 	} | ||||||
| 	return nonGenesisChanges(repo) | 	return nonGenesisChanges(repo, revision) | ||||||
| } | } | ||||||
|  |  | ||||||
| func addUpdate(filename string, repo *Repository, batch *indexer.Batch) error { | func addUpdate(update fileUpdate, repo *Repository, batch *indexer.Batch) error { | ||||||
| 	filepath := path.Join(repo.LocalCopyPath(), filename) | 	stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | ||||||
| 	if stat, err := os.Stat(filepath); err != nil { | 		RunInDir(repo.RepoPath()) | ||||||
|  | 	if err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} else if stat.Size() > setting.Indexer.MaxIndexerFileSize { | 	} | ||||||
| 		return nil | 	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | ||||||
| 	} else if stat.IsDir() { | 		return fmt.Errorf("Misformatted git cat-file output: %v", err) | ||||||
| 		// file could actually be a directory, if it is the root of a submodule. | 	} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | ||||||
| 		// We do not index submodule contents, so don't do anything. |  | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
| 	fileContents, err := ioutil.ReadFile(filepath) |  | ||||||
|  | 	fileContents, err := git.NewCommand("cat-file", "blob", update.BlobSha). | ||||||
|  | 		RunInDirBytes(repo.RepoPath()) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return err | 		return err | ||||||
| 	} else if !base.IsTextFile(fileContents) { | 	} else if !base.IsTextFile(fileContents) { | ||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
| 	return batch.Add(indexer.RepoIndexerUpdate{ | 	return batch.Add(indexer.RepoIndexerUpdate{ | ||||||
| 		Filepath: filename, | 		Filepath: update.Filename, | ||||||
| 		Op:       indexer.RepoIndexerOpUpdate, | 		Op:       indexer.RepoIndexerOpUpdate, | ||||||
| 		Data: &indexer.RepoIndexerData{ | 		Data: &indexer.RepoIndexerData{ | ||||||
| 			RepoID:  repo.ID, | 			RepoID:  repo.ID, | ||||||
| @@ -221,42 +226,76 @@ func addDelete(filename string, repo *Repository, batch *indexer.Batch) error { | |||||||
| 	}) | 	}) | ||||||
| } | } | ||||||
|  |  | ||||||
| // genesisChanges get changes to add repo to the indexer for the first time | // parseGitLsTreeOutput parses the output of a `git ls-tree --full-tree` command (with or without `-r`) | ||||||
| func genesisChanges(repo *Repository) (*repoChanges, error) { | func parseGitLsTreeOutput(stdout string) ([]fileUpdate, error) { | ||||||
| 	var changes repoChanges | 	lines := strings.Split(stdout, "\n") | ||||||
| 	stdout, err := git.NewCommand("ls-files").RunInDir(repo.LocalCopyPath()) | 	updates := make([]fileUpdate, 0, len(lines)) | ||||||
| 	if err != nil { | 	for _, line := range lines { | ||||||
| 		return nil, err | 		// expect line to be "<mode> <object-type> <object-sha>\t<filename>" | ||||||
| 	} | 		line = strings.TrimSpace(line) | ||||||
| 	for _, line := range strings.Split(stdout, "\n") { | 		if len(line) == 0 { | ||||||
| 		filename := strings.TrimSpace(line) |  | ||||||
| 		if len(filename) == 0 { |  | ||||||
| 			continue | 			continue | ||||||
| 		} else if filename[0] == '"' { | 		} | ||||||
|  | 		firstSpaceIndex := strings.IndexByte(line, ' ') | ||||||
|  | 		if firstSpaceIndex < 0 { | ||||||
|  | 			log.Error(4, "Misformatted git ls-tree output: %s", line) | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 		tabIndex := strings.IndexByte(line, '\t') | ||||||
|  | 		if tabIndex < 42+firstSpaceIndex || tabIndex == len(line)-1 { | ||||||
|  | 			log.Error(4, "Misformatted git ls-tree output: %s", line) | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  | 		if objectType := line[firstSpaceIndex+1 : tabIndex-41]; objectType != "blob" { | ||||||
|  | 			// submodules appear as commit objects, we do not index submodules | ||||||
|  | 			continue | ||||||
|  | 		} | ||||||
|  |  | ||||||
|  | 		blobSha := line[tabIndex-40 : tabIndex] | ||||||
|  | 		filename := line[tabIndex+1:] | ||||||
|  | 		if filename[0] == '"' { | ||||||
|  | 			var err error | ||||||
| 			filename, err = strconv.Unquote(filename) | 			filename, err = strconv.Unquote(filename) | ||||||
| 			if err != nil { | 			if err != nil { | ||||||
| 				return nil, err | 				return nil, err | ||||||
| 			} | 			} | ||||||
| 		} | 		} | ||||||
| 		changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | 		updates = append(updates, fileUpdate{ | ||||||
|  | 			Filename: filename, | ||||||
|  | 			BlobSha:  blobSha, | ||||||
|  | 		}) | ||||||
| 	} | 	} | ||||||
| 	return &changes, nil | 	return updates, nil | ||||||
|  | } | ||||||
|  |  | ||||||
|  | // genesisChanges get changes to add repo to the indexer for the first time | ||||||
|  | func genesisChanges(repo *Repository, revision string) (*repoChanges, error) { | ||||||
|  | 	var changes repoChanges | ||||||
|  | 	stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). | ||||||
|  | 		RunInDir(repo.RepoPath()) | ||||||
|  | 	if err != nil { | ||||||
|  | 		return nil, err | ||||||
|  | 	} | ||||||
|  | 	changes.Updates, err = parseGitLsTreeOutput(stdout) | ||||||
|  | 	return &changes, err | ||||||
| } | } | ||||||
|  |  | ||||||
| // nonGenesisChanges get changes since the previous indexer update | // nonGenesisChanges get changes since the previous indexer update | ||||||
| func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | func nonGenesisChanges(repo *Repository, revision string) (*repoChanges, error) { | ||||||
| 	diffCmd := git.NewCommand("diff", "--name-status", | 	diffCmd := git.NewCommand("diff", "--name-status", | ||||||
| 		repo.IndexerStatus.CommitSha, "HEAD") | 		repo.IndexerStatus.CommitSha, revision) | ||||||
| 	stdout, err := diffCmd.RunInDir(repo.LocalCopyPath()) | 	stdout, err := diffCmd.RunInDir(repo.RepoPath()) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		// previous commit sha may have been removed by a force push, so | 		// previous commit sha may have been removed by a force push, so | ||||||
| 		// try rebuilding from scratch | 		// try rebuilding from scratch | ||||||
|  | 		log.Warn("git diff: %v", err) | ||||||
| 		if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { | 		if err = indexer.DeleteRepoFromIndexer(repo.ID); err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 		return genesisChanges(repo) | 		return genesisChanges(repo, revision) | ||||||
| 	} | 	} | ||||||
| 	var changes repoChanges | 	var changes repoChanges | ||||||
|  | 	updatedFilenames := make([]string, 0, 10) | ||||||
| 	for _, line := range strings.Split(stdout, "\n") { | 	for _, line := range strings.Split(stdout, "\n") { | ||||||
| 		line = strings.TrimSpace(line) | 		line = strings.TrimSpace(line) | ||||||
| 		if len(line) == 0 { | 		if len(line) == 0 { | ||||||
| @@ -274,23 +313,22 @@ func nonGenesisChanges(repo *Repository) (*repoChanges, error) { | |||||||
|  |  | ||||||
| 		switch status := line[0]; status { | 		switch status := line[0]; status { | ||||||
| 		case 'M', 'A': | 		case 'M', 'A': | ||||||
| 			changes.UpdatedFiles = append(changes.UpdatedFiles, filename) | 			updatedFilenames = append(updatedFilenames, filename) | ||||||
| 		case 'D': | 		case 'D': | ||||||
| 			changes.RemovedFiles = append(changes.RemovedFiles, filename) | 			changes.RemovedFilenames = append(changes.RemovedFilenames, filename) | ||||||
| 		default: | 		default: | ||||||
| 			log.Warn("Unrecognized status: %c (line=%s)", status, line) | 			log.Warn("Unrecognized status: %c (line=%s)", status, line) | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 	return &changes, nil |  | ||||||
| } |  | ||||||
|  |  | ||||||
| func updateLastIndexSync(repo *Repository) error { | 	cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") | ||||||
| 	stdout, err := git.NewCommand("rev-parse", "HEAD").RunInDir(repo.LocalCopyPath()) | 	cmd.AddArguments(updatedFilenames...) | ||||||
|  | 	stdout, err = cmd.RunInDir(repo.RepoPath()) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return err | 		return nil, err | ||||||
| 	} | 	} | ||||||
| 	sha := strings.TrimSpace(stdout) | 	changes.Updates, err = parseGitLsTreeOutput(stdout) | ||||||
| 	return repo.updateIndexerStatus(sha) | 	return &changes, err | ||||||
| } | } | ||||||
|  |  | ||||||
| func processRepoIndexerOperationQueue() { | func processRepoIndexerOperationQueue() { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Ethan Koenig
					Ethan Koenig