mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-26 12:27:06 +00:00 
			
		
		
		
	Reduce calls to git cat-file -s (#14682)
* Reduce calls to `git cat-file -s`: There are multiple places where `git cat-file -s` is called repeatedly because the blobs are not created with their size. Through judicious use of `git ls-tree -l` and slight adjustments to the indexer code, we can avoid a lot of these calls. * Simplify by always expecting the long format. * Also always set the sized field and tell the indexer the update is sized.
This commit is contained in:
		| @@ -10,12 +10,13 @@ import ( | |||||||
| 	"bytes" | 	"bytes" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"strconv" | 	"strconv" | ||||||
|  | 	"strings" | ||||||
|  |  | ||||||
| 	"github.com/go-git/go-git/v5/plumbing/filemode" | 	"github.com/go-git/go-git/v5/plumbing/filemode" | ||||||
| 	"github.com/go-git/go-git/v5/plumbing/object" | 	"github.com/go-git/go-git/v5/plumbing/object" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // ParseTreeEntries parses the output of a `git ls-tree` command. | // ParseTreeEntries parses the output of a `git ls-tree -l` command. | ||||||
| func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | ||||||
| 	return parseTreeEntries(data, nil) | 	return parseTreeEntries(data, nil) | ||||||
| } | } | ||||||
| @@ -23,7 +24,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||||||
| func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | ||||||
| 	entries := make([]*TreeEntry, 0, 10) | 	entries := make([]*TreeEntry, 0, 10) | ||||||
| 	for pos := 0; pos < len(data); { | 	for pos := 0; pos < len(data); { | ||||||
| 		// expect line to be of the form "<mode> <type> <sha>\t<filename>" | 		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>" | ||||||
| 		entry := new(TreeEntry) | 		entry := new(TreeEntry) | ||||||
| 		entry.gogitTreeEntry = &object.TreeEntry{} | 		entry.gogitTreeEntry = &object.TreeEntry{} | ||||||
| 		entry.ptree = ptree | 		entry.ptree = ptree | ||||||
| @@ -61,7 +62,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | |||||||
| 		entry.gogitTreeEntry.Hash = id | 		entry.gogitTreeEntry.Hash = id | ||||||
| 		pos += 41 // skip over sha and trailing space | 		pos += 41 // skip over sha and trailing space | ||||||
|  |  | ||||||
| 		end := pos + bytes.IndexByte(data[pos:], '\n') | 		end := pos + bytes.IndexByte(data[pos:], '\t') | ||||||
|  | 		if end < pos { | ||||||
|  | 			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) | ||||||
|  | 		} | ||||||
|  | 		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) | ||||||
|  | 		entry.sized = true | ||||||
|  |  | ||||||
|  | 		pos = end + 1 | ||||||
|  |  | ||||||
|  | 		end = pos + bytes.IndexByte(data[pos:], '\n') | ||||||
| 		if end < pos { | 		if end < pos { | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | ||||||
| 		} | 		} | ||||||
|   | |||||||
| @@ -24,7 +24,7 @@ func TestParseTreeEntries(t *testing.T) { | |||||||
| 			Expected: []*TreeEntry{}, | 			Expected: []*TreeEntry{}, | ||||||
| 		}, | 		}, | ||||||
| 		{ | 		{ | ||||||
| 			Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\texample/file2.txt\n", | 			Input: "100644 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c    1022\texample/file2.txt\n", | ||||||
| 			Expected: []*TreeEntry{ | 			Expected: []*TreeEntry{ | ||||||
| 				{ | 				{ | ||||||
| 					ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), | 					ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), | ||||||
| @@ -33,12 +33,14 @@ func TestParseTreeEntries(t *testing.T) { | |||||||
| 						Name: "example/file2.txt", | 						Name: "example/file2.txt", | ||||||
| 						Mode: filemode.Regular, | 						Mode: filemode.Regular, | ||||||
| 					}, | 					}, | ||||||
|  | 					size:  1022, | ||||||
|  | 					sized: true, | ||||||
| 				}, | 				}, | ||||||
| 			}, | 			}, | ||||||
| 		}, | 		}, | ||||||
| 		{ | 		{ | ||||||
| 			Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c\t\"example/\\n.txt\"\n" + | 			Input: "120000 blob 61ab7345a1a3bbc590068ccae37b8515cfc5843c  234131\t\"example/\\n.txt\"\n" + | ||||||
| 				"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8\texample\n", | 				"040000 tree 1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8       -\texample\n", | ||||||
| 			Expected: []*TreeEntry{ | 			Expected: []*TreeEntry{ | ||||||
| 				{ | 				{ | ||||||
| 					ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), | 					ID: MustIDFromString("61ab7345a1a3bbc590068ccae37b8515cfc5843c"), | ||||||
| @@ -47,9 +49,12 @@ func TestParseTreeEntries(t *testing.T) { | |||||||
| 						Name: "example/\n.txt", | 						Name: "example/\n.txt", | ||||||
| 						Mode: filemode.Symlink, | 						Mode: filemode.Symlink, | ||||||
| 					}, | 					}, | ||||||
|  | 					size:  234131, | ||||||
|  | 					sized: true, | ||||||
| 				}, | 				}, | ||||||
| 				{ | 				{ | ||||||
| 					ID:    MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | 					ID:    MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | ||||||
|  | 					sized: true, | ||||||
| 					gogitTreeEntry: &object.TreeEntry{ | 					gogitTreeEntry: &object.TreeEntry{ | ||||||
| 						Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | 						Hash: MustIDFromString("1d01fb729fb0db5881daaa6030f9f2d3cd3d5ae8"), | ||||||
| 						Name: "example", | 						Name: "example", | ||||||
|   | |||||||
| @@ -10,9 +10,10 @@ import ( | |||||||
| 	"bytes" | 	"bytes" | ||||||
| 	"fmt" | 	"fmt" | ||||||
| 	"strconv" | 	"strconv" | ||||||
|  | 	"strings" | ||||||
| ) | ) | ||||||
|  |  | ||||||
| // ParseTreeEntries parses the output of a `git ls-tree` command. | // ParseTreeEntries parses the output of a `git ls-tree -l` command. | ||||||
| func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | ||||||
| 	return parseTreeEntries(data, nil) | 	return parseTreeEntries(data, nil) | ||||||
| } | } | ||||||
| @@ -20,7 +21,7 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) { | |||||||
| func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | ||||||
| 	entries := make([]*TreeEntry, 0, 10) | 	entries := make([]*TreeEntry, 0, 10) | ||||||
| 	for pos := 0; pos < len(data); { | 	for pos := 0; pos < len(data); { | ||||||
| 		// expect line to be of the form "<mode> <type> <sha>\t<filename>" | 		// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>" | ||||||
| 		entry := new(TreeEntry) | 		entry := new(TreeEntry) | ||||||
| 		entry.ptree = ptree | 		entry.ptree = ptree | ||||||
| 		if pos+6 > len(data) { | 		if pos+6 > len(data) { | ||||||
| @@ -56,7 +57,16 @@ func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) { | |||||||
| 		entry.ID = id | 		entry.ID = id | ||||||
| 		pos += 41 // skip over sha and trailing space | 		pos += 41 // skip over sha and trailing space | ||||||
|  |  | ||||||
| 		end := pos + bytes.IndexByte(data[pos:], '\n') | 		end := pos + bytes.IndexByte(data[pos:], '\t') | ||||||
|  | 		if end < pos { | ||||||
|  | 			return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data)) | ||||||
|  | 		} | ||||||
|  | 		entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64) | ||||||
|  | 		entry.sized = true | ||||||
|  |  | ||||||
|  | 		pos = end + 1 | ||||||
|  |  | ||||||
|  | 		end = pos + bytes.IndexByte(data[pos:], '\n') | ||||||
| 		if end < pos { | 		if end < pos { | ||||||
| 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | 			return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data)) | ||||||
| 		} | 		} | ||||||
|   | |||||||
							
								
								
									
										70
									
								
								modules/git/parse_nogogit_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								modules/git/parse_nogogit_test.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | |||||||
|  | // Copyright 2021 The Gitea Authors. All rights reserved. | ||||||
|  | // Use of this source code is governed by a MIT-style | ||||||
|  | // license that can be found in the LICENSE file. | ||||||
|  |  | ||||||
|  | // +build !gogit | ||||||
|  |  | ||||||
|  | package git | ||||||
|  |  | ||||||
|  | import ( | ||||||
|  | 	"testing" | ||||||
|  |  | ||||||
|  | 	"github.com/stretchr/testify/assert" | ||||||
|  | ) | ||||||
|  |  | ||||||
|  | func TestParseTreeEntries(t *testing.T) { | ||||||
|  |  | ||||||
|  | 	testCases := []struct { | ||||||
|  | 		Input    string | ||||||
|  | 		Expected []*TreeEntry | ||||||
|  | 	}{ | ||||||
|  | 		{ | ||||||
|  | 			Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af    8218	README.md | ||||||
|  | 100644 blob 037f27dc9d353ae4fd50f0474b2194c593914e35    4681	README_ZH.md | ||||||
|  | 100644 blob 9846a94f7e8350a916632929d0fda38c90dd2ca8     429	SECURITY.md | ||||||
|  | 040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d       -	assets | ||||||
|  | `, | ||||||
|  | 			Expected: []*TreeEntry{ | ||||||
|  | 				{ | ||||||
|  | 					ID:        MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"), | ||||||
|  | 					name:      "README.md", | ||||||
|  | 					entryMode: EntryModeBlob, | ||||||
|  | 					size:      8218, | ||||||
|  | 					sized:     true, | ||||||
|  | 				}, | ||||||
|  | 				{ | ||||||
|  | 					ID:        MustIDFromString("037f27dc9d353ae4fd50f0474b2194c593914e35"), | ||||||
|  | 					name:      "README_ZH.md", | ||||||
|  | 					entryMode: EntryModeBlob, | ||||||
|  | 					size:      4681, | ||||||
|  | 					sized:     true, | ||||||
|  | 				}, | ||||||
|  | 				{ | ||||||
|  | 					ID:        MustIDFromString("9846a94f7e8350a916632929d0fda38c90dd2ca8"), | ||||||
|  | 					name:      "SECURITY.md", | ||||||
|  | 					entryMode: EntryModeBlob, | ||||||
|  | 					size:      429, | ||||||
|  | 					sized:     true, | ||||||
|  | 				}, | ||||||
|  | 				{ | ||||||
|  | 					ID:        MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"), | ||||||
|  | 					name:      "assets", | ||||||
|  | 					entryMode: EntryModeTree, | ||||||
|  | 					sized:     true, | ||||||
|  | 				}, | ||||||
|  | 			}, | ||||||
|  | 		}, | ||||||
|  | 	} | ||||||
|  | 	for _, testCase := range testCases { | ||||||
|  | 		entries, err := ParseTreeEntries([]byte(testCase.Input)) | ||||||
|  | 		assert.NoError(t, err) | ||||||
|  | 		assert.EqualValues(t, len(testCase.Expected), len(entries)) | ||||||
|  | 		for i, entry := range entries { | ||||||
|  | 			assert.EqualValues(t, testCase.Expected[i].ID, entry.ID) | ||||||
|  | 			assert.EqualValues(t, testCase.Expected[i].name, entry.name) | ||||||
|  | 			assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode) | ||||||
|  | 			assert.EqualValues(t, testCase.Expected[i].sized, entry.sized) | ||||||
|  | 			assert.EqualValues(t, testCase.Expected[i].size, entry.size) | ||||||
|  | 		} | ||||||
|  | 	} | ||||||
|  | } | ||||||
| @@ -87,5 +87,7 @@ func (te *TreeEntry) Blob() *Blob { | |||||||
| 		ID:       te.ID, | 		ID:       te.ID, | ||||||
| 		repoPath: te.ptree.repo.Path, | 		repoPath: te.ptree.repo.Path, | ||||||
| 		name:     te.Name(), | 		name:     te.Name(), | ||||||
|  | 		size:     te.size, | ||||||
|  | 		gotSize:  te.sized, | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|   | |||||||
| @@ -32,7 +32,7 @@ func (t *Tree) ListEntries() (Entries, error) { | |||||||
| 		return t.entries, nil | 		return t.entries, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	stdout, err := NewCommand("ls-tree", t.ID.String()).RunInDirBytes(t.repo.Path) | 	stdout, err := NewCommand("ls-tree", "-l", t.ID.String()).RunInDirBytes(t.repo.Path) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") { | 		if strings.Contains(err.Error(), "fatal: Not a valid object name") || strings.Contains(err.Error(), "fatal: not a tree object") { | ||||||
| 			return nil, ErrNotExist{ | 			return nil, ErrNotExist{ | ||||||
| @@ -55,7 +55,7 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) { | |||||||
| 	if t.entriesRecursiveParsed { | 	if t.entriesRecursiveParsed { | ||||||
| 		return t.entriesRecursive, nil | 		return t.entriesRecursive, nil | ||||||
| 	} | 	} | ||||||
| 	stdout, err := NewCommand("ls-tree", "-t", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) | 	stdout, err := NewCommand("ls-tree", "-t", "-l", "-r", t.ID.String()).RunInDirBytes(t.repo.Path) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| 	} | 	} | ||||||
|   | |||||||
| @@ -179,14 +179,20 @@ func (b *BleveIndexer) addUpdate(commitSha string, update fileUpdate, repo *mode | |||||||
| 		return nil | 		return nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	size := update.Size | ||||||
|  |  | ||||||
|  | 	if !update.Sized { | ||||||
| 		stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | 		stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | ||||||
| 			RunInDir(repo.RepoPath()) | 			RunInDir(repo.RepoPath()) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return err | 			return err | ||||||
| 		} | 		} | ||||||
| 	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | 		if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { | ||||||
| 			return fmt.Errorf("Misformatted git cat-file output: %v", err) | 			return fmt.Errorf("Misformatted git cat-file output: %v", err) | ||||||
| 	} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if size > setting.Indexer.MaxIndexerFileSize { | ||||||
| 		return b.addDelete(update.Filename, repo, batch) | 		return b.addDelete(update.Filename, repo, batch) | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
| @@ -178,14 +178,20 @@ func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *mo | |||||||
| 		return nil, nil | 		return nil, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	size := update.Size | ||||||
|  |  | ||||||
|  | 	if !update.Sized { | ||||||
| 		stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | 		stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha). | ||||||
| 			RunInDir(repo.RepoPath()) | 			RunInDir(repo.RepoPath()) | ||||||
| 		if err != nil { | 		if err != nil { | ||||||
| 			return nil, err | 			return nil, err | ||||||
| 		} | 		} | ||||||
| 	if size, err := strconv.Atoi(strings.TrimSpace(stdout)); err != nil { | 		if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { | ||||||
| 			return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) | 			return nil, fmt.Errorf("Misformatted git cat-file output: %v", err) | ||||||
| 	} else if int64(size) > setting.Indexer.MaxIndexerFileSize { | 		} | ||||||
|  | 	} | ||||||
|  |  | ||||||
|  | 	if size > setting.Indexer.MaxIndexerFileSize { | ||||||
| 		return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil | 		return []elastic.BulkableRequest{b.addDelete(update.Filename, repo)}, nil | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|   | |||||||
| @@ -17,6 +17,8 @@ import ( | |||||||
| type fileUpdate struct { | type fileUpdate struct { | ||||||
| 	Filename string | 	Filename string | ||||||
| 	BlobSha  string | 	BlobSha  string | ||||||
|  | 	Size     int64 | ||||||
|  | 	Sized    bool | ||||||
| } | } | ||||||
|  |  | ||||||
| // repoChanges changes (file additions/updates/removals) to a repo | // repoChanges changes (file additions/updates/removals) to a repo | ||||||
| @@ -77,6 +79,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { | |||||||
| 			updates[idxCount] = fileUpdate{ | 			updates[idxCount] = fileUpdate{ | ||||||
| 				Filename: entry.Name(), | 				Filename: entry.Name(), | ||||||
| 				BlobSha:  entry.ID.String(), | 				BlobSha:  entry.ID.String(), | ||||||
|  | 				Size:     entry.Size(), | ||||||
|  | 				Sized:    true, | ||||||
| 			} | 			} | ||||||
| 			idxCount++ | 			idxCount++ | ||||||
| 		} | 		} | ||||||
| @@ -87,7 +91,7 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { | |||||||
| // genesisChanges get changes to add repo to the indexer for the first time | // genesisChanges get changes to add repo to the indexer for the first time | ||||||
| func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) { | func genesisChanges(repo *models.Repository, revision string) (*repoChanges, error) { | ||||||
| 	var changes repoChanges | 	var changes repoChanges | ||||||
| 	stdout, err := git.NewCommand("ls-tree", "--full-tree", "-r", revision). | 	stdout, err := git.NewCommand("ls-tree", "--full-tree", "-l", "-r", revision). | ||||||
| 		RunInDirBytes(repo.RepoPath()) | 		RunInDirBytes(repo.RepoPath()) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
| 		return nil, err | 		return nil, err | ||||||
| @@ -162,7 +166,7 @@ func nonGenesisChanges(repo *models.Repository, revision string) (*repoChanges, | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	cmd := git.NewCommand("ls-tree", "--full-tree", revision, "--") | 	cmd := git.NewCommand("ls-tree", "--full-tree", "-l", revision, "--") | ||||||
| 	cmd.AddArguments(updatedFilenames...) | 	cmd.AddArguments(updatedFilenames...) | ||||||
| 	lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath()) | 	lsTreeStdout, err := cmd.RunInDirBytes(repo.RepoPath()) | ||||||
| 	if err != nil { | 	if err != nil { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 zeripath
					zeripath