mirror of
				https://github.com/go-gitea/gitea.git
				synced 2025-10-26 12:27:06 +00:00 
			
		
		
		
	Add .gitattribute assisted language detection to blame, diff and render (#17590)
Use the check-attribute code to look up the language assigned to a file, and pass that to chroma as a hint for the language of the file. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
		@@ -22,6 +22,8 @@ type CheckAttributeOpts struct {
 | 
			
		||||
	AllAttributes bool
 | 
			
		||||
	Attributes    []string
 | 
			
		||||
	Filenames     []string
 | 
			
		||||
	IndexFile     string
 | 
			
		||||
	WorkTree      string
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// CheckAttribute runs git check-attr with the given options and returns the attributes it reports
 | 
			
		||||
@@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
 | 
			
		||||
		return nil, fmt.Errorf("git version missing: %v", err)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	env := []string{}
 | 
			
		||||
 | 
			
		||||
	if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
 | 
			
		||||
		env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
 | 
			
		||||
	}
 | 
			
		||||
	if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
 | 
			
		||||
		env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	if len(env) > 0 {
 | 
			
		||||
		env = append(os.Environ(), env...)
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	stdOut := new(bytes.Buffer)
 | 
			
		||||
	stdErr := new(bytes.Buffer)
 | 
			
		||||
 | 
			
		||||
@@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
 | 
			
		||||
 | 
			
		||||
	cmd := NewCommand(cmdArgs...)
 | 
			
		||||
 | 
			
		||||
	if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
 | 
			
		||||
	if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil {
 | 
			
		||||
		return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,7 @@ import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"context"
 | 
			
		||||
	"os"
 | 
			
		||||
	"path/filepath"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
@@ -45,14 +46,15 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
 | 
			
		||||
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
 | 
			
		||||
	tmpIndex, err := os.CreateTemp("", "index")
 | 
			
		||||
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) {
 | 
			
		||||
	tmpDir, err = os.MkdirTemp("", "index")
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		return
 | 
			
		||||
	}
 | 
			
		||||
	filename = tmpIndex.Name()
 | 
			
		||||
 | 
			
		||||
	filename = filepath.Join(tmpDir, ".tmp-index")
 | 
			
		||||
	cancel = func() {
 | 
			
		||||
		err := util.Remove(filename)
 | 
			
		||||
		err := util.RemoveAll(tmpDir)
 | 
			
		||||
		if err != nil {
 | 
			
		||||
			log.Error("failed to remove tmp index file: %v", err)
 | 
			
		||||
		}
 | 
			
		||||
@@ -60,7 +62,7 @@ func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename strin
 | 
			
		||||
	err = repo.ReadTreeToIndex(treeish, filename)
 | 
			
		||||
	if err != nil {
 | 
			
		||||
		defer cancel()
 | 
			
		||||
		return "", func() {}, err
 | 
			
		||||
		return "", "", func() {}, err
 | 
			
		||||
	}
 | 
			
		||||
	return
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -11,11 +11,10 @@ import (
 | 
			
		||||
	"bytes"
 | 
			
		||||
	"context"
 | 
			
		||||
	"io"
 | 
			
		||||
	"os"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/analyze"
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
	"code.gitea.io/gitea/modules/util"
 | 
			
		||||
 | 
			
		||||
	"github.com/go-enry/go-enry/v2"
 | 
			
		||||
	"github.com/go-git/go-git/v5"
 | 
			
		||||
@@ -48,35 +47,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
			
		||||
	var checker *CheckAttributeReader
 | 
			
		||||
 | 
			
		||||
	if CheckGitVersionAtLeast("1.7.8") == nil {
 | 
			
		||||
		indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 | 
			
		||||
		indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			defer deleteTemporaryFile()
 | 
			
		||||
			tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				defer func() {
 | 
			
		||||
					_ = util.RemoveAll(tmpWorkTree)
 | 
			
		||||
				}()
 | 
			
		||||
 | 
			
		||||
				checker = &CheckAttributeReader{
 | 
			
		||||
					Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
 | 
			
		||||
					Repo:       repo,
 | 
			
		||||
					IndexFile:  indexFilename,
 | 
			
		||||
					WorkTree:   tmpWorkTree,
 | 
			
		||||
				}
 | 
			
		||||
				ctx, cancel := context.WithCancel(DefaultContext)
 | 
			
		||||
				if err := checker.Init(ctx); err != nil {
 | 
			
		||||
					log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
				} else {
 | 
			
		||||
					go func() {
 | 
			
		||||
						err = checker.Run()
 | 
			
		||||
						if err != nil {
 | 
			
		||||
							log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
							cancel()
 | 
			
		||||
						}
 | 
			
		||||
					}()
 | 
			
		||||
				}
 | 
			
		||||
				defer cancel()
 | 
			
		||||
			checker = &CheckAttributeReader{
 | 
			
		||||
				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
 | 
			
		||||
				Repo:       repo,
 | 
			
		||||
				IndexFile:  indexFilename,
 | 
			
		||||
				WorkTree:   workTree,
 | 
			
		||||
			}
 | 
			
		||||
			ctx, cancel := context.WithCancel(DefaultContext)
 | 
			
		||||
			if err := checker.Init(ctx); err != nil {
 | 
			
		||||
				log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
			} else {
 | 
			
		||||
				go func() {
 | 
			
		||||
					err = checker.Run()
 | 
			
		||||
					if err != nil {
 | 
			
		||||
						log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
						cancel()
 | 
			
		||||
					}
 | 
			
		||||
				}()
 | 
			
		||||
			}
 | 
			
		||||
			defer cancel()
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@@ -114,6 +106,21 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
			
		||||
					sizes[language] += f.Size
 | 
			
		||||
 | 
			
		||||
					return nil
 | 
			
		||||
				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
 | 
			
		||||
					// strip off a ? if present
 | 
			
		||||
					if idx := strings.IndexByte(language, '?'); idx >= 0 {
 | 
			
		||||
						language = language[:idx]
 | 
			
		||||
					}
 | 
			
		||||
					if len(language) != 0 {
 | 
			
		||||
						// group languages, such as Pug -> HTML; SCSS -> CSS
 | 
			
		||||
						group := enry.GetLanguageGroup(language)
 | 
			
		||||
						if len(group) != 0 {
 | 
			
		||||
							language = group
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						sizes[language] += f.Size
 | 
			
		||||
						return nil
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 
 | 
			
		||||
@@ -13,11 +13,10 @@ import (
 | 
			
		||||
	"context"
 | 
			
		||||
	"io"
 | 
			
		||||
	"math"
 | 
			
		||||
	"os"
 | 
			
		||||
	"strings"
 | 
			
		||||
 | 
			
		||||
	"code.gitea.io/gitea/modules/analyze"
 | 
			
		||||
	"code.gitea.io/gitea/modules/log"
 | 
			
		||||
	"code.gitea.io/gitea/modules/util"
 | 
			
		||||
 | 
			
		||||
	"github.com/go-enry/go-enry/v2"
 | 
			
		||||
)
 | 
			
		||||
@@ -68,35 +67,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
			
		||||
	var checker *CheckAttributeReader
 | 
			
		||||
 | 
			
		||||
	if CheckGitVersionAtLeast("1.7.8") == nil {
 | 
			
		||||
		indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 | 
			
		||||
		indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
 | 
			
		||||
		if err == nil {
 | 
			
		||||
			defer deleteTemporaryFile()
 | 
			
		||||
			tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
 | 
			
		||||
			if err == nil {
 | 
			
		||||
				defer func() {
 | 
			
		||||
					_ = util.RemoveAll(tmpWorkTree)
 | 
			
		||||
				}()
 | 
			
		||||
 | 
			
		||||
				checker = &CheckAttributeReader{
 | 
			
		||||
					Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
 | 
			
		||||
					Repo:       repo,
 | 
			
		||||
					IndexFile:  indexFilename,
 | 
			
		||||
					WorkTree:   tmpWorkTree,
 | 
			
		||||
				}
 | 
			
		||||
				ctx, cancel := context.WithCancel(DefaultContext)
 | 
			
		||||
				if err := checker.Init(ctx); err != nil {
 | 
			
		||||
					log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
				} else {
 | 
			
		||||
					go func() {
 | 
			
		||||
						err = checker.Run()
 | 
			
		||||
						if err != nil {
 | 
			
		||||
							log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
							cancel()
 | 
			
		||||
						}
 | 
			
		||||
					}()
 | 
			
		||||
				}
 | 
			
		||||
				defer cancel()
 | 
			
		||||
			checker = &CheckAttributeReader{
 | 
			
		||||
				Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
 | 
			
		||||
				Repo:       repo,
 | 
			
		||||
				IndexFile:  indexFilename,
 | 
			
		||||
				WorkTree:   worktree,
 | 
			
		||||
			}
 | 
			
		||||
			ctx, cancel := context.WithCancel(DefaultContext)
 | 
			
		||||
			if err := checker.Init(ctx); err != nil {
 | 
			
		||||
				log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
			} else {
 | 
			
		||||
				go func() {
 | 
			
		||||
					err = checker.Run()
 | 
			
		||||
					if err != nil {
 | 
			
		||||
						log.Error("Unable to open checker for %s. Error: %v", commitID, err)
 | 
			
		||||
						cancel()
 | 
			
		||||
					}
 | 
			
		||||
				}()
 | 
			
		||||
			}
 | 
			
		||||
			defer cancel()
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
@@ -138,7 +130,23 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
 | 
			
		||||
 | 
			
		||||
					sizes[language] += f.Size()
 | 
			
		||||
					continue
 | 
			
		||||
				} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
 | 
			
		||||
					// strip off a ? if present
 | 
			
		||||
					if idx := strings.IndexByte(language, '?'); idx >= 0 {
 | 
			
		||||
						language = language[:idx]
 | 
			
		||||
					}
 | 
			
		||||
					if len(language) != 0 {
 | 
			
		||||
						// group languages, such as Pug -> HTML; SCSS -> CSS
 | 
			
		||||
						group := enry.GetLanguageGroup(language)
 | 
			
		||||
						if len(group) != 0 {
 | 
			
		||||
							language = group
 | 
			
		||||
						}
 | 
			
		||||
 | 
			
		||||
						sizes[language] += f.Size()
 | 
			
		||||
						continue
 | 
			
		||||
					}
 | 
			
		||||
				}
 | 
			
		||||
 | 
			
		||||
			}
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user