summaryrefslogtreecommitdiffstats
path: root/modules/git/repo_attribute.go
blob: 3ccc1b84a6c41b244f876570dd13e431f80ff89f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
	"bufio"
	"bytes"
	"context"
	"fmt"
	"io"
	"os"
	"strings"
	"sync/atomic"

	"code.gitea.io/gitea/modules/optional"
)

// LinguistAttributes lists the names of the linguist-related git attributes
// (including the "gitlab-language" alias).
// NOTE(review): presumably meant to be passed as the attributes argument of
// GitAttributes / GitAttributeChecker — confirm against callers.
var LinguistAttributes = []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"}

// newCheckAttrStdoutReader wraps r, which carries the nul-byte separated output of
// "git check-attr -z", and returns a function that decodes one path's worth of
// attributes per call.
//
// Every record in the stream is a (filename, attribute name, value) triple; each call
// of the returned function consumes count such triples and returns them as a
// name -> value map (the filename is dropped).
//
// The first read failure is remembered: it stops any further reading and is returned
// by that call (together with the attributes collected so far) and by every later call.
// Running out of input in the middle of a call is reported as io.ErrUnexpectedEOF.
func newCheckAttrStdoutReader(r io.Reader, count int) func() (map[string]GitAttribute, error) {
	// Same shape as bufio.ScanLines, but tokens end at a nul byte instead of a newline.
	splitOnNul := func(data []byte, atEOF bool) (int, []byte, error) {
		nul := bytes.IndexByte(data, 0)
		switch {
		case nul >= 0:
			// A complete nul-terminated token is available.
			return nul + 1, data[:nul], nil
		case atEOF && len(data) > 0:
			// Trailing token without terminator: hand it out as-is.
			return len(data), data, nil
		default:
			// Either nothing left at EOF, or more input is required.
			return 0, nil, nil
		}
	}

	scanner := bufio.NewScanner(r)
	scanner.Split(splitOnNul)

	// readErr is sticky: once set, no further token is ever read.
	var readErr error
	readToken := func() string {
		if readErr != nil {
			return ""
		}
		if scanner.Scan() {
			return scanner.Text()
		}
		if readErr = scanner.Err(); readErr == nil {
			// Scan stopped without an error: the stream ended too early.
			readErr = io.ErrUnexpectedEOF
		}
		return ""
	}

	return func() (map[string]GitAttribute, error) {
		attrs := make(map[string]GitAttribute, count)
		for i := 0; i < count; i++ {
			readToken() // filename, not needed
			name := readToken()
			value := GitAttribute(readToken())
			if readErr != nil {
				return attrs, readErr
			}
			attrs[name] = value
		}
		return attrs, scanner.Err()
	}
}

// GitAttribute is the value of a single attribute taken from the .gitattributes data.
type GitAttribute string //nolint:revive

// IsSpecified reports whether the attribute carries a value, i.e. it is neither
// empty nor the literal "unspecified".
func (ca GitAttribute) IsSpecified() bool {
	switch ca {
	case "", "unspecified":
		return false
	default:
		return true
	}
}

// String returns the attribute value, or "" when the attribute is not specified.
func (ca GitAttribute) String() string {
	if ca.IsSpecified() {
		return string(ca)
	}
	return ""
}

// Prefix returns the part of the attribute value that precedes the first question
// mark '?', or the whole value when there is none.
//
// sometimes used within gitlab-language: https://docs.gitlab.com/ee/user/project/highlighting.html#override-syntax-highlighting-for-a-file-type
func (ca GitAttribute) Prefix() string {
	head, _, _ := strings.Cut(ca.String(), "?")
	return head
}

// Bool interprets the attribute as a boolean: "set" and "true" yield Some(true),
// "unset" and "false" yield Some(false), and any other value yields None.
func (ca GitAttribute) Bool() optional.Option[bool] {
	if ca == "set" || ca == "true" {
		return optional.Some(true)
	}
	if ca == "unset" || ca == "false" {
		return optional.Some(false)
	}
	return optional.None[bool]()
}

// gitCheckAttrCommand builds the "git check-attr -z" command shared by the one-shot
// and the streaming attribute lookups.
//
// It returns the command, the run options (environment and working directory), and a
// cleanup function that removes any temporary files created here; the cleanup function
// is never nil on success. An error is returned when no attribute is requested or when
// the temporary index cannot be prepared.
func (repo *Repository) gitCheckAttrCommand(treeish string, attributes ...string) (*Command, *RunOpts, context.CancelFunc, error) {
	if len(attributes) == 0 {
		return nil, nil, nil, fmt.Errorf("no provided attributes to check-attr")
	}

	environ := os.Environ()
	cleanup := context.CancelFunc(func() {})

	// With no treeish, attributes are looked up through the index.
	readFromIndex := len(treeish) == 0

	// git < 2.40 cannot run check-attr on bare repo, but needs INDEX + WORK_TREE
	if !readFromIndex && !SupportCheckAttrOnBare {
		indexFile, workTree, cancel, err := repo.ReadTreeToTemporaryIndex(treeish)
		if err != nil {
			return nil, nil, nil, err
		}
		cleanup = cancel
		environ = append(environ, "GIT_INDEX_FILE="+indexFile, "GIT_WORK_TREE="+workTree)

		// From here on the temporary index/work tree stands in for the treeish.
		readFromIndex, treeish = true, ""
	}

	cmd := NewCommand(repo.Ctx, "check-attr", "-z")
	if readFromIndex {
		cmd.AddArguments("--cached")
	}
	if treeish != "" {
		cmd.AddArguments("--source")
		cmd.AddDynamicArguments(treeish)
	}
	cmd.AddDynamicArguments(attributes...)

	// Version 2.43.1 has a bug where the behavior of `GIT_FLUSH` is flipped.
	// Ref: https://lore.kernel.org/git/CABn0oJvg3M_kBW-u=j3QhKnO=6QOzk-YFTgonYw_UvFS1NTX4g@mail.gmail.com
	flush := "GIT_FLUSH=1"
	if InvertedGitFlushEnv {
		flush = "GIT_FLUSH=0"
	}
	environ = append(environ, flush)

	opts := &RunOpts{
		Env: environ,
		Dir: repo.Path,
	}
	return cmd, opts, cleanup, nil
}

// GitAttributeFirst looks up the given attributes for filename and returns the value of
// the first one, in argument order, that is specified. It returns "" when none is.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributeFirst(treeish, filename string, attributes ...string) (GitAttribute, error) {
	found, err := repo.GitAttributes(treeish, filename, attributes...)
	if err != nil {
		return "", err
	}
	for _, name := range attributes {
		if attr := found[name]; attr.IsSpecified() {
			return attr, nil
		}
	}
	return "", nil
}

// GitAttributes runs a one-shot "git check-attr" for filename and returns the requested
// attributes as a name -> value map.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
func (repo *Repository) GitAttributes(treeish, filename string, attributes ...string) (map[string]GitAttribute, error) {
	cmd, runOpts, removeTempFiles, err := repo.gitCheckAttrCommand(treeish, attributes...)
	if err != nil {
		return nil, err
	}
	defer removeTempFiles()

	// Capture both streams: stdout is parsed below, and both are echoed in the error.
	var stdout, stderr bytes.Buffer
	runOpts.Stdout, runOpts.Stderr = &stdout, &stderr

	cmd.AddDashesAndList(filename)

	if runErr := cmd.Run(runOpts); runErr != nil {
		return nil, fmt.Errorf("failed to run check-attr: %w\n%s\n%s", runErr, stdout.String(), stderr.String())
	}

	parse := newCheckAttrStdoutReader(&stdout, len(attributes))
	return parse()
}

// GitAttributeChecker creates an AttributeChecker for the given repository and provided commit ID
// to retrieve the attributes of multiple files. The AttributeChecker must be closed after use.
//
// If treeish is empty, the gitattribute will be read from the current repo (which MUST be a working directory and NOT bare).
//
// An error is returned when the check-attr command cannot be prepared or when the stdin
// pipe cannot be created; in the latter case any temporary files prepared for the command
// are removed before returning. Failures of the running command itself are reported later,
// through the returned AttributeChecker.
func (repo *Repository) GitAttributeChecker(treeish string, attributes ...string) (AttributeChecker, error) {
	cmd, runOpts, removeTempFiles, err := repo.gitCheckAttrCommand(treeish, attributes...)
	if err != nil {
		return AttributeChecker{}, err
	}

	cmd.AddArguments("--stdin")

	// os.Pipe is needed (and not io.Pipe), otherwise cmd.Wait will wait for the stdinReader
	// to be closed before returning (which would require another goroutine)
	// https://go.dev/issue/23019
	stdinReader, stdinWriter, err := os.Pipe() // reader closed in goroutine / writer closed on ac.Close
	if err != nil {
		// No AttributeChecker is handed out, so nobody else will ever run the cleanup:
		// do it here to avoid leaking the temporary index/work tree.
		removeTempFiles()
		return AttributeChecker{}, err
	}
	stdoutReader, stdoutWriter := io.Pipe() // closed in goroutine

	ac := AttributeChecker{
		removeTempFiles: removeTempFiles, // called on ac.Close
		stdinWriter:     stdinWriter,
		readStdout:      newCheckAttrStdoutReader(stdoutReader, len(attributes)),
		err:             &atomic.Value{},
	}

	// Run the command in the background; it ends when its stdin is closed (ac.Close),
	// when it fails, or when its context is cancelled.
	go func() {
		defer stdinReader.Close()
		defer stdoutWriter.Close() // in case of a panic (no-op if already closed by CloseWithError at the end)

		stdErr := new(bytes.Buffer)
		runOpts.Stdin = stdinReader
		runOpts.Stdout = stdoutWriter
		runOpts.Stderr = stdErr

		err := cmd.Run(runOpts)

		// if the context was cancelled, Run error is irrelevant
		if e := cmd.parentContext.Err(); e != nil {
			err = e
		}

		if err != nil { // decorate the returned error
			err = fmt.Errorf("git check-attr (stderr: %q): %w", strings.TrimSpace(stdErr.String()), err)
			// Keep it around so CheckPath can report it instead of a bare write error.
			ac.err.Store(err)
		}
		// Unblocks any pending read on stdoutReader; a nil err behaves like a plain Close.
		stdoutWriter.CloseWithError(err)
	}()

	return ac, nil
}

// AttributeChecker queries the attributes of several paths through a single
// check-attr process. Values are created by Repository.GitAttributeChecker and
// must be released with Close.
type AttributeChecker struct {
	// removeTempFiles discards temporary files backing the command; called on Close.
	removeTempFiles context.CancelFunc
	// stdinWriter feeds nul-terminated paths to the command; closed on Close.
	stdinWriter io.WriteCloser
	// readStdout decodes the attributes of the next path from the command output.
	readStdout func() (map[string]GitAttribute, error)
	// err holds the error of the command run, if one was stored.
	err *atomic.Value
}

// CheckPath sends path to the command and returns the attributes read back for it.
//
// When the path cannot be written, the stored command error is returned if there is
// one, otherwise the write error wrapped with a "git check-attr" prefix.
func (ac AttributeChecker) CheckPath(path string) (map[string]GitAttribute, error) {
	request := append([]byte(path), 0)

	_, writeErr := ac.stdinWriter.Write(request)
	if writeErr == nil {
		return ac.readStdout()
	}

	// try to return the Run error if available, since it is likely more helpful
	// than just "broken pipe"
	if runErr, ok := ac.err.Load().(error); ok && runErr != nil {
		return nil, runErr
	}
	return nil, fmt.Errorf("git check-attr: %w", writeErr)
}

// Close removes the temporary files and closes the command's stdin, returning the
// error of that close.
func (ac AttributeChecker) Close() error {
	ac.removeTempFiles()
	closeErr := ac.stdinWriter.Close()
	return closeErr
}