diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 13ed2a9..c89fc7e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -9,18 +9,18 @@ jobs: name: Verify runs-on: ubuntu-latest steps: - - name: Set up Go 1.16 - uses: actions/setup-go@v2 + - name: Set up Go 1.21 + uses: actions/setup-go@v5 with: - go-version: 1.16 + go-version: 1.21 - name: Check out code into the Go module directory - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Lint - uses: golangci/golangci-lint-action@v2 + uses: golangci/golangci-lint-action@v7 with: - version: v1.28 + version: v2.0 - name: Test run: go test -v ./... diff --git a/.golangci.yml b/.golangci.yml index 12cdbd2..655cb5a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,23 +1,51 @@ +version: "2" + run: tests: false linters: - disable-all: true + default: none enable: - - deadcode - errcheck - - gofmt - - goimports - - golint - govet - ineffassign - - typecheck + - misspell + - revive - unconvert - - varcheck - -issues: - exclude-use-default: false + - unused + settings: + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) + revive: + rules: + - name: context-keys-type + - name: time-naming + - name: var-declaration + - name: unexported-return + - name: errorf + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: increment-decrement + - name: var-naming + - name: package-comments + - name: range + - name: receiver-naming + - name: indent-error-flow -linter-settings: - goimports: - local-prefixes: github.com/bluekeyes/go-gitdiff +formatters: + enable: + - gofmt + - goimports + settings: + goimports: + local-prefixes: + - github.com/bluekeyes/go-gitdiff diff --git a/README.md b/README.md index 8f9671b..0f65127 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A Go library for parsing and applying patches generated by `git diff`, `git show`, and `git format-patch`. It can also parse and apply unified diffs -generated by the standard `diff` tool. +generated by the standard GNU `diff` tool. It supports standard line-oriented text patches and Git binary patches, and aims to parse anything accepted by the `git apply` command. @@ -29,23 +29,23 @@ if err != nil { // apply the changes in the patch to a source file var output bytes.Buffer -if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil { +if err := gitdiff.Apply(&output, code, files[0]); err != nil { log.Fatal(err) } ``` ## Development Status -Mostly complete. API changes are possible, particularly for patch application, -but I expect the parsing interface and types to remain stable. +The parsing API and types are complete and I expect will remain stable. Version +0.7.0 introduced a new apply API that may change more in the future to support +non-strict patch application. -Patch parsing and strict application are well-covered by unit tests and the -library is used in a production application that parses and applies thousands of -patches every day, but the space of all possible patches is large, so there are -likely undiscovered bugs. +Parsing and strict application are well-covered by unit tests and the library +is used in a production application that parses and applies thousands of +patches every day. However, the space of all possible patches is large, so +there are likely undiscovered bugs. -The parsing code has also had a modest amount of fuzz testing. I hope to continue -this testing in the future. +The parsing code has also had a modest amount of fuzz testing. ## Why another git/unified diff parser? @@ -99,3 +99,7 @@ this testing in the future. context of each fragment must exactly match the source file; `git apply` implements a search algorithm that tries different lines and amounts of context, with further options to normalize or ignore whitespace changes. + +7. When parsing mail-formatted patch headers, leading and trailing whitespace + is always removed from `Subject` lines. There is no exact equivalent to `git + mailinfo -k`. diff --git a/gitdiff/apply.go b/gitdiff/apply.go index 05a4526..44bbcca 100644 --- a/gitdiff/apply.go +++ b/gitdiff/apply.go @@ -13,10 +13,9 @@ import ( // Users can test if an error was caused by a conflict by using errors.Is with // an empty Conflict: // -// if errors.Is(err, &Conflict{}) { -// // handle conflict -// } -// +// if errors.Is(err, &Conflict{}) { +// // handle conflict +// } type Conflict struct { msg string } @@ -89,85 +88,36 @@ func applyError(err error, args ...interface{}) error { var ( errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") + errApplierClosed = errors.New("gitdiff: applier is closed") ) -const ( - applyInitial = iota - applyText - applyBinary - applyFile -) - -// Apply is a convenience function that creates an Applier for src with default -// settings and applies the changes in f, writing the result to dst. -func Apply(dst io.Writer, src io.ReaderAt, f *File) error { - return NewApplier(src).ApplyFile(dst, f) -} - -// Applier applies changes described in fragments to source data. If changes -// are described in multiple fragments, those fragments must be applied in -// order, usually by calling ApplyFile. -// -// By default, Applier operates in "strict" mode, where fragment content and -// positions must exactly match those of the source. +// Apply applies the changes in f to src, writing the result to dst. It can +// apply both text and binary changes. // -// If an error occurs while applying, methods on Applier return instances of -// *ApplyError that annotate the wrapped error with additional information -// when available. If the error is because of a conflict between a fragment and -// the source, the wrapped error will be a *Conflict. -// -// While an Applier can apply both text and binary fragments, only one fragment -// type can be used without resetting the Applier. The first fragment applied -// sets the type for the Applier. Mixing fragment types or mixing -// fragment-level and file-level applies results in an error. -type Applier struct { - src io.ReaderAt - lineSrc LineReaderAt - nextLine int64 - applyType int -} - -// NewApplier creates an Applier that reads data from src. If src is a -// LineReaderAt, it is used directly to apply text fragments. -func NewApplier(src io.ReaderAt) *Applier { - a := new(Applier) - a.Reset(src) - return a -} - -// Reset resets the input and internal state of the Applier. If src is nil, the -// existing source is reused. -func (a *Applier) Reset(src io.ReaderAt) { - if src != nil { - a.src = src - if lineSrc, ok := src.(LineReaderAt); ok { - a.lineSrc = lineSrc - } else { - a.lineSrc = &lineReaderAt{r: src} +// If an error occurs while applying, Apply returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict with the source, the wrapped error will be a *Conflict. +func Apply(dst io.Writer, src io.ReaderAt, f *File) error { + if f.IsBinary { + if len(f.TextFragments) > 0 { + return applyError(errors.New("binary file contains text fragments")) + } + if f.BinaryFragment == nil { + return applyError(errors.New("binary file does not contain a binary fragment")) + } + } else { + if f.BinaryFragment != nil { + return applyError(errors.New("text file contains a binary fragment")) } - } - a.nextLine = 0 - a.applyType = applyInitial -} - -// ApplyFile applies the changes in all of the fragments of f and writes the -// result to dst. -func (a *Applier) ApplyFile(dst io.Writer, f *File) error { - if a.applyType != applyInitial { - return applyError(errApplyInProgress) - } - defer func() { a.applyType = applyFile }() - - if f.IsBinary && len(f.TextFragments) > 0 { - return applyError(errors.New("binary file contains text fragments")) - } - if !f.IsBinary && f.BinaryFragment != nil { - return applyError(errors.New("text file contains binary fragment")) } switch { case f.BinaryFragment != nil: - return a.ApplyBinaryFragment(dst, f.BinaryFragment) + applier := NewBinaryApplier(dst, src) + if err := applier.ApplyFragment(f.BinaryFragment); err != nil { + return err + } + return applier.Close() case len(f.TextFragments) > 0: frags := make([]*TextFragment, len(f.TextFragments)) @@ -181,271 +131,17 @@ func (a *Applier) ApplyFile(dst io.Writer, f *File) error { // right now, the application fails if fragments overlap, but it should be // possible to precompute the result of applying them in order + applier := NewTextApplier(dst, src) for i, frag := range frags { - if err := a.ApplyTextFragment(dst, frag); err != nil { + if err := applier.ApplyFragment(frag); err != nil { return applyError(err, fragNum(i)) } } - } - - return applyError(a.Flush(dst)) -} - -// ApplyTextFragment applies the changes in the fragment f and writes unwritten -// data before the start of the fragment and the result to dst. If multiple -// text fragments apply to the same source, ApplyTextFragment must be called in -// order of increasing start position. As a result, each fragment can be -// applied at most once before a call to Reset. -func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { - if a.applyType != applyInitial && a.applyType != applyText { - return applyError(errApplyInProgress) - } - defer func() { a.applyType = applyText }() - - // application code assumes fragment fields are consistent - if err := f.Validate(); err != nil { - return applyError(err) - } - - // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) - fragStart := f.OldPosition - 1 - if fragStart < 0 { - fragStart = 0 - } - fragEnd := fragStart + f.OldLines - - start := a.nextLine - if fragStart < start { - return applyError(&Conflict{"fragment overlaps with an applied fragment"}) - } - - if f.OldPosition == 0 { - ok, err := isLen(a.src, 0) - if err != nil { - return applyError(err) - } - if !ok { - return applyError(&Conflict{"cannot create new file from non-empty src"}) - } - } - - preimage := make([][]byte, fragEnd-start) - n, err := a.lineSrc.ReadLinesAt(preimage, start) - if err != nil { - return applyError(err, lineNum(start+int64(n))) - } - - // copy leading data before the fragment starts - for i, line := range preimage[:fragStart-start] { - if _, err := dst.Write(line); err != nil { - a.nextLine = start + int64(i) - return applyError(err, lineNum(a.nextLine)) - } - } - preimage = preimage[fragStart-start:] - - // apply the changes in the fragment - used := int64(0) - for i, line := range f.Lines { - if err := applyTextLine(dst, line, preimage, used); err != nil { - a.nextLine = fragStart + used - return applyError(err, lineNum(a.nextLine), fragLineNum(i)) - } - if line.Old() { - used++ - } - } - a.nextLine = fragStart + used - - // new position of +0,0 mean a full delete, so check for leftovers - if f.NewPosition == 0 && f.NewLines == 0 { - var b [1][]byte - n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) - if err != nil && err != io.EOF { - return applyError(err, lineNum(a.nextLine)) - } - if n > 0 { - return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) - } - } - - return nil -} - -func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { - if line.Old() && string(preimage[i]) != line.Line { - return &Conflict{"fragment line does not match src line"} - } - if line.New() { - _, err = io.WriteString(dst, line.Line) - } - return err -} - -// Flush writes any data following the last applied fragment to dst. -func (a *Applier) Flush(dst io.Writer) (err error) { - switch a.applyType { - case applyInitial: - _, err = copyFrom(dst, a.src, 0) - case applyText: - _, err = copyLinesFrom(dst, a.lineSrc, a.nextLine) - case applyBinary: - // nothing to flush, binary apply "consumes" full source - } - return err -} - -// ApplyBinaryFragment applies the changes in the fragment f and writes the -// result to dst. At most one binary fragment can be applied before a call to -// Reset. -func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error { - if a.applyType != applyInitial { - return applyError(errApplyInProgress) - } - defer func() { a.applyType = applyBinary }() + return applier.Close() - if f == nil { - return applyError(errors.New("nil fragment")) - } - - switch f.Method { - case BinaryPatchLiteral: - if _, err := dst.Write(f.Data); err != nil { - return applyError(err) - } - case BinaryPatchDelta: - if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil { - return applyError(err) - } default: - return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) - } - return nil -} - -func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { - srcSize, delta := readBinaryDeltaSize(frag) - if err := checkBinarySrcSize(src, srcSize); err != nil { + // nothing to apply, just copy all the data + _, err := copyFrom(dst, src, 0) return err } - - dstSize, delta := readBinaryDeltaSize(delta) - - for len(delta) > 0 { - op := delta[0] - if op == 0 { - return errors.New("invalid delta opcode 0") - } - - var n int64 - var err error - switch op & 0x80 { - case 0x80: - n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) - case 0x00: - n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) - } - if err != nil { - return err - } - dstSize -= n - } - - if dstSize != 0 { - return errors.New("corrupt binary delta: insufficient or extra data") - } - return nil -} - -// readBinaryDeltaSize reads a variable length size from a delta-encoded binary -// fragment, returing the size and the unused data. Data is encoded as: -// -// [[1xxxxxxx]...] [0xxxxxxx] -// -// in little-endian order, with 7 bits of the value per byte. -func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { - shift := uint(0) - for i, b := range d { - size |= int64(b&0x7F) << shift - shift += 7 - if b <= 0x7F { - return size, d[i+1:] - } - } - return size, nil -} - -// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary -// fragment, returning the amount of data written and the usused part of the -// fragment. An add operation takes the form: -// -// [0xxxxxx][[data1]...] -// -// where the lower seven bits of the opcode is the number of data bytes -// following the opcode. See also pack-format.txt in the Git source. -func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { - size := int(op) - if len(delta) < size { - return 0, delta, errors.New("corrupt binary delta: incomplete add") - } - _, err = w.Write(delta[:size]) - return int64(size), delta[size:], err -} - -// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary -// fragment, returing the amount of data written and the unused part of the -// fragment. A copy operation takes the form: -// -// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] -// -// where the lower seven bits of the opcode determine which non-zero offset and -// size bytes are present in little-endian order: if bit 0 is set, offset1 is -// present, etc. If no offset or size bytes are present, offset is 0 and size -// is 0x10000. See also pack-format.txt in the Git source. -func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { - const defaultSize = 0x10000 - - unpack := func(start, bits uint) (v int64) { - for i := uint(0); i < bits; i++ { - mask := byte(1 << (i + start)) - if op&mask > 0 { - if len(delta) == 0 { - err = errors.New("corrupt binary delta: incomplete copy") - return - } - v |= int64(delta[0]) << (8 * i) - delta = delta[1:] - } - } - return - } - - offset := unpack(0, 4) - size := unpack(4, 3) - if err != nil { - return 0, delta, err - } - if size == 0 { - size = defaultSize - } - - // TODO(bkeyes): consider pooling these buffers - b := make([]byte, size) - if _, err := src.ReadAt(b, offset); err != nil { - return 0, delta, err - } - - _, err = w.Write(b) - return size, delta, err -} - -func checkBinarySrcSize(r io.ReaderAt, size int64) error { - ok, err := isLen(r, size) - if err != nil { - return err - } - if !ok { - return &Conflict{"fragment src size does not match actual src size"} - } - return nil } diff --git a/gitdiff/apply_binary.go b/gitdiff/apply_binary.go new file mode 100644 index 0000000..b34772d --- /dev/null +++ b/gitdiff/apply_binary.go @@ -0,0 +1,206 @@ +package gitdiff + +import ( + "errors" + "fmt" + "io" +) + +// BinaryApplier applies binary changes described in a fragment to source data. +// The applier must be closed after use. +type BinaryApplier struct { + dst io.Writer + src io.ReaderAt + + closed bool + dirty bool +} + +// NewBinaryApplier creates an BinaryApplier that reads data from src and +// writes modified data to dst. +func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { + a := BinaryApplier{ + dst: dst, + src: src, + } + return &a +} + +// ApplyFragment applies the changes in the fragment f and writes the result to +// dst. ApplyFragment can be called at most once. +// +// If an error occurs while applying, ApplyFragment returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict between a fragment and the source, the wrapped error will be a +// *Conflict. +func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { + if f == nil { + return applyError(errors.New("nil fragment")) + } + if a.closed { + return applyError(errApplierClosed) + } + if a.dirty { + return applyError(errApplyInProgress) + } + + // mark an apply as in progress, even if it fails before making changes + a.dirty = true + + switch f.Method { + case BinaryPatchLiteral: + if _, err := a.dst.Write(f.Data); err != nil { + return applyError(err) + } + case BinaryPatchDelta: + if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { + return applyError(err) + } + default: + return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) + } + return nil +} + +// Close writes any data following the last applied fragment and prevents +// future calls to ApplyFragment. +func (a *BinaryApplier) Close() (err error) { + if a.closed { + return nil + } + + a.closed = true + if !a.dirty { + _, err = copyFrom(a.dst, a.src, 0) + } else { + // do nothing, applying a binary fragment copies all data + } + return err +} + +func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { + srcSize, delta := readBinaryDeltaSize(frag) + if err := checkBinarySrcSize(src, srcSize); err != nil { + return err + } + + dstSize, delta := readBinaryDeltaSize(delta) + + for len(delta) > 0 { + op := delta[0] + if op == 0 { + return errors.New("invalid delta opcode 0") + } + + var n int64 + var err error + switch op & 0x80 { + case 0x80: + n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) + case 0x00: + n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) + } + if err != nil { + return err + } + dstSize -= n + } + + if dstSize != 0 { + return errors.New("corrupt binary delta: insufficient or extra data") + } + return nil +} + +// readBinaryDeltaSize reads a variable length size from a delta-encoded binary +// fragment, returing the size and the unused data. Data is encoded as: +// +// [[1xxxxxxx]...] [0xxxxxxx] +// +// in little-endian order, with 7 bits of the value per byte. +func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { + shift := uint(0) + for i, b := range d { + size |= int64(b&0x7F) << shift + shift += 7 + if b <= 0x7F { + return size, d[i+1:] + } + } + return size, nil +} + +// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary +// fragment, returning the amount of data written and the usused part of the +// fragment. An add operation takes the form: +// +// [0xxxxxx][[data1]...] +// +// where the lower seven bits of the opcode is the number of data bytes +// following the opcode. See also pack-format.txt in the Git source. +func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { + size := int(op) + if len(delta) < size { + return 0, delta, errors.New("corrupt binary delta: incomplete add") + } + _, err = w.Write(delta[:size]) + return int64(size), delta[size:], err +} + +// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary +// fragment, returing the amount of data written and the unused part of the +// fragment. A copy operation takes the form: +// +// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] +// +// where the lower seven bits of the opcode determine which non-zero offset and +// size bytes are present in little-endian order: if bit 0 is set, offset1 is +// present, etc. If no offset or size bytes are present, offset is 0 and size +// is 0x10000. See also pack-format.txt in the Git source. +func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { + const defaultSize = 0x10000 + + unpack := func(start, bits uint) (v int64) { + for i := uint(0); i < bits; i++ { + mask := byte(1 << (i + start)) + if op&mask > 0 { + if len(delta) == 0 { + err = errors.New("corrupt binary delta: incomplete copy") + return + } + v |= int64(delta[0]) << (8 * i) + delta = delta[1:] + } + } + return + } + + offset := unpack(0, 4) + size := unpack(4, 3) + if err != nil { + return 0, delta, err + } + if size == 0 { + size = defaultSize + } + + // TODO(bkeyes): consider pooling these buffers + b := make([]byte, size) + if _, err := src.ReadAt(b, offset); err != nil { + return 0, delta, err + } + + _, err = w.Write(b) + return size, delta, err +} + +func checkBinarySrcSize(r io.ReaderAt, size int64) error { + ok, err := isLen(r, size) + if err != nil { + return err + } + if !ok { + return &Conflict{"fragment src size does not match actual src size"} + } + return nil +} diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index d981e96..05915ba 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -9,69 +9,6 @@ import ( "testing" ) -func TestApplierInvariants(t *testing.T) { - binary := &BinaryFragment{ - Method: BinaryPatchLiteral, - Size: 2, - Data: []byte("\xbe\xef"), - } - - text := &TextFragment{ - NewPosition: 1, - NewLines: 1, - LinesAdded: 1, - Lines: []Line{ - {Op: OpAdd, Line: "new line\n"}, - }, - } - - file := &File{ - TextFragments: []*TextFragment{text}, - } - - src := bytes.NewReader(nil) - dst := ioutil.Discard - - assertInProgress := func(t *testing.T, kind string, err error) { - if !errors.Is(err, errApplyInProgress) { - t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err) - } - } - - t.Run("binaryFirst", func(t *testing.T) { - a := NewApplier(src) - if err := a.ApplyBinaryFragment(dst, binary); err != nil { - t.Fatalf("unexpected error applying fragment: %v", err) - } - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) - assertInProgress(t, "file", a.ApplyFile(dst, file)) - }) - - t.Run("textFirst", func(t *testing.T) { - a := NewApplier(src) - if err := a.ApplyTextFragment(dst, text); err != nil { - t.Fatalf("unexpected error applying fragment: %v", err) - } - // additional text fragments are allowed - if err := a.ApplyTextFragment(dst, text); err != nil { - t.Fatalf("unexpected error applying second fragment: %v", err) - } - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) - assertInProgress(t, "file", a.ApplyFile(dst, file)) - }) - - t.Run("fileFirst", func(t *testing.T) { - a := NewApplier(src) - if err := a.ApplyFile(dst, file); err != nil { - t.Fatalf("unexpected error applying file: %v", err) - } - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) - assertInProgress(t, "file", a.ApplyFile(dst, file)) - }) -} - func TestApplyTextFragment(t *testing.T) { tests := map[string]applyTest{ "createFile": {Files: getApplyFiles("text_fragment_new")}, @@ -85,6 +22,7 @@ func TestApplyTextFragment(t *testing.T) { "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, @@ -93,14 +31,14 @@ func TestApplyTextFragment(t *testing.T) { Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src_before.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorShortSrc": { Files: applyFiles{ Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorContextConflict": { Files: applyFiles{ @@ -127,11 +65,12 @@ func TestApplyTextFragment(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - test.run(t, func(w io.Writer, applier *Applier, file *File) error { + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { if len(file.TextFragments) != 1 { t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) } - return applier.ApplyTextFragment(w, file.TextFragments[0]) + applier := NewTextApplier(dst, src) + return applier.ApplyFragment(file.TextFragments[0]) }) }) } @@ -176,8 +115,9 @@ func TestApplyBinaryFragment(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - test.run(t, func(w io.Writer, applier *Applier, file *File) error { - return applier.ApplyBinaryFragment(w, file.BinaryFragment) + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + applier := NewBinaryApplier(dst, src) + return applier.ApplyFragment(file.BinaryFragment) }) }) } @@ -216,8 +156,8 @@ func TestApplyFile(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - test.run(t, func(w io.Writer, applier *Applier, file *File) error { - return applier.ApplyFile(w, file) + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + return Apply(dst, src, file) }) }) } @@ -228,7 +168,7 @@ type applyTest struct { Err interface{} } -func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) { +func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) { src, patch, out := at.Files.Load(t) files, _, err := Parse(bytes.NewReader(patch)) @@ -239,10 +179,8 @@ func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) err t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) } - applier := NewApplier(bytes.NewReader(src)) - var dst bytes.Buffer - err = apply(&dst, applier, files[0]) + err = apply(&dst, bytes.NewReader(src), files[0]) if at.Err != nil { assertError(t, at.Err, err, "applying fragment") return diff --git a/gitdiff/apply_text.go b/gitdiff/apply_text.go new file mode 100644 index 0000000..8d0accb --- /dev/null +++ b/gitdiff/apply_text.go @@ -0,0 +1,158 @@ +package gitdiff + +import ( + "errors" + "io" +) + +// TextApplier applies changes described in text fragments to source data. If +// changes are described in multiple fragments, those fragments must be applied +// in order. The applier must be closed after use. +// +// By default, TextApplier operates in "strict" mode, where fragment content +// and positions must exactly match those of the source. +type TextApplier struct { + dst io.Writer + src io.ReaderAt + lineSrc LineReaderAt + nextLine int64 + + closed bool + dirty bool +} + +// NewTextApplier creates a TextApplier that reads data from src and writes +// modified data to dst. If src implements LineReaderAt, it is used directly. +func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { + a := TextApplier{ + dst: dst, + src: src, + } + + if lineSrc, ok := src.(LineReaderAt); ok { + a.lineSrc = lineSrc + } else { + a.lineSrc = &lineReaderAt{r: src} + } + + return &a +} + +// ApplyFragment applies the changes in the fragment f, writing unwritten data +// before the start of the fragment and any changes from the fragment. If +// multiple text fragments apply to the same content, ApplyFragment must be +// called in order of increasing start position. As a result, each fragment can +// be applied at most once. +// +// If an error occurs while applying, ApplyFragment returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict between the fragment and the source, the wrapped error will be a +// *Conflict. +func (a *TextApplier) ApplyFragment(f *TextFragment) error { + if a.closed { + return applyError(errApplierClosed) + } + + // mark an apply as in progress, even if it fails before making changes + a.dirty = true + + // application code assumes fragment fields are consistent + if err := f.Validate(); err != nil { + return applyError(err) + } + + // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) + fragStart := f.OldPosition - 1 + if fragStart < 0 { + fragStart = 0 + } + fragEnd := fragStart + f.OldLines + + start := a.nextLine + if fragStart < start { + return applyError(&Conflict{"fragment overlaps with an applied fragment"}) + } + + if f.OldPosition == 0 { + ok, err := isLen(a.src, 0) + if err != nil { + return applyError(err) + } + if !ok { + return applyError(&Conflict{"cannot create new file from non-empty src"}) + } + } + + preimage := make([][]byte, fragEnd-start) + n, err := a.lineSrc.ReadLinesAt(preimage, start) + if err != nil { + // an EOF indicates that source file is shorter than the patch expects, + // which should be reported as a conflict rather than a generic error + if errors.Is(err, io.EOF) { + err = &Conflict{"src has fewer lines than required by fragment"} + } + return applyError(err, lineNum(start+int64(n))) + } + + // copy leading data before the fragment starts + for i, line := range preimage[:fragStart-start] { + if _, err := a.dst.Write(line); err != nil { + a.nextLine = start + int64(i) + return applyError(err, lineNum(a.nextLine)) + } + } + preimage = preimage[fragStart-start:] + + // apply the changes in the fragment + used := int64(0) + for i, line := range f.Lines { + if err := applyTextLine(a.dst, line, preimage, used); err != nil { + a.nextLine = fragStart + used + return applyError(err, lineNum(a.nextLine), fragLineNum(i)) + } + if line.Old() { + used++ + } + } + a.nextLine = fragStart + used + + // new position of +0,0 mean a full delete, so check for leftovers + if f.NewPosition == 0 && f.NewLines == 0 { + var b [1][]byte + n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) + if err != nil && err != io.EOF { + return applyError(err, lineNum(a.nextLine)) + } + if n > 0 { + return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) + } + } + + return nil +} + +func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { + if line.Old() && string(preimage[i]) != line.Line { + return &Conflict{"fragment line does not match src line"} + } + if line.New() { + _, err = io.WriteString(dst, line.Line) + } + return err +} + +// Close writes any data following the last applied fragment and prevents +// future calls to ApplyFragment. +func (a *TextApplier) Close() (err error) { + if a.closed { + return nil + } + + a.closed = true + if !a.dirty { + _, err = copyFrom(a.dst, a.src, 0) + } else { + _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) + } + return err +} diff --git a/gitdiff/base85.go b/gitdiff/base85.go index 385aa64..86db117 100644 --- a/gitdiff/base85.go +++ b/gitdiff/base85.go @@ -19,8 +19,8 @@ func init() { } // base85Decode decodes Base85-encoded data from src into dst. It uses the -// alphabet defined by base85.c in the Git source tree, which appears to be -// unique. src must contain at least len(dst) bytes of encoded data. +// alphabet defined by base85.c in the Git source tree. src must contain at +// least len(dst) bytes of encoded data. func base85Decode(dst, src []byte) error { var v uint32 var n, ndst int @@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error { } return nil } + +// base85Encode encodes src in Base85, writing the result to dst. It uses the +// alphabet defined by base85.c in the Git source tree. +func base85Encode(dst, src []byte) { + var di, si int + + encode := func(v uint32) { + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] + dst[di+1] = b85Alpha[(v/(85*85*85))%85] + dst[di+2] = b85Alpha[(v/(85*85))%85] + dst[di+3] = b85Alpha[(v/85)%85] + dst[di+4] = b85Alpha[v%85] + } + + n := (len(src) / 4) * 4 + for si < n { + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) + si += 4 + di += 5 + } + + var v uint32 + switch len(src) - si { + case 3: + v |= uint32(src[si+2]) << 8 + fallthrough + case 2: + v |= uint32(src[si+1]) << 16 + fallthrough + case 1: + v |= uint32(src[si+0]) << 24 + encode(v) + } +} + +// base85Len returns the length of n bytes of Base85 encoded data. +func base85Len(n int) int { + return (n + 3) / 4 * 5 +} diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index 068da63..672c471 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -1,6 +1,7 @@ package gitdiff import ( + "bytes" "testing" ) @@ -58,3 +59,60 @@ func TestBase85Decode(t *testing.T) { }) } } + +func TestBase85Encode(t *testing.T) { + tests := map[string]struct { + Input []byte + Output string + }{ + "zeroBytes": { + Input: []byte{}, + Output: "", + }, + "twoBytes": { + Input: []byte{0xCA, 0xFE}, + Output: "%KiWV", + }, + "fourBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, + Output: "007GV", + }, + "sixBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + Output: "007GV%KiWV", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + base85Encode(dst, test.Input) + for i, b := range test.Output { + if dst[i] != byte(b) { + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) + } + } + }) + } +} + +func FuzzBase85Roundtrip(f *testing.F) { + f.Add([]byte{0x2b, 0x0d}) + f.Add([]byte{0xbc, 0xb4, 0x3f}) + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) + + f.Fuzz(func(t *testing.T, in []byte) { + n := len(in) + dst := make([]byte, base85Len(n)) + out := make([]byte, n) + + base85Encode(dst, in) + if err := base85Decode(out, dst); err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) +} diff --git a/gitdiff/binary.go b/gitdiff/binary.go index c65a9a6..282e323 100644 --- a/gitdiff/binary.go +++ b/gitdiff/binary.go @@ -50,11 +50,11 @@ func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { } func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { - switch p.Line(0) { - case "GIT binary patch\n": + line := p.Line(0) + switch { + case line == "GIT binary patch\n": hasData = true - case "Binary files differ\n": - case "Files differ\n": + case isBinaryNoDataMarker(line): default: return false, false, nil } @@ -65,6 +65,13 @@ func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { return true, hasData, nil } +func isBinaryNoDataMarker(line string) bool { + if strings.HasSuffix(line, " differ\n") { + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") + } + return false +} + func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) if len(parts) < 2 { diff --git a/gitdiff/binary_test.go b/gitdiff/binary_test.go index a31a0e0..64db243 100644 --- a/gitdiff/binary_test.go +++ b/gitdiff/binary_test.go @@ -25,6 +25,16 @@ func TestParseBinaryMarker(t *testing.T) { IsBinary: true, HasData: false, }, + "binaryFileNoPatchPaths": { + Input: "Binary files a/foo.bin and b/foo.bin differ\n", + IsBinary: true, + HasData: false, + }, + "fileNoPatch": { + Input: "Files differ\n", + IsBinary: true, + HasData: false, + }, "textFile": { Input: "@@ -10,14 +22,31 @@\n", IsBinary: false, diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 58904b4..7ae4bc9 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -57,7 +57,7 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { return nil, "", err } } - return nil, "", nil + return nil, preamble.String(), nil } func (p *parser) ParseGitFileHeader() (*File, error) { @@ -324,12 +324,12 @@ func parseGitHeaderNewName(f *File, line, defaultName string) error { } func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { - f.OldMode, err = parseMode(line) + f.OldMode, err = parseMode(strings.TrimSpace(line)) return } func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { - f.NewMode, err = parseMode(line) + f.NewMode, err = parseMode(strings.TrimSpace(line)) return } diff --git a/gitdiff/file_header_test.go b/gitdiff/file_header_test.go index 102795c..ef29833 100644 --- a/gitdiff/file_header_test.go +++ b/gitdiff/file_header_test.go @@ -486,6 +486,12 @@ func TestParseGitHeaderData(t *testing.T) { OldMode: os.FileMode(0100644), }, }, + "oldModeWithTrailingSpace": { + Line: "old mode 100644\r\n", + OutputFile: &File{ + OldMode: os.FileMode(0100644), + }, + }, "invalidOldMode": { Line: "old mode rw\n", Err: true, @@ -496,6 +502,12 @@ func TestParseGitHeaderData(t *testing.T) { NewMode: os.FileMode(0100755), }, }, + "newModeWithTrailingSpace": { + Line: "new mode 100755\r\n", + OutputFile: &File{ + NewMode: os.FileMode(0100755), + }, + }, "invalidNewMode": { Line: "new mode rwx\n", Err: true, @@ -518,6 +530,15 @@ func TestParseGitHeaderData(t *testing.T) { IsNew: true, }, }, + "newFileModeWithTrailingSpace": { + Line: "new file mode 100755\r\n", + DefaultName: "dir/file.txt", + OutputFile: &File{ + NewName: "dir/file.txt", + NewMode: os.FileMode(0100755), + IsNew: true, + }, + }, "copyFrom": { Line: "copy from dir/file.txt\n", OutputFile: &File{ diff --git a/gitdiff/format.go b/gitdiff/format.go new file mode 100644 index 0000000..d97aba9 --- /dev/null +++ b/gitdiff/format.go @@ -0,0 +1,281 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "strconv" +) + +type formatter struct { + w io.Writer + err error +} + +func newFormatter(w io.Writer) *formatter { + return &formatter{w: w} +} + +func (fm *formatter) Write(p []byte) (int, error) { + if fm.err != nil { + return len(p), nil + } + if _, err := fm.w.Write(p); err != nil { + fm.err = err + } + return len(p), nil +} + +func (fm *formatter) WriteString(s string) (int, error) { + fm.Write([]byte(s)) + return len(s), nil +} + +func (fm *formatter) WriteByte(c byte) error { + fm.Write([]byte{c}) + return nil +} + +func (fm *formatter) WriteQuotedName(s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + fm.WriteByte('"') + } + fm.WriteString(s[qpos:i]) + fm.Write(q) + qpos = i + 1 + } + } + fm.WriteString(s[qpos:]) + if qpos > 0 { + fm.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} + +func (fm *formatter) FormatFile(f *File) { + fm.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + fm.WriteQuotedName("a/" + aName) + fm.WriteByte(' ') + fm.WriteQuotedName("b/" + bName) + fm.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) + } else if f.NewMode != 0 { + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) + } else if f.OldMode != 0 { + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + fm.WriteString("copy from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("copy to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + fm.WriteString("rename from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("rename to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { + fmt.Fprintf(fm, " %o", f.OldMode) + } + + fm.WriteByte('\n') + } + + if f.IsBinary { + if f.BinaryFragment == nil { + fm.WriteString("Binary files ") + fm.WriteQuotedName("a/" + aName) + fm.WriteString(" and ") + fm.WriteQuotedName("b/" + bName) + fm.WriteString(" differ\n") + } else { + fm.WriteString("GIT binary patch\n") + fm.FormatBinaryFragment(f.BinaryFragment) + if f.ReverseBinaryFragment != nil { + fm.FormatBinaryFragment(f.ReverseBinaryFragment) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { + fm.WriteString("--- ") + if f.OldName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("a/" + f.OldName) + } + fm.WriteByte('\n') + + fm.WriteString("+++ ") + if f.NewName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("b/" + f.NewName) + } + fm.WriteByte('\n') + + for _, frag := range f.TextFragments { + fm.FormatTextFragment(frag) + } + } +} + +func (fm *formatter) FormatTextFragment(f *TextFragment) { + fm.FormatTextFragmentHeader(f) + fm.WriteByte('\n') + + for _, line := range f.Lines { + fm.WriteString(line.Op.String()) + fm.WriteString(line.Line) + if line.NoEOL() { + fm.WriteString("\n\\ No newline at end of file\n") + } + } +} + +func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + fm.WriteByte(' ') + fm.WriteString(f.Comment) + } +} + +func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { + const ( + maxBytesPerLine = 52 + ) + + switch f.Method { + case BinaryPatchDelta: + fm.WriteString("delta ") + case BinaryPatchLiteral: + fm.WriteString("literal ") + } + fm.Write(strconv.AppendInt(nil, f.Size, 10)) + fm.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + fm.WriteByte('z') + fm.Write(buf) + fm.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + fm.WriteByte(sizeChar) + fm.Write(buf) + fm.WriteByte('\n') + } + fm.WriteByte('\n') +} + +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw := zlib.NewWriter(&b) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/gitdiff/format_roundtrip_test.go b/gitdiff/format_roundtrip_test.go new file mode 100644 index 0000000..a230e91 --- /dev/null +++ b/gitdiff/format_roundtrip_test.go @@ -0,0 +1,157 @@ +package gitdiff + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "slices" + "testing" +) + +func TestFormatRoundtrip(t *testing.T) { + patches := []struct { + File string + SkipTextCompare bool + }{ + {File: "copy.patch"}, + {File: "copy_modify.patch"}, + {File: "delete.patch"}, + {File: "mode.patch"}, + {File: "mode_modify.patch"}, + {File: "modify.patch"}, + {File: "new.patch"}, + {File: "new_empty.patch"}, + {File: "new_mode.patch"}, + {File: "rename.patch"}, + {File: "rename_modify.patch"}, + + // Due to differences between Go's 'encoding/zlib' package and the zlib + // C library, binary patches cannot be compared directly as the patch + // data is slightly different when re-encoded by Go. + {File: "binary_modify.patch", SkipTextCompare: true}, + {File: "binary_new.patch", SkipTextCompare: true}, + {File: "binary_modify_nodata.patch"}, + } + + for _, patch := range patches { + t.Run(patch.File, func(t *testing.T) { + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) + if err != nil { + t.Fatalf("failed to read patch: %v", err) + } + + original := assertParseSingleFile(t, b, "patch") + str := original.String() + + if !patch.SkipTextCompare { + if string(b) != str { + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) + } + } + + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") + assertFilesEqual(t, original, reparsed) + }) + } +} + +func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { + files, _, err := Parse(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to parse %s: %v", kind, err) + } + if len(files) != 1 { + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) + } + return files[0] +} + +func assertFilesEqual(t *testing.T, expected, actual *File) { + assertEqual(t, expected.OldName, actual.OldName, "OldName") + assertEqual(t, expected.NewName, actual.NewName, "NewName") + + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") + + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") + + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") + assertEqual(t, expected.Score, actual.Score, "Score") + + if len(expected.TextFragments) == len(actual.TextFragments) { + for i := range expected.TextFragments { + prefix := fmt.Sprintf("TextFragments[%d].", i) + ef := expected.TextFragments[i] + af := actual.TextFragments[i] + + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") + + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") + + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") + + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") + + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") + + if !slices.Equal(ef.Lines, af.Lines) { + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) + } + } + } else { + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) + } + + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") + + if expected.BinaryFragment != nil { + if actual.BinaryFragment == nil { + t.Errorf("BinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.BinaryFragment + af := expected.BinaryFragment + + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.BinaryFragment != nil { + t.Errorf("BinaryFragment: expected nil, actual is non-nil") + } + + if expected.ReverseBinaryFragment != nil { + if actual.ReverseBinaryFragment == nil { + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.ReverseBinaryFragment + af := expected.ReverseBinaryFragment + + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.ReverseBinaryFragment != nil { + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") + } +} + +func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { + if expected != actual { + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) + } +} diff --git a/gitdiff/format_test.go b/gitdiff/format_test.go new file mode 100644 index 0000000..3325296 --- /dev/null +++ b/gitdiff/format_test.go @@ -0,0 +1,28 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestFormatter_WriteQuotedName(t *testing.T) { + tests := []struct { + Input string + Expected string + }{ + {"noquotes.txt", `noquotes.txt`}, + {"no quotes.txt", `no quotes.txt`}, + {"new\nline", `"new\nline"`}, + {"escape\x1B null\x00", `"escape\033 null\000"`}, + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, + {"\"already quoted\"", `"\"already quoted\""`}, + } + + for _, test := range tests { + var b strings.Builder + newFormatter(&b).WriteQuotedName(test.Input) + if b.String() != test.Expected { + t.Errorf("expected %q, got %q", test.Expected, b.String()) + } + } +} diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 18645bd..5365645 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "strings" ) // File describes changes to a single file. It can be either a text file or a @@ -38,6 +39,15 @@ type File struct { ReverseBinaryFragment *BinaryFragment } +// String returns a git diff representation of this file. The value can be +// parsed by this library to obtain the same File, but may not be the same as +// the original input. +func (f *File) String() string { + var diff strings.Builder + newFormatter(&diff).FormatFile(f) + return diff.String() +} + // TextFragment describes changed lines starting at a specific line in a text file. type TextFragment struct { Comment string @@ -57,9 +67,20 @@ type TextFragment struct { Lines []Line } -// Header returns the canonical header of this fragment. +// String returns a git diff format of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatTextFragment(f) + return diff.String() +} + +// Header returns a git diff header of this fragment. See [File.String] for +// more details on this format. func (f *TextFragment) Header() string { - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) + var hdr strings.Builder + newFormatter(&hdr).FormatTextFragmentHeader(f) + return hdr.String() } // Validate checks that the fragment is self-consistent and appliable. Validate @@ -197,3 +218,13 @@ const ( // BinaryPatchLiteral indicates the data is the exact file content BinaryPatchLiteral ) + +// String returns a git diff format of this fragment. Due to differences in +// zlib implementation between Go and Git, encoded binary data in the result +// will likely differ from what Git produces for the same input. See +// [File.String] for more details on this format. +func (f *BinaryFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatBinaryFragment(f) + return diff.String() +} diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d44465a..e8f8430 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -12,6 +12,10 @@ import ( // Parse parses a patch with changes to one or more files. Any content before // the first file is returned as the second value. If an error occurs while // parsing, it returns all files parsed before the error. +// +// Parse expects to receive a single patch. If the input may contain multiple +// patches (for example, if it is an mbox file), callers should split it into +// individual patches and call Parse on each one. func Parse(r io.Reader) ([]*File, string, error) { p := newParser(r) @@ -29,6 +33,9 @@ func Parse(r io.Reader) ([]*File, string, error) { if err != nil { return files, preamble, err } + if len(files) == 0 { + preamble = pre + } if file == nil { break } @@ -46,9 +53,6 @@ func Parse(r io.Reader) ([]*File, string, error) { } } - if len(files) == 0 { - preamble = pre - } files = append(files, file) } diff --git a/gitdiff/parser_test.go b/gitdiff/parser_test.go index 30f59f4..15a5d67 100644 --- a/gitdiff/parser_test.go +++ b/gitdiff/parser_test.go @@ -281,8 +281,13 @@ this is another line --- could this be a header? nope, it's just some dashes `, - Output: nil, - Preamble: "", + Output: nil, + Preamble: ` +this is a line +this is another line +--- could this be a header? +nope, it's just some dashes +`, }, "detatchedFragmentLike": { Input: ` @@ -290,6 +295,10 @@ a wild fragment appears? @@ -1,3 +1,4 ~1,5 @@ `, Output: nil, + Preamble: ` +a wild fragment appears? +@@ -1,3 +1,4 ~1,5 @@ +`, }, "detatchedFragment": { Input: ` @@ -426,6 +435,11 @@ Date: Tue Apr 2 22:55:40 2019 -0700 }, Preamble: textPreamble, }, + "noFiles": { + InputFile: "testdata/no_files.patch", + Output: nil, + Preamble: textPreamble, + }, "newBinaryFile": { InputFile: "testdata/new_binary_file.patch", Output: []*File{ diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index f2c3868..f047059 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -68,55 +68,6 @@ func (h *PatchHeader) Message() string { return msg.String() } -// PatchIdentity identifies a person who authored or committed a patch. -type PatchIdentity struct { - Name string - Email string -} - -func (i PatchIdentity) String() string { - name := i.Name - if name == "" { - name = `""` - } - return fmt.Sprintf("%s <%s>", name, i.Email) -} - -// ParsePatchIdentity parses a patch identity string. A valid string contains a -// non-empty name followed by an email address in angle brackets. Like Git, -// ParsePatchIdentity does not require that the email address is valid or -// properly formatted, only that it is non-empty. The name must not contain a -// left angle bracket, '<', and the email address must not contain a right -// angle bracket, '>'. -func ParsePatchIdentity(s string) (PatchIdentity, error) { - var emailStart, emailEnd int - for i, c := range s { - if c == '<' && emailStart == 0 { - emailStart = i + 1 - } - if c == '>' && emailStart > 0 { - emailEnd = i - break - } - } - if emailStart > 0 && emailEnd == 0 { - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) - } - - var name, email string - if emailStart > 0 { - name = strings.TrimSpace(s[:emailStart-1]) - } - if emailStart > 0 && emailEnd > 0 { - email = strings.TrimSpace(s[emailStart:emailEnd]) - } - if name == "" || email == "" { - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) - } - - return PatchIdentity{Name: name, Email: email}, nil -} - // ParsePatchDate parses a patch date string. It returns the parsed time or an // error if s has an unknown format. ParsePatchDate supports the iso, rfc, // short, raw, unix, and default formats (with local variants) used by the @@ -165,60 +116,97 @@ func ParsePatchDate(s string) (time.Time, error) { return time.Time{}, fmt.Errorf("unknown date format: %s", s) } -// ParsePatchHeader parses a preamble string as returned by Parse into a +// A PatchHeaderOption modifies the behavior of ParsePatchHeader. +type PatchHeaderOption func(*patchHeaderOptions) + +// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when +// parsing mail-formatted patches. +type SubjectCleanMode int + +const ( + // SubjectCleanWhitespace removes leading and trailing whitespace. + SubjectCleanWhitespace SubjectCleanMode = iota + + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. + // This is the default behavior of git (see `git mailinfo`) and this + // package. + SubjectCleanAll + + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes + // leading strings enclosed by '[' and ']' if they start with "PATCH". + SubjectCleanPatchOnly +) + +// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By +// default, uses SubjectCleanAll. +func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { + return func(opts *patchHeaderOptions) { + opts.subjectCleanMode = m + } +} + +type patchHeaderOptions struct { + subjectCleanMode SubjectCleanMode +} + +// ParsePatchHeader parses the preamble string returned by [Parse] into a // PatchHeader. Due to the variety of header formats, some fields of the parsed // PatchHeader may be unset after parsing. // // Supported formats are the short, medium, full, fuller, and email pretty -// formats used by git diff, git log, and git show and the UNIX mailbox format -// used by git format-patch. +// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox +// format used by `git format-patch`. // -// If ParsePatchHeader detects that it is handling an email, it will -// remove extra content at the beginning of the title line, such as -// `[PATCH]` or `Re:` in the same way that `git mailinfo` does. -// SubjectPrefix will be set to the value of this removed string. -// (`git mailinfo` is the core part of `git am` that pulls information -// out of an individual mail.) +// When parsing mail-formatted headers, ParsePatchHeader tries to remove +// email-specific content from the title and body: // -// Additionally, if ParsePatchHeader detects that it's handling an -// email, it will remove a `---` line and put anything after it into -// BodyAppendix. +// - Based on the SubjectCleanMode, remove prefixes like reply markers and +// "[PATCH]" strings from the subject, saving any removed content in the +// SubjectPrefix field. Parsing always discards leading and trailing +// whitespace from the subject line. The default mode is SubjectCleanAll. // -// Those wishing the effect of a plain `git am` should use -// `PatchHeader.Title + "\n" + PatchHeader.Body` (or -// `PatchHeader.Message()`). Those wishing to retain the subject -// prefix and appendix material should use `PatchHeader.SubjectPrefix -// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + -// PatchHeader.BodyAppendix`. -func ParsePatchHeader(s string) (*PatchHeader, error) { - r := bufio.NewReader(strings.NewReader(s)) - - var line string - for { - var err error - line, err = r.ReadString('\n') - if err == io.EOF { - break - } - if err != nil { - return nil, err - } +// - If the body contains a "---" line (3 hyphens), remove that line and any +// content after it from the body and save it in the BodyAppendix field. +// +// ParsePatchHeader tries to process content it does not understand wthout +// returning errors, but will return errors if well-identified content like +// dates or identies uses unknown or invalid formats. +func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { + opts := patchHeaderOptions{ + subjectCleanMode: SubjectCleanAll, // match git defaults + } + for _, optFn := range options { + optFn(&opts) + } - line = strings.TrimSpace(line) - if len(line) > 0 { - break - } + header = strings.TrimSpace(header) + if header == "" { + return &PatchHeader{}, nil + } + + var firstLine, rest string + if idx := strings.IndexByte(header, '\n'); idx >= 0 { + firstLine = header[:idx] + rest = header[idx+1:] + } else { + firstLine = header + rest = "" } switch { - case strings.HasPrefix(line, mailHeaderPrefix): - return parseHeaderMail(line, r) - case strings.HasPrefix(line, mailMinimumHeaderPrefix): - r = bufio.NewReader(strings.NewReader(s)) - return parseHeaderMail("", r) - case strings.HasPrefix(line, prettyHeaderPrefix): - return parseHeaderPretty(line, r) + case strings.HasPrefix(firstLine, mailHeaderPrefix): + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) + + case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): + // With a minimum header, the first line is part of the actual mail + // content and needs to be parsed as part of the "rest" + return parseHeaderMail("", strings.NewReader(header), opts) + + case strings.HasPrefix(firstLine, prettyHeaderPrefix): + return parseHeaderPretty(firstLine, strings.NewReader(rest)) } + return nil, errors.New("unrecognized patch header format") } @@ -233,7 +221,7 @@ func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { h := &PatchHeader{} - prettyLine = prettyLine[len(prettyHeaderPrefix):] + prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) if i := strings.IndexByte(prettyLine, ' '); i > 0 { h.SHA = prettyLine[:i] } else { @@ -297,7 +285,7 @@ func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { h.Title = title if title != "" { - // Don't check for an appendix + // Don't check for an appendix, pretty headers do not contain them body, _ := scanMessageBody(s, indent, false) if s.Err() != nil { return nil, s.Err() @@ -366,7 +354,7 @@ func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (st return body.String(), appendix.String() } -func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { +func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { msg, err := mail.ReadMessage(r) if err != nil { return nil, err @@ -374,23 +362,20 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { h := &PatchHeader{} - if len(mailLine) > len(mailHeaderPrefix) { - mailLine = mailLine[len(mailHeaderPrefix):] + if strings.HasPrefix(mailLine, mailHeaderPrefix) { + mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) if i := strings.IndexByte(mailLine, ' '); i > 0 { h.SHA = mailLine[:i] } } - addrs, err := msg.Header.AddressList("From") - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { - return nil, err - } - if len(addrs) > 0 { - addr := addrs[0] - if addr.Name == "" { - addr.Name = addr.Address + from := msg.Header.Get("From") + if from != "" { + u, err := ParsePatchIdentity(from) + if err != nil { + return nil, err } - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} + h.Author = &u } date := msg.Header.Get("Date") @@ -403,7 +388,7 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { } subject := msg.Header.Get("Subject") - h.SubjectPrefix, h.Title = parseSubject(subject) + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) s := bufio.NewScanner(msg.Body) h.Body, h.BodyAppendix = scanMessageBody(s, "", true) @@ -414,23 +399,24 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { return h, nil } -// Takes an email subject and returns the patch prefix and commit -// title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH -// v3 3/5] ` and `Implement foo` -func parseSubject(s string) (string, string) { - // This is meant to be compatible with - // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject(). - // If compatibility with `git am` drifts, go there to see if there - // are any updates. +func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { + switch mode { + case SubjectCleanAll, SubjectCleanPatchOnly: + case SubjectCleanWhitespace: + return "", strings.TrimSpace(decodeSubject(s)) + default: + panic(fmt.Sprintf("unknown clean mode: %d", mode)) + } + + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() + // If compatibility with `git am` drifts, go there to see if there are any updates. at := 0 for at < len(s) { switch s[at] { case 'r', 'R': // Detect re:, Re:, rE: and RE: - if at+2 < len(s) && - (s[at+1] == 'e' || s[at+1] == 'E') && - s[at+2] == ':' { + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { at += 3 continue } @@ -441,25 +427,21 @@ func parseSubject(s string) (string, string) { continue case '[': - // Look for closing parenthesis - j := at + 1 - for ; j < len(s); j++ { - if s[j] == ']' { - break + if i := strings.IndexByte(s[at:], ']'); i > 0 { + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { + at += i + 1 + continue } } - - if j < len(s) { - at = j + 1 - continue - } } - // Only loop if we actually removed something + // Nothing was removed, end processing break } - return s[:at], decodeSubject(s[at:]) + prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) + return } // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index bda91fe..c8559b0 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -5,65 +5,6 @@ import ( "time" ) -func TestParsePatchIdentity(t *testing.T) { - tests := map[string]struct { - Input string - Output PatchIdentity - Err interface{} - }{ - "simple": { - Input: "Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "extraWhitespace": { - Input: " Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "trailingCharacters": { - Input: "Morton Haypenny unrelated garbage", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "missingName": { - Input: "", - Err: "invalid identity", - }, - "missingEmail": { - Input: "Morton Haypenny", - Err: "invalid identity", - }, - "unclosedEmail": { - Input: "Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] [BUG-123] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Options: []PatchHeaderOption{ + WithSubjectCleanMode(SubjectCleanPatchOnly), + }, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: "[BUG-123] " + expectedTitle, + Body: expectedBody, + }, + }, "mailboxEmojiOneLine": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny @@ -308,6 +272,28 @@ Another body line. Body: expectedBody, }, }, + "mailboxRFC5322SpecialCharacters": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: "dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com> +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: &PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny @@ -414,11 +400,15 @@ Author: Morton Haypenny Title: expectedTitle, }, }, + "emptyHeader": { + Input: "", + Header: PatchHeader{}, + }, } for name, test := range tests { t.Run(name, func(t *testing.T) { - h, err := ParsePatchHeader(test.Input) + h, err := ParsePatchHeader(test.Input, test.Options...) if test.Err != nil { assertError(t, test.Err, err, "parsing patch header") return @@ -473,50 +463,128 @@ func assertPatchIdentity(t *testing.T, kind string, exp, act *PatchIdentity) { } } -func TestCleanupSubject(t *testing.T) { - exp := "A sample commit to test header parsing" - tests := map[string]string{ - "plain": "", - "patch": "[PATCH] ", - "patchv5": "[PATCH v5] ", - "patchrfc": "[PATCH RFC] ", - "patchnospace": "[PATCH]", - "space": " ", - "re": "re: ", - "Re": "Re: ", - "RE": "rE: ", - "rere": "re: re: ", - } - - for name, prefix := range tests { - gotprefix, gottitle := parseSubject(prefix + exp) - if gottitle != exp { - t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s", - name, prefix, gottitle, exp) - } - if gotprefix != prefix { - t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s", - name, prefix, gotprefix) - } - } +func TestCleanSubject(t *testing.T) { + expectedSubject := "A sample commit to test header parsing" - moretests := map[string]struct { - in, eprefix, etitle string + tests := map[string]struct { + Input string + Mode SubjectCleanMode + Prefix string + Subject string }{ - "Reimplement": {"Reimplement something", "", "Reimplement something"}, - "patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"}, - "Openbracket": {"[Just to annoy people", "", "[Just to annoy people"}, + "CleanAll/noPrefix": { + Input: expectedSubject, + Mode: SubjectCleanAll, + Subject: expectedSubject, + }, + "CleanAll/patchPrefix": { + Input: "[PATCH] " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH] ", + Subject: expectedSubject, + }, + "CleanAll/patchPrefixNoSpace": { + Input: "[PATCH]" + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH]", + Subject: expectedSubject, + }, + "CleanAll/patchPrefixContent": { + Input: "[PATCH 3/7] " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH 3/7] ", + Subject: expectedSubject, + }, + "CleanAll/spacePrefix": { + Input: " " + expectedSubject, + Mode: SubjectCleanAll, + Subject: expectedSubject, + }, + "CleanAll/replyLowerPrefix": { + Input: "re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "re: ", + Subject: expectedSubject, + }, + "CleanAll/replyMixedPrefix": { + Input: "Re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "Re: ", + Subject: expectedSubject, + }, + "CleanAll/replyCapsPrefix": { + Input: "RE: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "RE: ", + Subject: expectedSubject, + }, + "CleanAll/replyDoublePrefix": { + Input: "Re: re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "Re: re: ", + Subject: expectedSubject, + }, + "CleanAll/noPrefixSubjectHasRe": { + Input: "Reimplement parsing", + Mode: SubjectCleanAll, + Subject: "Reimplement parsing", + }, + "CleanAll/patchPrefixSubjectHasRe": { + Input: "[PATCH 1/2] Reimplement parsing", + Mode: SubjectCleanAll, + Prefix: "[PATCH 1/2] ", + Subject: "Reimplement parsing", + }, + "CleanAll/unclosedPrefix": { + Input: "[Just to annoy people", + Mode: SubjectCleanAll, + Subject: "[Just to annoy people", + }, + "CleanAll/multiplePrefix": { + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", + Mode: SubjectCleanAll, + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", + Subject: expectedSubject, + }, + "CleanPatchOnly/patchPrefix": { + Input: "[PATCH] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "[PATCH] ", + Subject: expectedSubject, + }, + "CleanPatchOnly/mixedPrefix": { + Input: "[PATCH] [TICKET-123] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "[PATCH] ", + Subject: "[TICKET-123] " + expectedSubject, + }, + "CleanPatchOnly/multiplePrefix": { + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "Re:Re: [PATCH 1/2]", + Subject: "[DRAFT] " + expectedSubject, + }, + "CleanWhitespace/leadingSpace": { + Input: " [PATCH] " + expectedSubject, + Mode: SubjectCleanWhitespace, + Subject: "[PATCH] " + expectedSubject, + }, + "CleanWhitespace/trailingSpace": { + Input: "[PATCH] " + expectedSubject + " ", + Mode: SubjectCleanWhitespace, + Subject: "[PATCH] " + expectedSubject, + }, } - for name, test := range moretests { - prefix, title := parseSubject(test.in) - if title != test.etitle { - t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s", - name, test.in, title, test.etitle) - } - if prefix != test.eprefix { - t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s", - name, test.in, title, test.etitle) - } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + prefix, subject := cleanSubject(test.Input, test.Mode) + if prefix != test.Prefix { + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) + } + if subject != test.Subject { + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) + } + }) } } diff --git a/gitdiff/patch_identity.go b/gitdiff/patch_identity.go new file mode 100644 index 0000000..018f80c --- /dev/null +++ b/gitdiff/patch_identity.go @@ -0,0 +1,166 @@ +package gitdiff + +import ( + "fmt" + "strings" +) + +// PatchIdentity identifies a person who authored or committed a patch. +type PatchIdentity struct { + Name string + Email string +} + +func (i PatchIdentity) String() string { + name := i.Name + if name == "" { + name = `""` + } + return fmt.Sprintf("%s <%s>", name, i.Email) +} + +// ParsePatchIdentity parses a patch identity string. A patch identity contains +// an email address and an optional name in [RFC 5322] format. This is either a +// plain email adddress or a name followed by an address in angle brackets: +// +// author@example.com +// Author Name +// +// If the input is not one of these formats, ParsePatchIdentity applies a +// heuristic to separate the name and email portions. If both the name and +// email are missing or empty, ParsePatchIdentity returns an error. It +// otherwise does not validate the result. +// +// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 +func ParsePatchIdentity(s string) (PatchIdentity, error) { + s = normalizeSpace(s) + s = unquotePairs(s) + + var name, email string + if at := strings.IndexByte(s, '@'); at >= 0 { + start, end := at, at + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { + start-- + } + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { + end++ + } + email = s[start+1 : end] + + // Adjust the boundaries so that we drop angle brackets, but keep + // spaces when removing the email to form the name. + if start < 0 || s[start] != '<' { + start++ + } + if end >= len(s) || s[end] != '>' { + end-- + } + name = s[:start] + s[end+1:] + } else { + start, end := 0, 0 + for i := 0; i < len(s); i++ { + if s[i] == '<' && start == 0 { + start = i + 1 + } + if s[i] == '>' && start > 0 { + end = i + break + } + } + if start > 0 && end >= start { + email = strings.TrimSpace(s[start:end]) + name = s[:start-1] + } + } + + // After extracting the email, the name might contain extra whitespace + // again and may be surrounded by comment characters. The git source gives + // these examples of when this can happen: + // + // "Name " + // "email@domain (Name)" + // "Name (Comment)" + // + name = normalizeSpace(name) + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { + name = name[1 : len(name)-1] + } + name = strings.TrimSpace(name) + + // If the name is empty or contains email-like characters, use the email + // instead (assuming one exists) + if name == "" || strings.ContainsAny(name, "@<>") { + name = email + } + + if name == "" && email == "" { + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) + } + return PatchIdentity{Name: name, Email: email}, nil +} + +// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to +// remove any "quoted-pairs" (backslash-espaced characters). It also removes +// the quotes from any quoted strings, but leaves the comment delimiters. +func unquotePairs(s string) string { + quote := false + comments := 0 + escaped := false + + var out strings.Builder + for i := 0; i < len(s); i++ { + if escaped { + escaped = false + } else { + switch s[i] { + case '\\': + // quoted-pair is only allowed in quoted-string/comment + if quote || comments > 0 { + escaped = true + continue // drop '\' character + } + + case '"': + if comments == 0 { + quote = !quote + continue // drop '"' character + } + + case '(': + if !quote { + comments++ + } + case ')': + if comments > 0 { + comments-- + } + } + } + out.WriteByte(s[i]) + } + return out.String() +} + +// normalizeSpace trims leading and trailing whitespace from s and converts +// inner sequences of one or more whitespace characters to single spaces. +func normalizeSpace(s string) string { + var sb strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + if !isRFC5332Space(c) { + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { + sb.WriteByte(' ') + } + sb.WriteByte(c) + } + } + return sb.String() +} + +func isRFC5332Space(c byte) bool { + switch c { + case '\t', '\n', '\r', ' ': + return true + } + return false +} diff --git a/gitdiff/patch_identity_test.go b/gitdiff/patch_identity_test.go new file mode 100644 index 0000000..f15fe38 --- /dev/null +++ b/gitdiff/patch_identity_test.go @@ -0,0 +1,127 @@ +package gitdiff + +import ( + "testing" +) + +func TestParsePatchIdentity(t *testing.T) { + tests := map[string]struct { + Input string + Output PatchIdentity + Err interface{} + }{ + "simple": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "extraWhitespace": { + Input: "\t Morton Haypenny \r\n ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "trailingCharacters": { + Input: "Morton Haypenny II", + Output: PatchIdentity{ + Name: "Morton Haypenny II", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmail": { + Input: "mhaypenny@example.com", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmailInBrackets": { + Input: "", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "rfc5322SpecialCharacters": { + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, + Output: PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + }, + "rfc5322QuotedPairs": { + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, + Output: PatchIdentity{ + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, + Email: "mhaypenny+[1900]@example.com", + }, + }, + "rfc5322QuotedPairsOutOfContext": { + Input: `Morton \\Backslash Haypenny `, + Output: PatchIdentity{ + Name: `Morton \\Backslash Haypenny`, + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, + }, + "unclosedEmail": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "bogusEmailWithWhitespace": { + Input: "Morton Haypenny < mhaypenny >", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "missingEmail": { + Input: "Morton Haypenny", + Err: "invalid identity", + }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + id, err := ParsePatchIdentity(test.Input) + if test.Err != nil { + assertError(t, test.Err, err, "parsing identity") + return + } + if err != nil { + t.Fatalf("unexpected error parsing identity: %v", err) + } + + if test.Output != id { + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) + } + }) + } +} diff --git a/gitdiff/testdata/apply/bin.go b/gitdiff/testdata/apply/bin.go index 6d39ffd..e34f06b 100644 --- a/gitdiff/testdata/apply/bin.go +++ b/gitdiff/testdata/apply/bin.go @@ -1,4 +1,4 @@ -//+build ignore +//go:build ignore // bin.go is a helper CLI to manipulate binary diff data for testing purposes. // It can decode patches generated by git using the standard parsing functions diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.out b/gitdiff/testdata/apply/text_fragment_change_end_eol.out new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 \ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.patch b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch new file mode 100644 index 0000000..f1c9477 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch @@ -0,0 +1,10 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +index a92d664..8cf2f17 100644 +--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src ++++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +@@ -1,3 +1,3 @@ + line 1 + line 2 +-line 3 ++line 3 +\ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.src b/gitdiff/testdata/apply/text_fragment_change_end_eol.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/gitdiff/testdata/no_files.patch b/gitdiff/testdata/no_files.patch new file mode 100644 index 0000000..9eea12d --- /dev/null +++ b/gitdiff/testdata/no_files.patch @@ -0,0 +1,8 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + diff --git a/gitdiff/testdata/string/binary_modify.patch b/gitdiff/testdata/string/binary_modify.patch new file mode 100644 index 0000000..12ddad5 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify.patch @@ -0,0 +1,9 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 +GIT binary patch +delta 66 +QcmeZhVVvM$!$1K50C&Ox;s5{u + +delta 5 +McmZo+^qAlQ00i9urT_o{ + diff --git a/gitdiff/testdata/string/binary_modify_nodata.patch b/gitdiff/testdata/string/binary_modify_nodata.patch new file mode 100644 index 0000000..833a534 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify_nodata.patch @@ -0,0 +1,3 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d..bdc9a70 100644 +Binary files a/file.bin and b/file.bin differ diff --git a/gitdiff/testdata/string/binary_new.patch b/gitdiff/testdata/string/binary_new.patch new file mode 100644 index 0000000..c56f35e --- /dev/null +++ b/gitdiff/testdata/string/binary_new.patch @@ -0,0 +1,11 @@ +diff --git a/file.bin b/file.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c +GIT binary patch +literal 72 +zcmV-O0Jr~td-`u6JcK&{KDK=