From b6a90110a3bf32b68d27477c96f540a7fbfb536f Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Thu, 9 Sep 2021 14:24:16 -0400 Subject: [PATCH 01/23] Fix out-of-bounds panic parsing timestamps (#28) Found by go-fuzz. --- gitdiff/file_header.go | 2 +- gitdiff/file_header_test.go | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index e3185bd..58904b4 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -527,7 +527,7 @@ func hasEpochTimestamp(s string) bool { // a valid timestamp can have optional ':' in zone specifier // remove that if it exists so we have a single format - if ts[len(ts)-3] == ':' { + if len(ts) >= 3 && ts[len(ts)-3] == ':' { ts = ts[:len(ts)-3] + ts[len(ts)-2:] } diff --git a/gitdiff/file_header_test.go b/gitdiff/file_header_test.go index 99cfed3..102795c 100644 --- a/gitdiff/file_header_test.go +++ b/gitdiff/file_header_test.go @@ -724,6 +724,14 @@ func TestHasEpochTimestamp(t *testing.T) { Input: "+++ file.txt\t2019-03-21 12:34:56.789 -0700\n", Output: false, }, + "notTimestamp": { + Input: "+++ file.txt\trandom text\n", + Output: false, + }, + "notTimestampShort": { + Input: "+++ file.txt\t0\n", + Output: false, + }, } for name, test := range tests { From 4f540371d0aa2a79f0540bf654a591c8bc7adae6 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 12 Sep 2021 18:51:00 -0400 Subject: [PATCH 02/23] Return errors for empty text fragments (#29) This fixes two issues: 1. Fragments with only context lines were accepted 2. Fragments with only a "no newline" marker caused a panic The panic was found by go-fuzz. While fixing that issue, I discovered the first problem with other types of empty fragments while comparing behavior against 'git apply'. Also extract some helper functions and modify the loop conditions to clean things up while making changes in this area. --- gitdiff/text.go | 48 +++++++++++++++++++++++++++----------------- gitdiff/text_test.go | 18 +++++++++++++++++ 2 files changed, 48 insertions(+), 18 deletions(-) diff --git a/gitdiff/text.go b/gitdiff/text.go index 04de8f4..ee30792 100644 --- a/gitdiff/text.go +++ b/gitdiff/text.go @@ -79,14 +79,8 @@ func (p *parser) ParseTextChunk(frag *TextFragment) error { return p.Errorf(0, "no content following fragment header") } - isNoNewlineLine := func(s string) bool { - // test for "\ No newline at end of file" by prefix because the text - // changes by locale (git claims all versions are at least 12 chars) - return len(s) >= 12 && s[:2] == "\\ " - } - oldLines, newLines := frag.OldLines, frag.NewLines - for { + for oldLines > 0 || newLines > 0 { line := p.Line(0) op, data := line[0], line[1:] @@ -113,13 +107,14 @@ func (p *parser) ParseTextChunk(frag *TextFragment) error { frag.LinesAdded++ frag.TrailingContext = 0 frag.Lines = append(frag.Lines, Line{OpAdd, data}) - default: + case '\\': // this may appear in middle of fragment if it's for a deleted line - if isNoNewlineLine(line) { - last := &frag.Lines[len(frag.Lines)-1] - last.Line = strings.TrimSuffix(last.Line, "\n") + if isNoNewlineMarker(line) { + removeLastNewline(frag) break } + fallthrough + default: // TODO(bkeyes): if this is because we hit the next header, it // would be helpful to return the miscounts line error. We could // either test for the common headers ("@@ -", "diff --git") or @@ -128,11 +123,6 @@ func (p *parser) ParseTextChunk(frag *TextFragment) error { return p.Errorf(0, "invalid line operation: %q", op) } - next := p.Line(1) - if oldLines <= 0 && newLines <= 0 && !isNoNewlineLine(next) { - break - } - if err := p.Next(); err != nil { if err == io.EOF { break @@ -145,13 +135,35 @@ func (p *parser) ParseTextChunk(frag *TextFragment) error { hdr := max(frag.OldLines-oldLines, frag.NewLines-newLines) + 1 return p.Errorf(-hdr, "fragment header miscounts lines: %+d old, %+d new", -oldLines, -newLines) } + if frag.LinesAdded == 0 && frag.LinesDeleted == 0 { + return p.Errorf(0, "fragment contains no changes") + } - if err := p.Next(); err != nil && err != io.EOF { - return err + // check for a final "no newline" marker since it is not included in the + // counters used to stop the loop above + if isNoNewlineMarker(p.Line(0)) { + removeLastNewline(frag) + if err := p.Next(); err != nil && err != io.EOF { + return err + } } + return nil } +func isNoNewlineMarker(s string) bool { + // test for "\ No newline at end of file" by prefix because the text + // changes by locale (git claims all versions are at least 12 chars) + return len(s) >= 12 && s[:2] == "\\ " +} + +func removeLastNewline(frag *TextFragment) { + if len(frag.Lines) > 0 { + last := &frag.Lines[len(frag.Lines)-1] + last.Line = strings.TrimSuffix(last.Line, "\n") + } +} + func parseRange(s string) (start int64, end int64, err error) { parts := strings.SplitN(s, ",", 2) diff --git a/gitdiff/text_test.go b/gitdiff/text_test.go index d1caed1..990b3bc 100644 --- a/gitdiff/text_test.go +++ b/gitdiff/text_test.go @@ -317,6 +317,24 @@ func TestParseTextChunk(t *testing.T) { }, Err: true, }, + "onlyContext": { + Input: ` context line + context line +`, + Fragment: TextFragment{ + OldLines: 2, + NewLines: 2, + }, + Err: true, + }, + "unexpectedNoNewlineMarker": { + Input: `\ No newline at end of file`, + Fragment: TextFragment{ + OldLines: 1, + NewLines: 1, + }, + Err: true, + }, } for name, test := range tests { From f23f745615837c8bc9804b790f1eb24a8042069c Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 18 Sep 2021 21:16:28 -0700 Subject: [PATCH 03/23] Add note about fuzz testing to README --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 1f45a49..8f9671b 100644 --- a/README.md +++ b/README.md @@ -44,6 +44,9 @@ library is used in a production application that parses and applies thousands of patches every day, but the space of all possible patches is large, so there are likely undiscovered bugs. +The parsing code has also had a modest amount of fuzz testing. I hope to continue +this testing in the future. + ## Why another git/unified diff parser? [Several][sourcegraph] [packages][sergi] with [similar][waigani] From 52645c60d0415a26817e7cdf90ca66b8806f46b9 Mon Sep 17 00:00:00 2001 From: goldsteinn <35538541+goldsteinn@users.noreply.github.com> Date: Sun, 27 Feb 2022 21:32:01 -0500 Subject: [PATCH 04/23] Use ioutil for compatibility with older Go versions (#31) Co-authored-by: Noah Goldstein --- gitdiff/patch_header.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index c3c387d..f2c3868 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "io/ioutil" "mime/quotedprintable" "net/mail" "strconv" @@ -477,7 +478,7 @@ func decodeSubject(encoded string) string { payload = strings.ReplaceAll(payload, " =?UTF-8?q?", "") payload = strings.ReplaceAll(payload, "?=", "") - decoded, err := io.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) + decoded, err := ioutil.ReadAll(quotedprintable.NewReader(strings.NewReader(payload))) if err != nil { // if err, abort decoding and return original subject return encoded From 8764d8180fdd2a5100249b300011d61bb594c9cc Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Fri, 18 Mar 2022 14:13:13 -0700 Subject: [PATCH 05/23] Add note about apply API changes to README --- README.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8f9671b..b7522e3 100644 --- a/README.md +++ b/README.md @@ -36,16 +36,18 @@ if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil { ## Development Status -Mostly complete. API changes are possible, particularly for patch application, -but I expect the parsing interface and types to remain stable. - -Patch parsing and strict application are well-covered by unit tests and the -library is used in a production application that parses and applies thousands of -patches every day, but the space of all possible patches is large, so there are -likely undiscovered bugs. - -The parsing code has also had a modest amount of fuzz testing. I hope to continue -this testing in the future. +The parsing API and types are complete and I expect will remain stable. As of +March 2022, I am refactoring the apply API to address existing issues and to +support non-strict application. Version 0.6.1 is the last release using the old +API and the `master` branch may contain breaking changes. Please use a release +version to avoid surprises. + +Parsing and strict application are well-covered by unit tests and the library +is used in a production application that parses and applies thousands of +patches every day. However, the space of all possible patches is large, so +there are likely undiscovered bugs. + +The parsing code has also had a modest amount of fuzz testing. ## Why another git/unified diff parser? From 75930390c9440405f4941e70e3897cb328afdc09 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 20 Mar 2022 12:20:17 -0700 Subject: [PATCH 06/23] Split apply logic by fragment type (#32) Remove the Applier type and replace it with TextApplier and BinaryApplier, both of which operate on fragments instead of on full files. Move the logic that previously existed in Applier.ApplyFile to the top-level Apply function. Also restructure arguments and methods to make it clear that appliers are one-time-use objects. The destination is now set when creating an applier and the Reset() method was replaced by Close(). --- .golangci.yml | 1 + README.md | 2 +- gitdiff/apply.go | 357 +++------------------------------------- gitdiff/apply_binary.go | 206 +++++++++++++++++++++++ gitdiff/apply_test.go | 83 ++-------- gitdiff/apply_text.go | 153 +++++++++++++++++ 6 files changed, 398 insertions(+), 404 deletions(-) create mode 100644 gitdiff/apply_binary.go create mode 100644 gitdiff/apply_text.go diff --git a/.golangci.yml b/.golangci.yml index 12cdbd2..82dbad2 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -11,6 +11,7 @@ linters: - golint - govet - ineffassign + - misspell - typecheck - unconvert - varcheck diff --git a/README.md b/README.md index b7522e3..58bb675 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ if err != nil { // apply the changes in the patch to a source file var output bytes.Buffer -if err := gitdiff.NewApplier(code).ApplyFile(&output, files[0]); err != nil { +if err := gitdiff.Apply(&output, code, files[0]); err != nil { log.Fatal(err) } ``` diff --git a/gitdiff/apply.go b/gitdiff/apply.go index 05a4526..cc4c3f0 100644 --- a/gitdiff/apply.go +++ b/gitdiff/apply.go @@ -89,85 +89,36 @@ func applyError(err error, args ...interface{}) error { var ( errApplyInProgress = errors.New("gitdiff: incompatible apply in progress") + errApplierClosed = errors.New("gitdiff: applier is closed") ) -const ( - applyInitial = iota - applyText - applyBinary - applyFile -) - -// Apply is a convenience function that creates an Applier for src with default -// settings and applies the changes in f, writing the result to dst. -func Apply(dst io.Writer, src io.ReaderAt, f *File) error { - return NewApplier(src).ApplyFile(dst, f) -} - -// Applier applies changes described in fragments to source data. If changes -// are described in multiple fragments, those fragments must be applied in -// order, usually by calling ApplyFile. -// -// By default, Applier operates in "strict" mode, where fragment content and -// positions must exactly match those of the source. -// -// If an error occurs while applying, methods on Applier return instances of -// *ApplyError that annotate the wrapped error with additional information -// when available. If the error is because of a conflict between a fragment and -// the source, the wrapped error will be a *Conflict. +// Apply applies the changes in f to src, writing the result to dst. It can +// apply both text and binary changes. // -// While an Applier can apply both text and binary fragments, only one fragment -// type can be used without resetting the Applier. The first fragment applied -// sets the type for the Applier. Mixing fragment types or mixing -// fragment-level and file-level applies results in an error. -type Applier struct { - src io.ReaderAt - lineSrc LineReaderAt - nextLine int64 - applyType int -} - -// NewApplier creates an Applier that reads data from src. If src is a -// LineReaderAt, it is used directly to apply text fragments. -func NewApplier(src io.ReaderAt) *Applier { - a := new(Applier) - a.Reset(src) - return a -} - -// Reset resets the input and internal state of the Applier. If src is nil, the -// existing source is reused. -func (a *Applier) Reset(src io.ReaderAt) { - if src != nil { - a.src = src - if lineSrc, ok := src.(LineReaderAt); ok { - a.lineSrc = lineSrc - } else { - a.lineSrc = &lineReaderAt{r: src} +// If an error occurs while applying, Apply returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict with the source, the wrapped error will be a *Conflict. +func Apply(dst io.Writer, src io.ReaderAt, f *File) error { + if f.IsBinary { + if len(f.TextFragments) > 0 { + return applyError(errors.New("binary file contains text fragments")) + } + if f.BinaryFragment == nil { + return applyError(errors.New("binary file does not contain a binary fragment")) + } + } else { + if f.BinaryFragment != nil { + return applyError(errors.New("text file contains a binary fragment")) } - } - a.nextLine = 0 - a.applyType = applyInitial -} - -// ApplyFile applies the changes in all of the fragments of f and writes the -// result to dst. -func (a *Applier) ApplyFile(dst io.Writer, f *File) error { - if a.applyType != applyInitial { - return applyError(errApplyInProgress) - } - defer func() { a.applyType = applyFile }() - - if f.IsBinary && len(f.TextFragments) > 0 { - return applyError(errors.New("binary file contains text fragments")) - } - if !f.IsBinary && f.BinaryFragment != nil { - return applyError(errors.New("text file contains binary fragment")) } switch { case f.BinaryFragment != nil: - return a.ApplyBinaryFragment(dst, f.BinaryFragment) + applier := NewBinaryApplier(dst, src) + if err := applier.ApplyFragment(f.BinaryFragment); err != nil { + return err + } + return applier.Close() case len(f.TextFragments) > 0: frags := make([]*TextFragment, len(f.TextFragments)) @@ -181,271 +132,17 @@ func (a *Applier) ApplyFile(dst io.Writer, f *File) error { // right now, the application fails if fragments overlap, but it should be // possible to precompute the result of applying them in order + applier := NewTextApplier(dst, src) for i, frag := range frags { - if err := a.ApplyTextFragment(dst, frag); err != nil { + if err := applier.ApplyFragment(frag); err != nil { return applyError(err, fragNum(i)) } } - } - - return applyError(a.Flush(dst)) -} + return applier.Close() -// ApplyTextFragment applies the changes in the fragment f and writes unwritten -// data before the start of the fragment and the result to dst. If multiple -// text fragments apply to the same source, ApplyTextFragment must be called in -// order of increasing start position. As a result, each fragment can be -// applied at most once before a call to Reset. -func (a *Applier) ApplyTextFragment(dst io.Writer, f *TextFragment) error { - if a.applyType != applyInitial && a.applyType != applyText { - return applyError(errApplyInProgress) - } - defer func() { a.applyType = applyText }() - - // application code assumes fragment fields are consistent - if err := f.Validate(); err != nil { - return applyError(err) - } - - // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) - fragStart := f.OldPosition - 1 - if fragStart < 0 { - fragStart = 0 - } - fragEnd := fragStart + f.OldLines - - start := a.nextLine - if fragStart < start { - return applyError(&Conflict{"fragment overlaps with an applied fragment"}) - } - - if f.OldPosition == 0 { - ok, err := isLen(a.src, 0) - if err != nil { - return applyError(err) - } - if !ok { - return applyError(&Conflict{"cannot create new file from non-empty src"}) - } - } - - preimage := make([][]byte, fragEnd-start) - n, err := a.lineSrc.ReadLinesAt(preimage, start) - if err != nil { - return applyError(err, lineNum(start+int64(n))) - } - - // copy leading data before the fragment starts - for i, line := range preimage[:fragStart-start] { - if _, err := dst.Write(line); err != nil { - a.nextLine = start + int64(i) - return applyError(err, lineNum(a.nextLine)) - } - } - preimage = preimage[fragStart-start:] - - // apply the changes in the fragment - used := int64(0) - for i, line := range f.Lines { - if err := applyTextLine(dst, line, preimage, used); err != nil { - a.nextLine = fragStart + used - return applyError(err, lineNum(a.nextLine), fragLineNum(i)) - } - if line.Old() { - used++ - } - } - a.nextLine = fragStart + used - - // new position of +0,0 mean a full delete, so check for leftovers - if f.NewPosition == 0 && f.NewLines == 0 { - var b [1][]byte - n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) - if err != nil && err != io.EOF { - return applyError(err, lineNum(a.nextLine)) - } - if n > 0 { - return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) - } - } - - return nil -} - -func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { - if line.Old() && string(preimage[i]) != line.Line { - return &Conflict{"fragment line does not match src line"} - } - if line.New() { - _, err = io.WriteString(dst, line.Line) - } - return err -} - -// Flush writes any data following the last applied fragment to dst. -func (a *Applier) Flush(dst io.Writer) (err error) { - switch a.applyType { - case applyInitial: - _, err = copyFrom(dst, a.src, 0) - case applyText: - _, err = copyLinesFrom(dst, a.lineSrc, a.nextLine) - case applyBinary: - // nothing to flush, binary apply "consumes" full source - } - return err -} - -// ApplyBinaryFragment applies the changes in the fragment f and writes the -// result to dst. At most one binary fragment can be applied before a call to -// Reset. -func (a *Applier) ApplyBinaryFragment(dst io.Writer, f *BinaryFragment) error { - if a.applyType != applyInitial { - return applyError(errApplyInProgress) - } - defer func() { a.applyType = applyBinary }() - - if f == nil { - return applyError(errors.New("nil fragment")) - } - - switch f.Method { - case BinaryPatchLiteral: - if _, err := dst.Write(f.Data); err != nil { - return applyError(err) - } - case BinaryPatchDelta: - if err := applyBinaryDeltaFragment(dst, a.src, f.Data); err != nil { - return applyError(err) - } default: - return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) - } - return nil -} - -func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { - srcSize, delta := readBinaryDeltaSize(frag) - if err := checkBinarySrcSize(src, srcSize); err != nil { - return err - } - - dstSize, delta := readBinaryDeltaSize(delta) - - for len(delta) > 0 { - op := delta[0] - if op == 0 { - return errors.New("invalid delta opcode 0") - } - - var n int64 - var err error - switch op & 0x80 { - case 0x80: - n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) - case 0x00: - n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) - } - if err != nil { - return err - } - dstSize -= n - } - - if dstSize != 0 { - return errors.New("corrupt binary delta: insufficient or extra data") - } - return nil -} - -// readBinaryDeltaSize reads a variable length size from a delta-encoded binary -// fragment, returing the size and the unused data. Data is encoded as: -// -// [[1xxxxxxx]...] [0xxxxxxx] -// -// in little-endian order, with 7 bits of the value per byte. -func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { - shift := uint(0) - for i, b := range d { - size |= int64(b&0x7F) << shift - shift += 7 - if b <= 0x7F { - return size, d[i+1:] - } - } - return size, nil -} - -// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary -// fragment, returning the amount of data written and the usused part of the -// fragment. An add operation takes the form: -// -// [0xxxxxx][[data1]...] -// -// where the lower seven bits of the opcode is the number of data bytes -// following the opcode. See also pack-format.txt in the Git source. -func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { - size := int(op) - if len(delta) < size { - return 0, delta, errors.New("corrupt binary delta: incomplete add") - } - _, err = w.Write(delta[:size]) - return int64(size), delta[size:], err -} - -// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary -// fragment, returing the amount of data written and the unused part of the -// fragment. A copy operation takes the form: -// -// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] -// -// where the lower seven bits of the opcode determine which non-zero offset and -// size bytes are present in little-endian order: if bit 0 is set, offset1 is -// present, etc. If no offset or size bytes are present, offset is 0 and size -// is 0x10000. See also pack-format.txt in the Git source. -func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { - const defaultSize = 0x10000 - - unpack := func(start, bits uint) (v int64) { - for i := uint(0); i < bits; i++ { - mask := byte(1 << (i + start)) - if op&mask > 0 { - if len(delta) == 0 { - err = errors.New("corrupt binary delta: incomplete copy") - return - } - v |= int64(delta[0]) << (8 * i) - delta = delta[1:] - } - } - return - } - - offset := unpack(0, 4) - size := unpack(4, 3) - if err != nil { - return 0, delta, err - } - if size == 0 { - size = defaultSize - } - - // TODO(bkeyes): consider pooling these buffers - b := make([]byte, size) - if _, err := src.ReadAt(b, offset); err != nil { - return 0, delta, err - } - - _, err = w.Write(b) - return size, delta, err -} - -func checkBinarySrcSize(r io.ReaderAt, size int64) error { - ok, err := isLen(r, size) - if err != nil { + // nothing to apply, just copy all the data + _, err := copyFrom(dst, src, 0) return err } - if !ok { - return &Conflict{"fragment src size does not match actual src size"} - } - return nil } diff --git a/gitdiff/apply_binary.go b/gitdiff/apply_binary.go new file mode 100644 index 0000000..b1ff4c1 --- /dev/null +++ b/gitdiff/apply_binary.go @@ -0,0 +1,206 @@ +package gitdiff + +import ( + "errors" + "fmt" + "io" +) + +// BinaryApplier applies binary changes described in a fragment to source data. +// The applier must be closed after use. +type BinaryApplier struct { + dst io.Writer + src io.ReaderAt + + closed bool + dirty bool +} + +// NewBinaryApplier creates an BinaryApplier that reads data from src and +// writes modified data to dst. +func NewBinaryApplier(dst io.Writer, src io.ReaderAt) *BinaryApplier { + a := BinaryApplier{ + dst: dst, + src: src, + } + return &a +} + +// ApplyFragment applies the changes in the fragment f and writes the result to +// dst. ApplyFragment can be called at most once. +// +// If an error occurs while applying, ApplyFragment returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict between a fragment and the source, the wrapped error will be a +// *Conflict. +func (a *BinaryApplier) ApplyFragment(f *BinaryFragment) error { + if f == nil { + return applyError(errors.New("nil fragment")) + } + if a.closed { + return applyError(errApplierClosed) + } + if a.dirty { + return applyError(errApplyInProgress) + } + + // mark an apply as in progress, even if it fails before making changes + a.dirty = true + + switch f.Method { + case BinaryPatchLiteral: + if _, err := a.dst.Write(f.Data); err != nil { + return applyError(err) + } + case BinaryPatchDelta: + if err := applyBinaryDeltaFragment(a.dst, a.src, f.Data); err != nil { + return applyError(err) + } + default: + return applyError(fmt.Errorf("unsupported binary patch method: %v", f.Method)) + } + return nil +} + +// Close writes any data following the last applied fragment and prevents +// future calls to ApplyFragment. +func (a *BinaryApplier) Close() (err error) { + if a.closed { + return nil + } + + a.closed = true + if !a.dirty { + _, err = copyFrom(a.dst, a.src, 0) + } else { + // do nothing, applying a binary fragment copies all data + } + return err +} + +func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error { + srcSize, delta := readBinaryDeltaSize(frag) + if err := checkBinarySrcSize(src, srcSize); err != nil { + return err + } + + dstSize, delta := readBinaryDeltaSize(delta) + + for len(delta) > 0 { + op := delta[0] + if op == 0 { + return errors.New("invalid delta opcode 0") + } + + var n int64 + var err error + switch op & 0x80 { + case 0x80: + n, delta, err = applyBinaryDeltaCopy(dst, op, delta[1:], src) + case 0x00: + n, delta, err = applyBinaryDeltaAdd(dst, op, delta[1:]) + } + if err != nil { + return err + } + dstSize -= n + } + + if dstSize != 0 { + return errors.New("corrupt binary delta: insufficient or extra data") + } + return nil +} + +// readBinaryDeltaSize reads a variable length size from a delta-encoded binary +// fragment, returing the size and the unused data. Data is encoded as: +// +// [[1xxxxxxx]...] [0xxxxxxx] +// +// in little-endian order, with 7 bits of the value per byte. +func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { + shift := uint(0) + for i, b := range d { + size |= int64(b&0x7F) << shift + shift += 7 + if b <= 0x7F { + return size, d[i+1:] + } + } + return size, nil +} + +// applyBinaryDeltaAdd applies an add opcode in a delta-encoded binary +// fragment, returning the amount of data written and the usused part of the +// fragment. An add operation takes the form: +// +// [0xxxxxx][[data1]...] +// +// where the lower seven bits of the opcode is the number of data bytes +// following the opcode. See also pack-format.txt in the Git source. +func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []byte, err error) { + size := int(op) + if len(delta) < size { + return 0, delta, errors.New("corrupt binary delta: incomplete add") + } + _, err = w.Write(delta[:size]) + return int64(size), delta[size:], err +} + +// applyBinaryDeltaCopy applies a copy opcode in a delta-encoded binary +// fragment, returing the amount of data written and the unused part of the +// fragment. A copy operation takes the form: +// +// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] +// +// where the lower seven bits of the opcode determine which non-zero offset and +// size bytes are present in little-endian order: if bit 0 is set, offset1 is +// present, etc. If no offset or size bytes are present, offset is 0 and size +// is 0x10000. See also pack-format.txt in the Git source. +func applyBinaryDeltaCopy(w io.Writer, op byte, delta []byte, src io.ReaderAt) (n int64, rest []byte, err error) { + const defaultSize = 0x10000 + + unpack := func(start, bits uint) (v int64) { + for i := uint(0); i < bits; i++ { + mask := byte(1 << (i + start)) + if op&mask > 0 { + if len(delta) == 0 { + err = errors.New("corrupt binary delta: incomplete copy") + return + } + v |= int64(delta[0]) << (8 * i) + delta = delta[1:] + } + } + return + } + + offset := unpack(0, 4) + size := unpack(4, 3) + if err != nil { + return 0, delta, err + } + if size == 0 { + size = defaultSize + } + + // TODO(bkeyes): consider pooling these buffers + b := make([]byte, size) + if _, err := src.ReadAt(b, offset); err != nil { + return 0, delta, err + } + + _, err = w.Write(b) + return size, delta, err +} + +func checkBinarySrcSize(r io.ReaderAt, size int64) error { + ok, err := isLen(r, size) + if err != nil { + return err + } + if !ok { + return &Conflict{"fragment src size does not match actual src size"} + } + return nil +} diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index d981e96..dd076bb 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -9,69 +9,6 @@ import ( "testing" ) -func TestApplierInvariants(t *testing.T) { - binary := &BinaryFragment{ - Method: BinaryPatchLiteral, - Size: 2, - Data: []byte("\xbe\xef"), - } - - text := &TextFragment{ - NewPosition: 1, - NewLines: 1, - LinesAdded: 1, - Lines: []Line{ - {Op: OpAdd, Line: "new line\n"}, - }, - } - - file := &File{ - TextFragments: []*TextFragment{text}, - } - - src := bytes.NewReader(nil) - dst := ioutil.Discard - - assertInProgress := func(t *testing.T, kind string, err error) { - if !errors.Is(err, errApplyInProgress) { - t.Fatalf("expected in-progress error for %s apply, but got: %v", kind, err) - } - } - - t.Run("binaryFirst", func(t *testing.T) { - a := NewApplier(src) - if err := a.ApplyBinaryFragment(dst, binary); err != nil { - t.Fatalf("unexpected error applying fragment: %v", err) - } - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) - assertInProgress(t, "file", a.ApplyFile(dst, file)) - }) - - t.Run("textFirst", func(t *testing.T) { - a := NewApplier(src) - if err := a.ApplyTextFragment(dst, text); err != nil { - t.Fatalf("unexpected error applying fragment: %v", err) - } - // additional text fragments are allowed - if err := a.ApplyTextFragment(dst, text); err != nil { - t.Fatalf("unexpected error applying second fragment: %v", err) - } - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) - assertInProgress(t, "file", a.ApplyFile(dst, file)) - }) - - t.Run("fileFirst", func(t *testing.T) { - a := NewApplier(src) - if err := a.ApplyFile(dst, file); err != nil { - t.Fatalf("unexpected error applying file: %v", err) - } - assertInProgress(t, "text", a.ApplyTextFragment(dst, text)) - assertInProgress(t, "binary", a.ApplyBinaryFragment(dst, binary)) - assertInProgress(t, "file", a.ApplyFile(dst, file)) - }) -} - func TestApplyTextFragment(t *testing.T) { tests := map[string]applyTest{ "createFile": {Files: getApplyFiles("text_fragment_new")}, @@ -127,11 +64,12 @@ func TestApplyTextFragment(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - test.run(t, func(w io.Writer, applier *Applier, file *File) error { + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { if len(file.TextFragments) != 1 { t.Fatalf("patch should contain exactly one fragment, but it has %d", len(file.TextFragments)) } - return applier.ApplyTextFragment(w, file.TextFragments[0]) + applier := NewTextApplier(dst, src) + return applier.ApplyFragment(file.TextFragments[0]) }) }) } @@ -176,8 +114,9 @@ func TestApplyBinaryFragment(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - test.run(t, func(w io.Writer, applier *Applier, file *File) error { - return applier.ApplyBinaryFragment(w, file.BinaryFragment) + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + applier := NewBinaryApplier(dst, src) + return applier.ApplyFragment(file.BinaryFragment) }) }) } @@ -216,8 +155,8 @@ func TestApplyFile(t *testing.T) { for name, test := range tests { t.Run(name, func(t *testing.T) { - test.run(t, func(w io.Writer, applier *Applier, file *File) error { - return applier.ApplyFile(w, file) + test.run(t, func(dst io.Writer, src io.ReaderAt, file *File) error { + return Apply(dst, src, file) }) }) } @@ -228,7 +167,7 @@ type applyTest struct { Err interface{} } -func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) { +func (at applyTest) run(t *testing.T, apply func(io.Writer, io.ReaderAt, *File) error) { src, patch, out := at.Files.Load(t) files, _, err := Parse(bytes.NewReader(patch)) @@ -239,10 +178,8 @@ func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) err t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) } - applier := NewApplier(bytes.NewReader(src)) - var dst bytes.Buffer - err = apply(&dst, applier, files[0]) + err = apply(&dst, bytes.NewReader(src), files[0]) if at.Err != nil { assertError(t, at.Err, err, "applying fragment") return diff --git a/gitdiff/apply_text.go b/gitdiff/apply_text.go new file mode 100644 index 0000000..a404552 --- /dev/null +++ b/gitdiff/apply_text.go @@ -0,0 +1,153 @@ +package gitdiff + +import ( + "io" +) + +// TextApplier applies changes described in text fragments to source data. If +// changes are described in multiple fragments, those fragments must be applied +// in order. The applier must be closed after use. +// +// By default, TextApplier operates in "strict" mode, where fragment content +// and positions must exactly match those of the source. +// +type TextApplier struct { + dst io.Writer + src io.ReaderAt + lineSrc LineReaderAt + nextLine int64 + + closed bool + dirty bool +} + +// NewTextApplier creates a TextApplier that reads data from src and writes +// modified data to dst. If src implements LineReaderAt, it is used directly. +func NewTextApplier(dst io.Writer, src io.ReaderAt) *TextApplier { + a := TextApplier{ + dst: dst, + src: src, + } + + if lineSrc, ok := src.(LineReaderAt); ok { + a.lineSrc = lineSrc + } else { + a.lineSrc = &lineReaderAt{r: src} + } + + return &a +} + +// ApplyFragment applies the changes in the fragment f, writing unwritten data +// before the start of the fragment and any changes from the fragment. If +// multiple text fragments apply to the same content, ApplyFragment must be +// called in order of increasing start position. As a result, each fragment can +// be applied at most once. +// +// If an error occurs while applying, ApplyFragment returns an *ApplyError that +// annotates the error with additional information. If the error is because of +// a conflict between the fragment and the source, the wrapped error will be a +// *Conflict. +func (a *TextApplier) ApplyFragment(f *TextFragment) error { + if a.closed { + return applyError(errApplierClosed) + } + + // mark an apply as in progress, even if it fails before making changes + a.dirty = true + + // application code assumes fragment fields are consistent + if err := f.Validate(); err != nil { + return applyError(err) + } + + // lines are 0-indexed, positions are 1-indexed (but new files have position = 0) + fragStart := f.OldPosition - 1 + if fragStart < 0 { + fragStart = 0 + } + fragEnd := fragStart + f.OldLines + + start := a.nextLine + if fragStart < start { + return applyError(&Conflict{"fragment overlaps with an applied fragment"}) + } + + if f.OldPosition == 0 { + ok, err := isLen(a.src, 0) + if err != nil { + return applyError(err) + } + if !ok { + return applyError(&Conflict{"cannot create new file from non-empty src"}) + } + } + + preimage := make([][]byte, fragEnd-start) + n, err := a.lineSrc.ReadLinesAt(preimage, start) + if err != nil { + return applyError(err, lineNum(start+int64(n))) + } + + // copy leading data before the fragment starts + for i, line := range preimage[:fragStart-start] { + if _, err := a.dst.Write(line); err != nil { + a.nextLine = start + int64(i) + return applyError(err, lineNum(a.nextLine)) + } + } + preimage = preimage[fragStart-start:] + + // apply the changes in the fragment + used := int64(0) + for i, line := range f.Lines { + if err := applyTextLine(a.dst, line, preimage, used); err != nil { + a.nextLine = fragStart + used + return applyError(err, lineNum(a.nextLine), fragLineNum(i)) + } + if line.Old() { + used++ + } + } + a.nextLine = fragStart + used + + // new position of +0,0 mean a full delete, so check for leftovers + if f.NewPosition == 0 && f.NewLines == 0 { + var b [1][]byte + n, err := a.lineSrc.ReadLinesAt(b[:], a.nextLine) + if err != nil && err != io.EOF { + return applyError(err, lineNum(a.nextLine)) + } + if n > 0 { + return applyError(&Conflict{"src still has content after full delete"}, lineNum(a.nextLine)) + } + } + + return nil +} + +func applyTextLine(dst io.Writer, line Line, preimage [][]byte, i int64) (err error) { + if line.Old() && string(preimage[i]) != line.Line { + return &Conflict{"fragment line does not match src line"} + } + if line.New() { + _, err = io.WriteString(dst, line.Line) + } + return err +} + +// Close writes any data following the last applied fragment and prevents +// future calls to ApplyFragment. +func (a *TextApplier) Close() (err error) { + if a.closed { + return nil + } + + a.closed = true + if !a.dirty { + _, err = copyFrom(a.dst, a.src, 0) + } else { + _, err = copyLinesFrom(a.dst, a.lineSrc, a.nextLine) + } + return err +} From 071689e91f0247ca1fbd40fec4c955d7d5bfa713 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Fri, 30 Sep 2022 22:41:17 -0700 Subject: [PATCH 07/23] Test with Go 1.19, upgrade golangci-lint (#35) The previous version of the golangci-lint action would install its own version of Go, which eventually conflicted with the old pinned version of the linter I was using. Upgrade the action to avoid this, but also update Go and the linter while I'm here. --- .github/workflows/go.yml | 10 +++++----- gitdiff/apply.go | 7 +++---- gitdiff/apply_binary.go | 6 +++--- gitdiff/apply_text.go | 1 - gitdiff/testdata/apply/bin.go | 2 +- 5 files changed, 12 insertions(+), 14 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 13ed2a9..558965e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -9,18 +9,18 @@ jobs: name: Verify runs-on: ubuntu-latest steps: - - name: Set up Go 1.16 - uses: actions/setup-go@v2 + - name: Set up Go 1.19 + uses: actions/setup-go@v3 with: - go-version: 1.16 + go-version: 1.19 - name: Check out code into the Go module directory uses: actions/checkout@v2 - name: Lint - uses: golangci/golangci-lint-action@v2 + uses: golangci/golangci-lint-action@v3 with: - version: v1.28 + version: v1.49 - name: Test run: go test -v ./... diff --git a/gitdiff/apply.go b/gitdiff/apply.go index cc4c3f0..44bbcca 100644 --- a/gitdiff/apply.go +++ b/gitdiff/apply.go @@ -13,10 +13,9 @@ import ( // Users can test if an error was caused by a conflict by using errors.Is with // an empty Conflict: // -// if errors.Is(err, &Conflict{}) { -// // handle conflict -// } -// +// if errors.Is(err, &Conflict{}) { +// // handle conflict +// } type Conflict struct { msg string } diff --git a/gitdiff/apply_binary.go b/gitdiff/apply_binary.go index b1ff4c1..b34772d 100644 --- a/gitdiff/apply_binary.go +++ b/gitdiff/apply_binary.go @@ -115,7 +115,7 @@ func applyBinaryDeltaFragment(dst io.Writer, src io.ReaderAt, frag []byte) error // readBinaryDeltaSize reads a variable length size from a delta-encoded binary // fragment, returing the size and the unused data. Data is encoded as: // -// [[1xxxxxxx]...] [0xxxxxxx] +// [[1xxxxxxx]...] [0xxxxxxx] // // in little-endian order, with 7 bits of the value per byte. func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { @@ -134,7 +134,7 @@ func readBinaryDeltaSize(d []byte) (size int64, rest []byte) { // fragment, returning the amount of data written and the usused part of the // fragment. An add operation takes the form: // -// [0xxxxxx][[data1]...] +// [0xxxxxx][[data1]...] // // where the lower seven bits of the opcode is the number of data bytes // following the opcode. See also pack-format.txt in the Git source. @@ -151,7 +151,7 @@ func applyBinaryDeltaAdd(w io.Writer, op byte, delta []byte) (n int64, rest []by // fragment, returing the amount of data written and the unused part of the // fragment. A copy operation takes the form: // -// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] +// [1xxxxxxx][offset1][offset2][offset3][offset4][size1][size2][size3] // // where the lower seven bits of the opcode determine which non-zero offset and // size bytes are present in little-endian order: if bit 0 is set, offset1 is diff --git a/gitdiff/apply_text.go b/gitdiff/apply_text.go index a404552..43af83a 100644 --- a/gitdiff/apply_text.go +++ b/gitdiff/apply_text.go @@ -10,7 +10,6 @@ import ( // // By default, TextApplier operates in "strict" mode, where fragment content // and positions must exactly match those of the source. -// type TextApplier struct { dst io.Writer src io.ReaderAt diff --git a/gitdiff/testdata/apply/bin.go b/gitdiff/testdata/apply/bin.go index 6d39ffd..e34f06b 100644 --- a/gitdiff/testdata/apply/bin.go +++ b/gitdiff/testdata/apply/bin.go @@ -1,4 +1,4 @@ -//+build ignore +//go:build ignore // bin.go is a helper CLI to manipulate binary diff data for testing purposes. // It can decode patches generated by git using the standard parsing functions From dc43dbf8c7a47db0157e6261151531db188b3e3f Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Fri, 30 Sep 2022 22:43:40 -0700 Subject: [PATCH 08/23] Slightly simplify patch header parsing (#34) Use standard string functions instead of removing leading whitespace in a custom loop. This also makes the distinction between the two types of mail header parsing a bit clearer. --- gitdiff/patch_header.go | 56 ++++++++++++++++++------------------ gitdiff/patch_header_test.go | 4 +++ 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index f2c3868..da71d7e 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -190,35 +190,35 @@ func ParsePatchDate(s string) (time.Time, error) { // prefix and appendix material should use `PatchHeader.SubjectPrefix // + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + // PatchHeader.BodyAppendix`. -func ParsePatchHeader(s string) (*PatchHeader, error) { - r := bufio.NewReader(strings.NewReader(s)) - - var line string - for { - var err error - line, err = r.ReadString('\n') - if err == io.EOF { - break - } - if err != nil { - return nil, err - } +func ParsePatchHeader(header string) (*PatchHeader, error) { + header = strings.TrimSpace(header) - line = strings.TrimSpace(line) - if len(line) > 0 { - break - } + if header == "" { + return &PatchHeader{}, nil + } + + var firstLine, rest string + if idx := strings.IndexByte(header, '\n'); idx >= 0 { + firstLine = header[:idx] + rest = header[idx+1:] + } else { + firstLine = header + rest = "" } switch { - case strings.HasPrefix(line, mailHeaderPrefix): - return parseHeaderMail(line, r) - case strings.HasPrefix(line, mailMinimumHeaderPrefix): - r = bufio.NewReader(strings.NewReader(s)) - return parseHeaderMail("", r) - case strings.HasPrefix(line, prettyHeaderPrefix): - return parseHeaderPretty(line, r) + case strings.HasPrefix(firstLine, mailHeaderPrefix): + return parseHeaderMail(firstLine, strings.NewReader(rest)) + + case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): + // With a minimum header, the first line is part of the actual mail + // content and needs to be parsed as part of the "rest" + return parseHeaderMail("", strings.NewReader(header)) + + case strings.HasPrefix(firstLine, prettyHeaderPrefix): + return parseHeaderPretty(firstLine, strings.NewReader(rest)) } + return nil, errors.New("unrecognized patch header format") } @@ -233,7 +233,7 @@ func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { h := &PatchHeader{} - prettyLine = prettyLine[len(prettyHeaderPrefix):] + prettyLine = strings.TrimPrefix(prettyLine, prettyHeaderPrefix) if i := strings.IndexByte(prettyLine, ' '); i > 0 { h.SHA = prettyLine[:i] } else { @@ -297,7 +297,7 @@ func parseHeaderPretty(prettyLine string, r io.Reader) (*PatchHeader, error) { h.Title = title if title != "" { - // Don't check for an appendix + // Don't check for an appendix, pretty headers do not contain them body, _ := scanMessageBody(s, indent, false) if s.Err() != nil { return nil, s.Err() @@ -374,8 +374,8 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { h := &PatchHeader{} - if len(mailLine) > len(mailHeaderPrefix) { - mailLine = mailLine[len(mailHeaderPrefix):] + if strings.HasPrefix(mailLine, mailHeaderPrefix) { + mailLine = strings.TrimPrefix(mailLine, mailHeaderPrefix) if i := strings.IndexByte(mailLine, ' '); i > 0 { h.SHA = mailLine[:i] } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index bda91fe..57207a5 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -414,6 +414,10 @@ Author: Morton Haypenny Title: expectedTitle, }, }, + "empty": { + Input: "", + Header: PatchHeader{}, + }, } for name, test := range tests { From 03daf96518a95647d85122e6d107ee63c0bc5b58 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 1 Oct 2022 14:47:39 -0700 Subject: [PATCH 09/23] Add option to control patch subject cleaning (#36) When processing mail-formatted patches, the default cleanup removed all leading content in square brackets, but this pattern is often used to identify tickets or other information that should remain in the commit title. Git supports disabling this the the `-k` and `-b` flags, which we simulate with the new SubjectCleanMode options. Use WithSubjectCleanMode(SubjectCleanPatchOnly) to only remove bracketed strings that contain "PATCH", keeping others that are (probably) part of the actual commit message. Note that because of the mail parsing library, we cannot replicate the `-k` flag exactly and always clean leading and trailing whitespace. --- README.md | 4 + gitdiff/patch_header.go | 128 ++++++++++++++--------- gitdiff/patch_header_test.go | 193 ++++++++++++++++++++++++++--------- 3 files changed, 232 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index 58bb675..01e8d2d 100644 --- a/README.md +++ b/README.md @@ -101,3 +101,7 @@ The parsing code has also had a modest amount of fuzz testing. context of each fragment must exactly match the source file; `git apply` implements a search algorithm that tries different lines and amounts of context, with further options to normalize or ignore whitespace changes. + +7. When parsing mail-formatted patch headers, leading and trailing whitespace + is always removed from `Subject` lines. There is no exact equivalent to `git + mailinfo -k`. diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index da71d7e..f935c6d 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -165,34 +165,71 @@ func ParsePatchDate(s string) (time.Time, error) { return time.Time{}, fmt.Errorf("unknown date format: %s", s) } -// ParsePatchHeader parses a preamble string as returned by Parse into a +// A PatchHeaderOption modifies the behavior of ParsePatchHeader. +type PatchHeaderOption func(*patchHeaderOptions) + +// SubjectCleanMode controls how ParsePatchHeader cleans subject lines when +// parsing mail-formatted patches. +type SubjectCleanMode int + +const ( + // SubjectCleanWhitespace removes leading and trailing whitespace. + SubjectCleanWhitespace SubjectCleanMode = iota + + // SubjectCleanAll removes leading and trailing whitespace, leading "Re:", + // "re:", and ":" strings, and leading strings enclosed by '[' and ']'. + // This is the default behavior of git (see `git mailinfo`) and this + // package. + SubjectCleanAll + + // SubjectCleanPatchOnly is the same as SubjectCleanAll, but only removes + // leading strings enclosed by '[' and ']' if they start with "PATCH". + SubjectCleanPatchOnly +) + +// WithSubjectCleanMode sets the SubjectCleanMode for header parsing. By +// default, uses SubjectCleanAll. +func WithSubjectCleanMode(m SubjectCleanMode) PatchHeaderOption { + return func(opts *patchHeaderOptions) { + opts.subjectCleanMode = m + } +} + +type patchHeaderOptions struct { + subjectCleanMode SubjectCleanMode +} + +// ParsePatchHeader parses the preamble string returned by [Parse] into a // PatchHeader. Due to the variety of header formats, some fields of the parsed // PatchHeader may be unset after parsing. // // Supported formats are the short, medium, full, fuller, and email pretty -// formats used by git diff, git log, and git show and the UNIX mailbox format -// used by git format-patch. +// formats used by `git diff`, `git log`, and `git show` and the UNIX mailbox +// format used by `git format-patch`. // -// If ParsePatchHeader detects that it is handling an email, it will -// remove extra content at the beginning of the title line, such as -// `[PATCH]` or `Re:` in the same way that `git mailinfo` does. -// SubjectPrefix will be set to the value of this removed string. -// (`git mailinfo` is the core part of `git am` that pulls information -// out of an individual mail.) +// When parsing mail-formatted headers, ParsePatchHeader tries to remove +// email-specific content from the title and body: // -// Additionally, if ParsePatchHeader detects that it's handling an -// email, it will remove a `---` line and put anything after it into -// BodyAppendix. +// - Based on the SubjectCleanMode, remove prefixes like reply markers and +// "[PATCH]" strings from the subject, saving any removed content in the +// SubjectPrefix field. Parsing always discards leading and trailing +// whitespace from the subject line. The default mode is SubjectCleanAll. // -// Those wishing the effect of a plain `git am` should use -// `PatchHeader.Title + "\n" + PatchHeader.Body` (or -// `PatchHeader.Message()`). Those wishing to retain the subject -// prefix and appendix material should use `PatchHeader.SubjectPrefix -// + PatchHeader.Title + "\n" + PatchHeader.Body + "\n" + -// PatchHeader.BodyAppendix`. -func ParsePatchHeader(header string) (*PatchHeader, error) { - header = strings.TrimSpace(header) +// - If the body contains a "---" line (3 hyphens), remove that line and any +// content after it from the body and save it in the BodyAppendix field. +// +// ParsePatchHeader tries to process content it does not understand wthout +// returning errors, but will return errors if well-identified content like +// dates or identies uses unknown or invalid formats. +func ParsePatchHeader(header string, options ...PatchHeaderOption) (*PatchHeader, error) { + opts := patchHeaderOptions{ + subjectCleanMode: SubjectCleanAll, // match git defaults + } + for _, optFn := range options { + optFn(&opts) + } + header = strings.TrimSpace(header) if header == "" { return &PatchHeader{}, nil } @@ -208,12 +245,12 @@ func ParsePatchHeader(header string) (*PatchHeader, error) { switch { case strings.HasPrefix(firstLine, mailHeaderPrefix): - return parseHeaderMail(firstLine, strings.NewReader(rest)) + return parseHeaderMail(firstLine, strings.NewReader(rest), opts) case strings.HasPrefix(firstLine, mailMinimumHeaderPrefix): // With a minimum header, the first line is part of the actual mail // content and needs to be parsed as part of the "rest" - return parseHeaderMail("", strings.NewReader(header)) + return parseHeaderMail("", strings.NewReader(header), opts) case strings.HasPrefix(firstLine, prettyHeaderPrefix): return parseHeaderPretty(firstLine, strings.NewReader(rest)) @@ -366,7 +403,7 @@ func scanMessageBody(s *bufio.Scanner, indent string, separateAppendix bool) (st return body.String(), appendix.String() } -func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { +func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*PatchHeader, error) { msg, err := mail.ReadMessage(r) if err != nil { return nil, err @@ -403,7 +440,7 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { } subject := msg.Header.Get("Subject") - h.SubjectPrefix, h.Title = parseSubject(subject) + h.SubjectPrefix, h.Title = cleanSubject(subject, opts.subjectCleanMode) s := bufio.NewScanner(msg.Body) h.Body, h.BodyAppendix = scanMessageBody(s, "", true) @@ -414,23 +451,24 @@ func parseHeaderMail(mailLine string, r io.Reader) (*PatchHeader, error) { return h, nil } -// Takes an email subject and returns the patch prefix and commit -// title. i.e., `[PATCH v3 3/5] Implement foo` would return `[PATCH -// v3 3/5] ` and `Implement foo` -func parseSubject(s string) (string, string) { - // This is meant to be compatible with - // https://github.com/git/git/blob/master/mailinfo.c:cleanup_subject(). - // If compatibility with `git am` drifts, go there to see if there - // are any updates. +func cleanSubject(s string, mode SubjectCleanMode) (prefix string, subject string) { + switch mode { + case SubjectCleanAll, SubjectCleanPatchOnly: + case SubjectCleanWhitespace: + return "", strings.TrimSpace(decodeSubject(s)) + default: + panic(fmt.Sprintf("unknown clean mode: %d", mode)) + } + + // Based on the algorithm from Git in mailinfo.c:cleanup_subject() + // If compatibility with `git am` drifts, go there to see if there are any updates. at := 0 for at < len(s) { switch s[at] { case 'r', 'R': // Detect re:, Re:, rE: and RE: - if at+2 < len(s) && - (s[at+1] == 'e' || s[at+1] == 'E') && - s[at+2] == ':' { + if at+2 < len(s) && (s[at+1] == 'e' || s[at+1] == 'E') && s[at+2] == ':' { at += 3 continue } @@ -441,25 +479,21 @@ func parseSubject(s string) (string, string) { continue case '[': - // Look for closing parenthesis - j := at + 1 - for ; j < len(s); j++ { - if s[j] == ']' { - break + if i := strings.IndexByte(s[at:], ']'); i > 0 { + if mode == SubjectCleanAll || strings.Contains(s[at:at+i+1], "PATCH") { + at += i + 1 + continue } } - - if j < len(s) { - at = j + 1 - continue - } } - // Only loop if we actually removed something + // Nothing was removed, end processing break } - return s[:at], decodeSubject(s[at:]) + prefix = strings.TrimLeftFunc(s[:at], unicode.IsSpace) + subject = strings.TrimRightFunc(decodeSubject(s[at:]), unicode.IsSpace) + return } // Decodes a subject line. Currently only supports quoted-printable UTF-8. This format is the result diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 57207a5..003e8c7 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -144,9 +144,10 @@ func TestParsePatchHeader(t *testing.T) { expectedBodyAppendix := "CC: Joe Smith " tests := map[string]struct { - Input string - Header PatchHeader - Err interface{} + Input string + Options []PatchHeaderOption + Header PatchHeader + Err interface{} }{ "prettyShort": { Input: `commit 61f5cd90bed4d204ee3feb3aa41ee91d4734855b @@ -269,6 +270,28 @@ Another body line. Body: expectedBody, }, }, + "mailboxPatchOnly": { + Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 +From: Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] [BUG-123] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Options: []PatchHeaderOption{ + WithSubjectCleanMode(SubjectCleanPatchOnly), + }, + Header: PatchHeader{ + SHA: expectedSHA, + Author: expectedIdentity, + AuthorDate: expectedDate, + Title: "[BUG-123] " + expectedTitle, + Body: expectedBody, + }, + }, "mailboxEmojiOneLine": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny @@ -414,7 +437,7 @@ Author: Morton Haypenny Title: expectedTitle, }, }, - "empty": { + "emptyHeader": { Input: "", Header: PatchHeader{}, }, @@ -422,7 +445,7 @@ Author: Morton Haypenny for name, test := range tests { t.Run(name, func(t *testing.T) { - h, err := ParsePatchHeader(test.Input) + h, err := ParsePatchHeader(test.Input, test.Options...) if test.Err != nil { assertError(t, test.Err, err, "parsing patch header") return @@ -477,50 +500,128 @@ func assertPatchIdentity(t *testing.T, kind string, exp, act *PatchIdentity) { } } -func TestCleanupSubject(t *testing.T) { - exp := "A sample commit to test header parsing" - tests := map[string]string{ - "plain": "", - "patch": "[PATCH] ", - "patchv5": "[PATCH v5] ", - "patchrfc": "[PATCH RFC] ", - "patchnospace": "[PATCH]", - "space": " ", - "re": "re: ", - "Re": "Re: ", - "RE": "rE: ", - "rere": "re: re: ", - } - - for name, prefix := range tests { - gotprefix, gottitle := parseSubject(prefix + exp) - if gottitle != exp { - t.Errorf("%s: Incorrect parsing of prefix %s: got title %s, wanted %s", - name, prefix, gottitle, exp) - } - if gotprefix != prefix { - t.Errorf("%s: Incorrect parsing of prefix %s: got prefix %s", - name, prefix, gotprefix) - } - } +func TestCleanSubject(t *testing.T) { + expectedSubject := "A sample commit to test header parsing" - moretests := map[string]struct { - in, eprefix, etitle string + tests := map[string]struct { + Input string + Mode SubjectCleanMode + Prefix string + Subject string }{ - "Reimplement": {"Reimplement something", "", "Reimplement something"}, - "patch-reimplement": {"[PATCH v5] Reimplement something", "[PATCH v5] ", "Reimplement something"}, - "Openbracket": {"[Just to annoy people", "", "[Just to annoy people"}, + "CleanAll/noPrefix": { + Input: expectedSubject, + Mode: SubjectCleanAll, + Subject: expectedSubject, + }, + "CleanAll/patchPrefix": { + Input: "[PATCH] " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH] ", + Subject: expectedSubject, + }, + "CleanAll/patchPrefixNoSpace": { + Input: "[PATCH]" + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH]", + Subject: expectedSubject, + }, + "CleanAll/patchPrefixContent": { + Input: "[PATCH 3/7] " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "[PATCH 3/7] ", + Subject: expectedSubject, + }, + "CleanAll/spacePrefix": { + Input: " " + expectedSubject, + Mode: SubjectCleanAll, + Subject: expectedSubject, + }, + "CleanAll/replyLowerPrefix": { + Input: "re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "re: ", + Subject: expectedSubject, + }, + "CleanAll/replyMixedPrefix": { + Input: "Re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "Re: ", + Subject: expectedSubject, + }, + "CleanAll/replyCapsPrefix": { + Input: "RE: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "RE: ", + Subject: expectedSubject, + }, + "CleanAll/replyDoublePrefix": { + Input: "Re: re: " + expectedSubject, + Mode: SubjectCleanAll, + Prefix: "Re: re: ", + Subject: expectedSubject, + }, + "CleanAll/noPrefixSubjectHasRe": { + Input: "Reimplement parsing", + Mode: SubjectCleanAll, + Subject: "Reimplement parsing", + }, + "CleanAll/patchPrefixSubjectHasRe": { + Input: "[PATCH 1/2] Reimplement parsing", + Mode: SubjectCleanAll, + Prefix: "[PATCH 1/2] ", + Subject: "Reimplement parsing", + }, + "CleanAll/unclosedPrefix": { + Input: "[Just to annoy people", + Mode: SubjectCleanAll, + Subject: "[Just to annoy people", + }, + "CleanAll/multiplePrefix": { + Input: " Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject + " ", + Mode: SubjectCleanAll, + Prefix: "Re:Re: [PATCH 1/2][DRAFT] ", + Subject: expectedSubject, + }, + "CleanPatchOnly/patchPrefix": { + Input: "[PATCH] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "[PATCH] ", + Subject: expectedSubject, + }, + "CleanPatchOnly/mixedPrefix": { + Input: "[PATCH] [TICKET-123] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "[PATCH] ", + Subject: "[TICKET-123] " + expectedSubject, + }, + "CleanPatchOnly/multiplePrefix": { + Input: "Re:Re: [PATCH 1/2][DRAFT] " + expectedSubject, + Mode: SubjectCleanPatchOnly, + Prefix: "Re:Re: [PATCH 1/2]", + Subject: "[DRAFT] " + expectedSubject, + }, + "CleanWhitespace/leadingSpace": { + Input: " [PATCH] " + expectedSubject, + Mode: SubjectCleanWhitespace, + Subject: "[PATCH] " + expectedSubject, + }, + "CleanWhitespace/trailingSpace": { + Input: "[PATCH] " + expectedSubject + " ", + Mode: SubjectCleanWhitespace, + Subject: "[PATCH] " + expectedSubject, + }, } - for name, test := range moretests { - prefix, title := parseSubject(test.in) - if title != test.etitle { - t.Errorf("%s: Incorrect parsing of %s: got title %s, wanted %s", - name, test.in, title, test.etitle) - } - if prefix != test.eprefix { - t.Errorf("%s: Incorrect parsing of %s: got prefix %s, wanted %s", - name, test.in, title, test.etitle) - } + for name, test := range tests { + t.Run(name, func(t *testing.T) { + prefix, subject := cleanSubject(test.Input, test.Mode) + if prefix != test.Prefix { + t.Errorf("incorrect prefix: expected %q, actual %q", test.Prefix, prefix) + } + if subject != test.Subject { + t.Errorf("incorrect subject: expected %q, actual %q", test.Subject, subject) + } + }) } } From 1575e0c4ef9322a5685ff76bd40b53363a98799e Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 1 Oct 2022 14:50:12 -0700 Subject: [PATCH 10/23] Remove warning about apply API changes --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 01e8d2d..e5fe24f 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,9 @@ if err := gitdiff.Apply(&output, code, files[0]); err != nil { ## Development Status -The parsing API and types are complete and I expect will remain stable. As of -March 2022, I am refactoring the apply API to address existing issues and to -support non-strict application. Version 0.6.1 is the last release using the old -API and the `master` branch may contain breaking changes. Please use a release -version to avoid surprises. +The parsing API and types are complete and I expect will remain stable. Version +0.7.0 introduced a new apply API that may change more in the future to support +non-strict patch application. Parsing and strict application are well-covered by unit tests and the library is used in a production application that parses and applies thousands of From 981bc4b60c4490e10d9f8ca8cfd7833e303c399e Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Mon, 26 Dec 2022 17:56:30 -0500 Subject: [PATCH 11/23] Fix parsing of mode lines with trailing space (#38) If a patch is passed through a system that converts line endings to '\r\n', mode lines end up with trailing whitespace that confuses strconv.ParseInt. In Git, this is avoided by using strtoul() for parsing, which stops at the first non-digit character. Changing line endings in patch content itself will cause other problems so it is best to avoid this transform, but if it does happen, it shouldn't cause a parse error. --- gitdiff/file_header.go | 4 ++-- gitdiff/file_header_test.go | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 58904b4..77ceb5d 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -324,12 +324,12 @@ func parseGitHeaderNewName(f *File, line, defaultName string) error { } func parseGitHeaderOldMode(f *File, line, defaultName string) (err error) { - f.OldMode, err = parseMode(line) + f.OldMode, err = parseMode(strings.TrimSpace(line)) return } func parseGitHeaderNewMode(f *File, line, defaultName string) (err error) { - f.NewMode, err = parseMode(line) + f.NewMode, err = parseMode(strings.TrimSpace(line)) return } diff --git a/gitdiff/file_header_test.go b/gitdiff/file_header_test.go index 102795c..ef29833 100644 --- a/gitdiff/file_header_test.go +++ b/gitdiff/file_header_test.go @@ -486,6 +486,12 @@ func TestParseGitHeaderData(t *testing.T) { OldMode: os.FileMode(0100644), }, }, + "oldModeWithTrailingSpace": { + Line: "old mode 100644\r\n", + OutputFile: &File{ + OldMode: os.FileMode(0100644), + }, + }, "invalidOldMode": { Line: "old mode rw\n", Err: true, @@ -496,6 +502,12 @@ func TestParseGitHeaderData(t *testing.T) { NewMode: os.FileMode(0100755), }, }, + "newModeWithTrailingSpace": { + Line: "new mode 100755\r\n", + OutputFile: &File{ + NewMode: os.FileMode(0100755), + }, + }, "invalidNewMode": { Line: "new mode rwx\n", Err: true, @@ -518,6 +530,15 @@ func TestParseGitHeaderData(t *testing.T) { IsNew: true, }, }, + "newFileModeWithTrailingSpace": { + Line: "new file mode 100755\r\n", + DefaultName: "dir/file.txt", + OutputFile: &File{ + NewName: "dir/file.txt", + NewMode: os.FileMode(0100755), + IsNew: true, + }, + }, "copyFrom": { Line: "copy from dir/file.txt\n", OutputFile: &File{ From 13e8639a0b49cb49741b05f11ac52c530d2d38f0 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 25 Feb 2024 12:39:09 -0800 Subject: [PATCH 12/23] Clarify that Parse expects a single patch --- gitdiff/parser.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d44465a..d2b29cb 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -12,6 +12,10 @@ import ( // Parse parses a patch with changes to one or more files. Any content before // the first file is returned as the second value. If an error occurs while // parsing, it returns all files parsed before the error. +// +// Parse expects to receive a single patch. If the input may contain multiple +// patches (for example, if it is an mbox file), callers should split it into +// individual patches and call Parse on each one. func Parse(r io.Reader) ([]*File, string, error) { p := newParser(r) From 3f2ea5c16e0a9d685e1b91b6b8bfc0fbf4cb5d6a Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Wed, 6 Mar 2024 21:32:38 -0800 Subject: [PATCH 13/23] Accept empty emails in ParsePatchIdentity (#42) Git is actually more lenient here than I thought. As long as the identity contains the "<>" delimiters, Git will allow an empty email, so we should accept the same thing. I also discovered that an identity with only an email set will use the email as the name, so I've implemented that behavior as well. --- gitdiff/patch_header.go | 21 ++++++++++++++------- gitdiff/patch_header_test.go | 22 ++++++++++++++++++++-- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index f935c6d..af3293b 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -82,12 +82,15 @@ func (i PatchIdentity) String() string { return fmt.Sprintf("%s <%s>", name, i.Email) } -// ParsePatchIdentity parses a patch identity string. A valid string contains a -// non-empty name followed by an email address in angle brackets. Like Git, -// ParsePatchIdentity does not require that the email address is valid or -// properly formatted, only that it is non-empty. The name must not contain a -// left angle bracket, '<', and the email address must not contain a right -// angle bracket, '>'. +// ParsePatchIdentity parses a patch identity string. A valid string contains +// an optional name followed by an email address in angle brackets. The angle +// brackets must always exist, but may enclose an empty address. At least one +// of the name or the email address must be non-empty. If the string only +// contains an email address, that value is also used as the name. +// +// The name must not contain a left angle bracket, '<', and the email address +// must not contain a right angle bracket, '>'. Otherwise, there are no +// restrictions on the format of either field. func ParsePatchIdentity(s string) (PatchIdentity, error) { var emailStart, emailEnd int for i, c := range s { @@ -110,7 +113,11 @@ func ParsePatchIdentity(s string) (PatchIdentity, error) { if emailStart > 0 && emailEnd > 0 { email = strings.TrimSpace(s[emailStart:emailEnd]) } - if name == "" || email == "" { + if name == "" && email != "" { + name = email + } + + if name == "" && email == "" { return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 003e8c7..33ec209 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -32,14 +32,32 @@ func TestParsePatchIdentity(t *testing.T) { Email: "mhaypenny@example.com", }, }, - "missingName": { + "onlyEmail": { Input: "", - Err: "invalid identity", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, }, "missingEmail": { Input: "Morton Haypenny", Err: "invalid identity", }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, "unclosedEmail": { Input: "Morton Haypenny Date: Sun, 5 May 2024 23:16:19 -0400 Subject: [PATCH 14/23] Follow git logic when parsing patch identities (#44) When GitHub creates patches for Dependabot PRs, it generates a "From:" line that is not valid according to RFC 5322: the address spec contains unquoted special characters (the "[bot]" in "dependabot[bot]"). While the 'net/mail' parser makes some exceptions to the spec, this is not one of them, so parsing these patch headers fails. Git's 'mailinfo' command avoids this by only implementing the unquoting part of RFC 5322 and then applying a heuristic to separate the string in to name and email values that seem reasonable. This commit does two things: 1. Reimplements ParsePatchIdentity to follow Git's logic, so that it can accept a wider range of inputs, including quoted strings. Strings accepted by the previous implementation parse in the same way with one exception: inputs that contain whitespace inside the angle brackets for an email address now use the email address as the name and drop any separate name component. 2. When parsing mail-formatted patches, use ParsePatchIdentity to parse the "From:" line instead of the 'net/mail' function. --- gitdiff/patch_header.go | 71 ++------------ gitdiff/patch_header_test.go | 99 +++++--------------- gitdiff/patch_identity.go | 166 +++++++++++++++++++++++++++++++++ gitdiff/patch_identity_test.go | 127 +++++++++++++++++++++++++ 4 files changed, 321 insertions(+), 142 deletions(-) create mode 100644 gitdiff/patch_identity.go create mode 100644 gitdiff/patch_identity_test.go diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index af3293b..f047059 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -68,62 +68,6 @@ func (h *PatchHeader) Message() string { return msg.String() } -// PatchIdentity identifies a person who authored or committed a patch. -type PatchIdentity struct { - Name string - Email string -} - -func (i PatchIdentity) String() string { - name := i.Name - if name == "" { - name = `""` - } - return fmt.Sprintf("%s <%s>", name, i.Email) -} - -// ParsePatchIdentity parses a patch identity string. A valid string contains -// an optional name followed by an email address in angle brackets. The angle -// brackets must always exist, but may enclose an empty address. At least one -// of the name or the email address must be non-empty. If the string only -// contains an email address, that value is also used as the name. -// -// The name must not contain a left angle bracket, '<', and the email address -// must not contain a right angle bracket, '>'. Otherwise, there are no -// restrictions on the format of either field. -func ParsePatchIdentity(s string) (PatchIdentity, error) { - var emailStart, emailEnd int - for i, c := range s { - if c == '<' && emailStart == 0 { - emailStart = i + 1 - } - if c == '>' && emailStart > 0 { - emailEnd = i - break - } - } - if emailStart > 0 && emailEnd == 0 { - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) - } - - var name, email string - if emailStart > 0 { - name = strings.TrimSpace(s[:emailStart-1]) - } - if emailStart > 0 && emailEnd > 0 { - email = strings.TrimSpace(s[emailStart:emailEnd]) - } - if name == "" && email != "" { - name = email - } - - if name == "" && email == "" { - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) - } - - return PatchIdentity{Name: name, Email: email}, nil -} - // ParsePatchDate parses a patch date string. It returns the parsed time or an // error if s has an unknown format. ParsePatchDate supports the iso, rfc, // short, raw, unix, and default formats (with local variants) used by the @@ -425,16 +369,13 @@ func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*Pa } } - addrs, err := msg.Header.AddressList("From") - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { - return nil, err - } - if len(addrs) > 0 { - addr := addrs[0] - if addr.Name == "" { - addr.Name = addr.Address + from := msg.Header.Get("From") + if from != "" { + u, err := ParsePatchIdentity(from) + if err != nil { + return nil, err } - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} + h.Author = &u } date := msg.Header.Get("Date") diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 33ec209..c8559b0 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -5,83 +5,6 @@ import ( "time" ) -func TestParsePatchIdentity(t *testing.T) { - tests := map[string]struct { - Input string - Output PatchIdentity - Err interface{} - }{ - "simple": { - Input: "Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "extraWhitespace": { - Input: " Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "trailingCharacters": { - Input: "Morton Haypenny unrelated garbage", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "onlyEmail": { - Input: "", - Output: PatchIdentity{ - Name: "mhaypenny@example.com", - Email: "mhaypenny@example.com", - }, - }, - "emptyEmail": { - Input: "Morton Haypenny <>", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "", - }, - }, - "missingEmail": { - Input: "Morton Haypenny", - Err: "invalid identity", - }, - "missingNameAndEmptyEmail": { - Input: "<>", - Err: "invalid identity", - }, - "empty": { - Input: "", - Err: "invalid identity", - }, - "unclosedEmail": { - Input: "Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: &PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny diff --git a/gitdiff/patch_identity.go b/gitdiff/patch_identity.go new file mode 100644 index 0000000..018f80c --- /dev/null +++ b/gitdiff/patch_identity.go @@ -0,0 +1,166 @@ +package gitdiff + +import ( + "fmt" + "strings" +) + +// PatchIdentity identifies a person who authored or committed a patch. +type PatchIdentity struct { + Name string + Email string +} + +func (i PatchIdentity) String() string { + name := i.Name + if name == "" { + name = `""` + } + return fmt.Sprintf("%s <%s>", name, i.Email) +} + +// ParsePatchIdentity parses a patch identity string. A patch identity contains +// an email address and an optional name in [RFC 5322] format. This is either a +// plain email adddress or a name followed by an address in angle brackets: +// +// author@example.com +// Author Name +// +// If the input is not one of these formats, ParsePatchIdentity applies a +// heuristic to separate the name and email portions. If both the name and +// email are missing or empty, ParsePatchIdentity returns an error. It +// otherwise does not validate the result. +// +// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 +func ParsePatchIdentity(s string) (PatchIdentity, error) { + s = normalizeSpace(s) + s = unquotePairs(s) + + var name, email string + if at := strings.IndexByte(s, '@'); at >= 0 { + start, end := at, at + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { + start-- + } + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { + end++ + } + email = s[start+1 : end] + + // Adjust the boundaries so that we drop angle brackets, but keep + // spaces when removing the email to form the name. + if start < 0 || s[start] != '<' { + start++ + } + if end >= len(s) || s[end] != '>' { + end-- + } + name = s[:start] + s[end+1:] + } else { + start, end := 0, 0 + for i := 0; i < len(s); i++ { + if s[i] == '<' && start == 0 { + start = i + 1 + } + if s[i] == '>' && start > 0 { + end = i + break + } + } + if start > 0 && end >= start { + email = strings.TrimSpace(s[start:end]) + name = s[:start-1] + } + } + + // After extracting the email, the name might contain extra whitespace + // again and may be surrounded by comment characters. The git source gives + // these examples of when this can happen: + // + // "Name " + // "email@domain (Name)" + // "Name (Comment)" + // + name = normalizeSpace(name) + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { + name = name[1 : len(name)-1] + } + name = strings.TrimSpace(name) + + // If the name is empty or contains email-like characters, use the email + // instead (assuming one exists) + if name == "" || strings.ContainsAny(name, "@<>") { + name = email + } + + if name == "" && email == "" { + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) + } + return PatchIdentity{Name: name, Email: email}, nil +} + +// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to +// remove any "quoted-pairs" (backslash-espaced characters). It also removes +// the quotes from any quoted strings, but leaves the comment delimiters. +func unquotePairs(s string) string { + quote := false + comments := 0 + escaped := false + + var out strings.Builder + for i := 0; i < len(s); i++ { + if escaped { + escaped = false + } else { + switch s[i] { + case '\\': + // quoted-pair is only allowed in quoted-string/comment + if quote || comments > 0 { + escaped = true + continue // drop '\' character + } + + case '"': + if comments == 0 { + quote = !quote + continue // drop '"' character + } + + case '(': + if !quote { + comments++ + } + case ')': + if comments > 0 { + comments-- + } + } + } + out.WriteByte(s[i]) + } + return out.String() +} + +// normalizeSpace trims leading and trailing whitespace from s and converts +// inner sequences of one or more whitespace characters to single spaces. +func normalizeSpace(s string) string { + var sb strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + if !isRFC5332Space(c) { + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { + sb.WriteByte(' ') + } + sb.WriteByte(c) + } + } + return sb.String() +} + +func isRFC5332Space(c byte) bool { + switch c { + case '\t', '\n', '\r', ' ': + return true + } + return false +} diff --git a/gitdiff/patch_identity_test.go b/gitdiff/patch_identity_test.go new file mode 100644 index 0000000..f15fe38 --- /dev/null +++ b/gitdiff/patch_identity_test.go @@ -0,0 +1,127 @@ +package gitdiff + +import ( + "testing" +) + +func TestParsePatchIdentity(t *testing.T) { + tests := map[string]struct { + Input string + Output PatchIdentity + Err interface{} + }{ + "simple": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "extraWhitespace": { + Input: "\t Morton Haypenny \r\n ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "trailingCharacters": { + Input: "Morton Haypenny II", + Output: PatchIdentity{ + Name: "Morton Haypenny II", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmail": { + Input: "mhaypenny@example.com", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmailInBrackets": { + Input: "", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "rfc5322SpecialCharacters": { + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, + Output: PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + }, + "rfc5322QuotedPairs": { + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, + Output: PatchIdentity{ + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, + Email: "mhaypenny+[1900]@example.com", + }, + }, + "rfc5322QuotedPairsOutOfContext": { + Input: `Morton \\Backslash Haypenny `, + Output: PatchIdentity{ + Name: `Morton \\Backslash Haypenny`, + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, + }, + "unclosedEmail": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "bogusEmailWithWhitespace": { + Input: "Morton Haypenny < mhaypenny >", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "missingEmail": { + Input: "Morton Haypenny", + Err: "invalid identity", + }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + id, err := ParsePatchIdentity(test.Input) + if test.Err != nil { + assertError(t, test.Err, err, "parsing identity") + return + } + if err != nil { + t.Fatalf("unexpected error parsing identity: %v", err) + } + + if test.Output != id { + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) + } + }) + } +} From 0a4e55f9a190b500835e10d187ca4f88a96917e2 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 14 Jul 2024 23:44:16 -0400 Subject: [PATCH 15/23] Return preamble when a patch has no files (#46) While empty patches with only a header were parsable, the parser discarded the preamble content. This meant callers had to handle this case specially. Now, if we reach the end of the input without finding a file, Parse() returns the full content of the patch as the preamble. --- gitdiff/file_header.go | 2 +- gitdiff/parser.go | 6 +++--- gitdiff/parser_test.go | 18 ++++++++++++++++-- gitdiff/testdata/no_files.patch | 8 ++++++++ 4 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 gitdiff/testdata/no_files.patch diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 77ceb5d..7ae4bc9 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -57,7 +57,7 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { return nil, "", err } } - return nil, "", nil + return nil, preamble.String(), nil } func (p *parser) ParseGitFileHeader() (*File, error) { diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d2b29cb..e8f8430 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -33,6 +33,9 @@ func Parse(r io.Reader) ([]*File, string, error) { if err != nil { return files, preamble, err } + if len(files) == 0 { + preamble = pre + } if file == nil { break } @@ -50,9 +53,6 @@ func Parse(r io.Reader) ([]*File, string, error) { } } - if len(files) == 0 { - preamble = pre - } files = append(files, file) } diff --git a/gitdiff/parser_test.go b/gitdiff/parser_test.go index 30f59f4..15a5d67 100644 --- a/gitdiff/parser_test.go +++ b/gitdiff/parser_test.go @@ -281,8 +281,13 @@ this is another line --- could this be a header? nope, it's just some dashes `, - Output: nil, - Preamble: "", + Output: nil, + Preamble: ` +this is a line +this is another line +--- could this be a header? +nope, it's just some dashes +`, }, "detatchedFragmentLike": { Input: ` @@ -290,6 +295,10 @@ a wild fragment appears? @@ -1,3 +1,4 ~1,5 @@ `, Output: nil, + Preamble: ` +a wild fragment appears? +@@ -1,3 +1,4 ~1,5 @@ +`, }, "detatchedFragment": { Input: ` @@ -426,6 +435,11 @@ Date: Tue Apr 2 22:55:40 2019 -0700 }, Preamble: textPreamble, }, + "noFiles": { + InputFile: "testdata/no_files.patch", + Output: nil, + Preamble: textPreamble, + }, "newBinaryFile": { InputFile: "testdata/new_binary_file.patch", Output: []*File{ diff --git a/gitdiff/testdata/no_files.patch b/gitdiff/testdata/no_files.patch new file mode 100644 index 0000000..9eea12d --- /dev/null +++ b/gitdiff/testdata/no_files.patch @@ -0,0 +1,8 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + From 9e0997ef65150db9392595ab9f2a1df593d48d22 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 14:39:23 -0400 Subject: [PATCH 16/23] Update Go and golangci-lint versions (#49) The minimum Go version for the package is now Go 1.21. This is because a future change will use the 'slices' package in test code. Note that non-test code in the package should still be compatible with older versions of Go. As part of this, also update the golangci-lint version to one that works with Go 1.21, which required replacing some deprecated linters. --- .github/workflows/go.yml | 12 ++++++------ .golangci.yml | 28 ++++++++++++++++++++++++---- go.mod | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 558965e..a59475a 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -9,18 +9,18 @@ jobs: name: Verify runs-on: ubuntu-latest steps: - - name: Set up Go 1.19 - uses: actions/setup-go@v3 + - name: Set up Go 1.21 + uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: 1.21 - name: Check out code into the Go module directory - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v6 with: - version: v1.49 + version: v1.59 - name: Test run: go test -v ./... diff --git a/.golangci.yml b/.golangci.yml index 82dbad2..30b37f8 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -4,21 +4,41 @@ run: linters: disable-all: true enable: - - deadcode - errcheck - gofmt - goimports - - golint - govet - ineffassign - misspell + - revive - typecheck - unconvert - - varcheck + - unused issues: exclude-use-default: false -linter-settings: +linters-settings: goimports: local-prefixes: github.com/bluekeyes/go-gitdiff + revive: + rules: + # enable all rules from golint + - name: context-keys-type + - name: time-naming + - name: var-declaration + - name: unexported-return + - name: errorf + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: increment-decrement + - name: var-naming + - name: package-comments + - name: range + - name: receiver-naming + - name: indent-error-flow diff --git a/go.mod b/go.mod index f35826e..27c3738 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/bluekeyes/go-gitdiff -go 1.13 +go 1.21 From 8584cd59afe0fd5af926a98ea3ef650fa4b1f952 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 14:57:07 -0400 Subject: [PATCH 17/23] Add String() methods to parsed types (#48) This enables clients to move back and forth between parsed objects and text patches. The generated patches are semantically equal to the parsed object and should re-parse to the same object, but may not be byte-for-byte identical to the original input. In my testing, formatted text patches are usually identical to the input, but there may be cases where this is not true. Binary patches always differ. This is because Go's 'compress/flate' package ends streams with an empty block instead of adding the end-of-stream flag to the last non-empty block, like Git's C implementation. Since the streams will always be different for this reason, I chose to also enable default compression (the test patches I generated with Git used no compression.) The main tests for this feature involve parsing, formatting, and then re-parsing a patch to make sure we get equal objects. Formatting is handled by a new internal formatter type, which allows writing all data to the same stream. This isn't exposed publicly right now, but will be useful if there's a need for more flexible formatting functions in the future, like formatting to a user-provided io.Writer. --- .golangci.yml | 6 + gitdiff/base85.go | 43 ++- gitdiff/base85_test.go | 58 ++++ gitdiff/format.go | 277 ++++++++++++++++++++ gitdiff/format_roundtrip_test.go | 156 +++++++++++ gitdiff/format_test.go | 28 ++ gitdiff/gitdiff.go | 35 ++- gitdiff/testdata/string/binary_modify.patch | 9 + gitdiff/testdata/string/binary_new.patch | 11 + gitdiff/testdata/string/copy.patch | 4 + gitdiff/testdata/string/copy_modify.patch | 21 ++ gitdiff/testdata/string/delete.patch | 16 ++ gitdiff/testdata/string/mode.patch | 3 + gitdiff/testdata/string/mode_modify.patch | 10 + gitdiff/testdata/string/modify.patch | 16 ++ gitdiff/testdata/string/new.patch | 16 ++ gitdiff/testdata/string/new_empty.patch | 3 + gitdiff/testdata/string/new_mode.patch | 16 ++ gitdiff/testdata/string/rename.patch | 4 + gitdiff/testdata/string/rename_modify.patch | 18 ++ 20 files changed, 746 insertions(+), 4 deletions(-) create mode 100644 gitdiff/format.go create mode 100644 gitdiff/format_roundtrip_test.go create mode 100644 gitdiff/format_test.go create mode 100644 gitdiff/testdata/string/binary_modify.patch create mode 100644 gitdiff/testdata/string/binary_new.patch create mode 100644 gitdiff/testdata/string/copy.patch create mode 100644 gitdiff/testdata/string/copy_modify.patch create mode 100644 gitdiff/testdata/string/delete.patch create mode 100644 gitdiff/testdata/string/mode.patch create mode 100644 gitdiff/testdata/string/mode_modify.patch create mode 100644 gitdiff/testdata/string/modify.patch create mode 100644 gitdiff/testdata/string/new.patch create mode 100644 gitdiff/testdata/string/new_empty.patch create mode 100644 gitdiff/testdata/string/new_mode.patch create mode 100644 gitdiff/testdata/string/rename.patch create mode 100644 gitdiff/testdata/string/rename_modify.patch diff --git a/.golangci.yml b/.golangci.yml index 30b37f8..192c556 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -19,6 +19,12 @@ issues: exclude-use-default: false linters-settings: + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) goimports: local-prefixes: github.com/bluekeyes/go-gitdiff revive: diff --git a/gitdiff/base85.go b/gitdiff/base85.go index 385aa64..86db117 100644 --- a/gitdiff/base85.go +++ b/gitdiff/base85.go @@ -19,8 +19,8 @@ func init() { } // base85Decode decodes Base85-encoded data from src into dst. It uses the -// alphabet defined by base85.c in the Git source tree, which appears to be -// unique. src must contain at least len(dst) bytes of encoded data. +// alphabet defined by base85.c in the Git source tree. src must contain at +// least len(dst) bytes of encoded data. func base85Decode(dst, src []byte) error { var v uint32 var n, ndst int @@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error { } return nil } + +// base85Encode encodes src in Base85, writing the result to dst. It uses the +// alphabet defined by base85.c in the Git source tree. +func base85Encode(dst, src []byte) { + var di, si int + + encode := func(v uint32) { + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] + dst[di+1] = b85Alpha[(v/(85*85*85))%85] + dst[di+2] = b85Alpha[(v/(85*85))%85] + dst[di+3] = b85Alpha[(v/85)%85] + dst[di+4] = b85Alpha[v%85] + } + + n := (len(src) / 4) * 4 + for si < n { + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) + si += 4 + di += 5 + } + + var v uint32 + switch len(src) - si { + case 3: + v |= uint32(src[si+2]) << 8 + fallthrough + case 2: + v |= uint32(src[si+1]) << 16 + fallthrough + case 1: + v |= uint32(src[si+0]) << 24 + encode(v) + } +} + +// base85Len returns the length of n bytes of Base85 encoded data. +func base85Len(n int) int { + return (n + 3) / 4 * 5 +} diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index 068da63..672c471 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -1,6 +1,7 @@ package gitdiff import ( + "bytes" "testing" ) @@ -58,3 +59,60 @@ func TestBase85Decode(t *testing.T) { }) } } + +func TestBase85Encode(t *testing.T) { + tests := map[string]struct { + Input []byte + Output string + }{ + "zeroBytes": { + Input: []byte{}, + Output: "", + }, + "twoBytes": { + Input: []byte{0xCA, 0xFE}, + Output: "%KiWV", + }, + "fourBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, + Output: "007GV", + }, + "sixBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + Output: "007GV%KiWV", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + base85Encode(dst, test.Input) + for i, b := range test.Output { + if dst[i] != byte(b) { + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) + } + } + }) + } +} + +func FuzzBase85Roundtrip(f *testing.F) { + f.Add([]byte{0x2b, 0x0d}) + f.Add([]byte{0xbc, 0xb4, 0x3f}) + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) + + f.Fuzz(func(t *testing.T, in []byte) { + n := len(in) + dst := make([]byte, base85Len(n)) + out := make([]byte, n) + + base85Encode(dst, in) + if err := base85Decode(out, dst); err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) +} diff --git a/gitdiff/format.go b/gitdiff/format.go new file mode 100644 index 0000000..5653f6f --- /dev/null +++ b/gitdiff/format.go @@ -0,0 +1,277 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "strconv" +) + +type formatter struct { + w io.Writer + err error +} + +func newFormatter(w io.Writer) *formatter { + return &formatter{w: w} +} + +func (fm *formatter) Write(p []byte) (int, error) { + if fm.err != nil { + return len(p), nil + } + if _, err := fm.w.Write(p); err != nil { + fm.err = err + } + return len(p), nil +} + +func (fm *formatter) WriteString(s string) (int, error) { + fm.Write([]byte(s)) + return len(s), nil +} + +func (fm *formatter) WriteByte(c byte) error { + fm.Write([]byte{c}) + return nil +} + +func (fm *formatter) WriteQuotedName(s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + fm.WriteByte('"') + } + fm.WriteString(s[qpos:i]) + fm.Write(q) + qpos = i + 1 + } + } + fm.WriteString(s[qpos:]) + if qpos > 0 { + fm.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} + +func (fm *formatter) FormatFile(f *File) { + fm.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + fm.WriteQuotedName("a/" + aName) + fm.WriteByte(' ') + fm.WriteQuotedName("b/" + bName) + fm.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) + } else if f.NewMode != 0 { + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) + } else if f.OldMode != 0 { + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + fm.WriteString("copy from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("copy to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + fm.WriteString("rename from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("rename to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { + fmt.Fprintf(fm, " %o", f.OldMode) + } + + fm.WriteByte('\n') + } + + if f.IsBinary { + if f.BinaryFragment == nil { + fm.WriteString("Binary files fmer\n") + } else { + fm.WriteString("GIT binary patch\n") + fm.FormatBinaryFragment(f.BinaryFragment) + if f.ReverseBinaryFragment != nil { + fm.FormatBinaryFragment(f.ReverseBinaryFragment) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { + fm.WriteString("--- ") + if f.OldName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("a/" + f.OldName) + } + fm.WriteByte('\n') + + fm.WriteString("+++ ") + if f.NewName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("b/" + f.NewName) + } + fm.WriteByte('\n') + + for _, frag := range f.TextFragments { + fm.FormatTextFragment(frag) + } + } +} + +func (fm *formatter) FormatTextFragment(f *TextFragment) { + fm.FormatTextFragmentHeader(f) + fm.WriteByte('\n') + + for _, line := range f.Lines { + fm.WriteString(line.Op.String()) + fm.WriteString(line.Line) + if line.NoEOL() { + fm.WriteString("\n\\ No newline at end of file\n") + } + } +} + +func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + fm.WriteByte(' ') + fm.WriteString(f.Comment) + } +} + +func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { + const ( + maxBytesPerLine = 52 + ) + + switch f.Method { + case BinaryPatchDelta: + fm.WriteString("delta ") + case BinaryPatchLiteral: + fm.WriteString("literal ") + } + fm.Write(strconv.AppendInt(nil, f.Size, 10)) + fm.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + fm.WriteByte('z') + fm.Write(buf) + fm.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + fm.WriteByte(sizeChar) + fm.Write(buf) + fm.WriteByte('\n') + } + fm.WriteByte('\n') +} + +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw := zlib.NewWriter(&b) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/gitdiff/format_roundtrip_test.go b/gitdiff/format_roundtrip_test.go new file mode 100644 index 0000000..58cff97 --- /dev/null +++ b/gitdiff/format_roundtrip_test.go @@ -0,0 +1,156 @@ +package gitdiff + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "slices" + "testing" +) + +func TestFormatRoundtrip(t *testing.T) { + patches := []struct { + File string + SkipTextCompare bool + }{ + {File: "copy.patch"}, + {File: "copy_modify.patch"}, + {File: "delete.patch"}, + {File: "mode.patch"}, + {File: "mode_modify.patch"}, + {File: "modify.patch"}, + {File: "new.patch"}, + {File: "new_empty.patch"}, + {File: "new_mode.patch"}, + {File: "rename.patch"}, + {File: "rename_modify.patch"}, + + // Due to differences between Go's 'encoding/zlib' package and the zlib + // C library, binary patches cannot be compared directly as the patch + // data is slightly different when re-encoded by Go. + {File: "binary_modify.patch", SkipTextCompare: true}, + {File: "binary_new.patch", SkipTextCompare: true}, + } + + for _, patch := range patches { + t.Run(patch.File, func(t *testing.T) { + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) + if err != nil { + t.Fatalf("failed to read patch: %v", err) + } + + original := assertParseSingleFile(t, b, "patch") + str := original.String() + + if !patch.SkipTextCompare { + if string(b) != str { + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) + } + } + + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") + assertFilesEqual(t, original, reparsed) + }) + } +} + +func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { + files, _, err := Parse(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to parse %s: %v", kind, err) + } + if len(files) != 1 { + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) + } + return files[0] +} + +func assertFilesEqual(t *testing.T, expected, actual *File) { + assertEqual(t, expected.OldName, actual.OldName, "OldName") + assertEqual(t, expected.NewName, actual.NewName, "NewName") + + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") + + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") + + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") + assertEqual(t, expected.Score, actual.Score, "Score") + + if len(expected.TextFragments) == len(actual.TextFragments) { + for i := range expected.TextFragments { + prefix := fmt.Sprintf("TextFragments[%d].", i) + ef := expected.TextFragments[i] + af := actual.TextFragments[i] + + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") + + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") + + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") + + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") + + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") + + if !slices.Equal(ef.Lines, af.Lines) { + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) + } + } + } else { + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) + } + + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") + + if expected.BinaryFragment != nil { + if actual.BinaryFragment == nil { + t.Errorf("BinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.BinaryFragment + af := expected.BinaryFragment + + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.BinaryFragment != nil { + t.Errorf("BinaryFragment: expected nil, actual is non-nil") + } + + if expected.ReverseBinaryFragment != nil { + if actual.ReverseBinaryFragment == nil { + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.ReverseBinaryFragment + af := expected.ReverseBinaryFragment + + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.ReverseBinaryFragment != nil { + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") + } +} + +func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { + if expected != actual { + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) + } +} diff --git a/gitdiff/format_test.go b/gitdiff/format_test.go new file mode 100644 index 0000000..3325296 --- /dev/null +++ b/gitdiff/format_test.go @@ -0,0 +1,28 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestFormatter_WriteQuotedName(t *testing.T) { + tests := []struct { + Input string + Expected string + }{ + {"noquotes.txt", `noquotes.txt`}, + {"no quotes.txt", `no quotes.txt`}, + {"new\nline", `"new\nline"`}, + {"escape\x1B null\x00", `"escape\033 null\000"`}, + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, + {"\"already quoted\"", `"\"already quoted\""`}, + } + + for _, test := range tests { + var b strings.Builder + newFormatter(&b).WriteQuotedName(test.Input) + if b.String() != test.Expected { + t.Errorf("expected %q, got %q", test.Expected, b.String()) + } + } +} diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 18645bd..5365645 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "strings" ) // File describes changes to a single file. It can be either a text file or a @@ -38,6 +39,15 @@ type File struct { ReverseBinaryFragment *BinaryFragment } +// String returns a git diff representation of this file. The value can be +// parsed by this library to obtain the same File, but may not be the same as +// the original input. +func (f *File) String() string { + var diff strings.Builder + newFormatter(&diff).FormatFile(f) + return diff.String() +} + // TextFragment describes changed lines starting at a specific line in a text file. type TextFragment struct { Comment string @@ -57,9 +67,20 @@ type TextFragment struct { Lines []Line } -// Header returns the canonical header of this fragment. +// String returns a git diff format of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatTextFragment(f) + return diff.String() +} + +// Header returns a git diff header of this fragment. See [File.String] for +// more details on this format. func (f *TextFragment) Header() string { - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) + var hdr strings.Builder + newFormatter(&hdr).FormatTextFragmentHeader(f) + return hdr.String() } // Validate checks that the fragment is self-consistent and appliable. Validate @@ -197,3 +218,13 @@ const ( // BinaryPatchLiteral indicates the data is the exact file content BinaryPatchLiteral ) + +// String returns a git diff format of this fragment. Due to differences in +// zlib implementation between Go and Git, encoded binary data in the result +// will likely differ from what Git produces for the same input. See +// [File.String] for more details on this format. +func (f *BinaryFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatBinaryFragment(f) + return diff.String() +} diff --git a/gitdiff/testdata/string/binary_modify.patch b/gitdiff/testdata/string/binary_modify.patch new file mode 100644 index 0000000..12ddad5 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify.patch @@ -0,0 +1,9 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 +GIT binary patch +delta 66 +QcmeZhVVvM$!$1K50C&Ox;s5{u + +delta 5 +McmZo+^qAlQ00i9urT_o{ + diff --git a/gitdiff/testdata/string/binary_new.patch b/gitdiff/testdata/string/binary_new.patch new file mode 100644 index 0000000..c56f35e --- /dev/null +++ b/gitdiff/testdata/string/binary_new.patch @@ -0,0 +1,11 @@ +diff --git a/file.bin b/file.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c +GIT binary patch +literal 72 +zcmV-O0Jr~td-`u6JcK&{KDK= Date: Tue, 7 Jan 2025 19:27:03 -0800 Subject: [PATCH 18/23] Parse binary headers with file paths (#55) Some patches may include one or more file paths as part of the binary header when there is no binary data. Git accounts for this by only checking the prefix and suffix of the line, but I missed that logic when implementing this originally. --- gitdiff/binary.go | 15 +++++++++++---- gitdiff/binary_test.go | 10 ++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gitdiff/binary.go b/gitdiff/binary.go index c65a9a6..282e323 100644 --- a/gitdiff/binary.go +++ b/gitdiff/binary.go @@ -50,11 +50,11 @@ func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { } func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { - switch p.Line(0) { - case "GIT binary patch\n": + line := p.Line(0) + switch { + case line == "GIT binary patch\n": hasData = true - case "Binary files differ\n": - case "Files differ\n": + case isBinaryNoDataMarker(line): default: return false, false, nil } @@ -65,6 +65,13 @@ func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { return true, hasData, nil } +func isBinaryNoDataMarker(line string) bool { + if strings.HasSuffix(line, " differ\n") { + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") + } + return false +} + func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) if len(parts) < 2 { diff --git a/gitdiff/binary_test.go b/gitdiff/binary_test.go index a31a0e0..64db243 100644 --- a/gitdiff/binary_test.go +++ b/gitdiff/binary_test.go @@ -25,6 +25,16 @@ func TestParseBinaryMarker(t *testing.T) { IsBinary: true, HasData: false, }, + "binaryFileNoPatchPaths": { + Input: "Binary files a/foo.bin and b/foo.bin differ\n", + IsBinary: true, + HasData: false, + }, + "fileNoPatch": { + Input: "Files differ\n", + IsBinary: true, + HasData: false, + }, "textFile": { Input: "@@ -10,14 +22,31 @@\n", IsBinary: false, From fffa3cca9bd29785ec7f99ea6921de60e32a724a Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Tue, 7 Jan 2025 19:35:14 -0800 Subject: [PATCH 19/23] Fix binary headers in formatted patches (#56) Include file names in the header (now that we can actually parse them) and fix a bad find-and-replace that changed "differ" to "fmer". Add a new test to verify that binary files without data format correctly. --- gitdiff/format.go | 6 +++++- gitdiff/format_roundtrip_test.go | 1 + gitdiff/testdata/string/binary_modify_nodata.patch | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 gitdiff/testdata/string/binary_modify_nodata.patch diff --git a/gitdiff/format.go b/gitdiff/format.go index 5653f6f..d97aba9 100644 --- a/gitdiff/format.go +++ b/gitdiff/format.go @@ -169,7 +169,11 @@ func (fm *formatter) FormatFile(f *File) { if f.IsBinary { if f.BinaryFragment == nil { - fm.WriteString("Binary files fmer\n") + fm.WriteString("Binary files ") + fm.WriteQuotedName("a/" + aName) + fm.WriteString(" and ") + fm.WriteQuotedName("b/" + bName) + fm.WriteString(" differ\n") } else { fm.WriteString("GIT binary patch\n") fm.FormatBinaryFragment(f.BinaryFragment) diff --git a/gitdiff/format_roundtrip_test.go b/gitdiff/format_roundtrip_test.go index 58cff97..a230e91 100644 --- a/gitdiff/format_roundtrip_test.go +++ b/gitdiff/format_roundtrip_test.go @@ -31,6 +31,7 @@ func TestFormatRoundtrip(t *testing.T) { // data is slightly different when re-encoded by Go. {File: "binary_modify.patch", SkipTextCompare: true}, {File: "binary_new.patch", SkipTextCompare: true}, + {File: "binary_modify_nodata.patch"}, } for _, patch := range patches { diff --git a/gitdiff/testdata/string/binary_modify_nodata.patch b/gitdiff/testdata/string/binary_modify_nodata.patch new file mode 100644 index 0000000..833a534 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify_nodata.patch @@ -0,0 +1,3 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d..bdc9a70 100644 +Binary files a/file.bin and b/file.bin differ From 823d31db22199232e1400cddc39f06d890380055 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Tue, 4 Mar 2025 20:14:23 -0800 Subject: [PATCH 20/23] Add a test for removing the last newline in a file (#59) This worked, but completes the test coverage in combination with the test that adds a final newline and one that leaves the missing newline in place. --- gitdiff/apply_test.go | 1 + .../testdata/apply/text_fragment_change_end_eol.out | 3 +++ .../testdata/apply/text_fragment_change_end_eol.patch | 10 ++++++++++ .../testdata/apply/text_fragment_change_end_eol.src | 3 +++ 4 files changed, 17 insertions(+) create mode 100644 gitdiff/testdata/apply/text_fragment_change_end_eol.out create mode 100644 gitdiff/testdata/apply/text_fragment_change_end_eol.patch create mode 100644 gitdiff/testdata/apply/text_fragment_change_end_eol.src diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index dd076bb..f19153b 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -22,6 +22,7 @@ func TestApplyTextFragment(t *testing.T) { "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.out b/gitdiff/testdata/apply/text_fragment_change_end_eol.out new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 \ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.patch b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch new file mode 100644 index 0000000..f1c9477 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch @@ -0,0 +1,10 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +index a92d664..8cf2f17 100644 +--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src ++++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +@@ -1,3 +1,3 @@ + line 1 + line 2 +-line 3 ++line 3 +\ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.src b/gitdiff/testdata/apply/text_fragment_change_end_eol.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 From 0896f01926d529e4c0781f72c7957aa942e2068f Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 29 Mar 2025 13:27:19 -0700 Subject: [PATCH 21/23] Clarify that we only support GNU diff In issue #57, it was reported that the library could not apply some patches generated by BSD `diff` because of the way that variant reports changes in files without trailing newlines. Since the library behavior matches Git, I don't consider this a bug, but update the README to mention that the support for standard unified diffs is only for the GNU variant of the `diff` tool. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e5fe24f..0f65127 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A Go library for parsing and applying patches generated by `git diff`, `git show`, and `git format-patch`. It can also parse and apply unified diffs -generated by the standard `diff` tool. +generated by the standard GNU `diff` tool. It supports standard line-oriented text patches and Git binary patches, and aims to parse anything accepted by the `git apply` command. From 7413262a184380473b5cad4b12a2a061ecbc86c9 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Mon, 31 Mar 2025 18:37:50 -0700 Subject: [PATCH 22/23] Update golangci-lint to v2 (#61) --- .github/workflows/go.yml | 4 +-- .golangci.yml | 75 ++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a59475a..c89fc7e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,9 +18,9 @@ jobs: uses: actions/checkout@v4 - name: Lint - uses: golangci/golangci-lint-action@v6 + uses: golangci/golangci-lint-action@v7 with: - version: v1.59 + version: v2.0 - name: Test run: go test -v ./... diff --git a/.golangci.yml b/.golangci.yml index 192c556..655cb5a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,50 +1,51 @@ +version: "2" + run: tests: false linters: - disable-all: true + default: none enable: - errcheck - - gofmt - - goimports - govet - ineffassign - misspell - revive - - typecheck - unconvert - unused + settings: + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) + revive: + rules: + - name: context-keys-type + - name: time-naming + - name: var-declaration + - name: unexported-return + - name: errorf + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: increment-decrement + - name: var-naming + - name: package-comments + - name: range + - name: receiver-naming + - name: indent-error-flow -issues: - exclude-use-default: false - -linters-settings: - errcheck: - exclude-functions: - - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write - - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString - - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte - - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) - goimports: - local-prefixes: github.com/bluekeyes/go-gitdiff - revive: - rules: - # enable all rules from golint - - name: context-keys-type - - name: time-naming - - name: var-declaration - - name: unexported-return - - name: errorf - - name: blank-imports - - name: context-as-argument - - name: dot-imports - - name: error-return - - name: error-strings - - name: error-naming - - name: exported - - name: increment-decrement - - name: var-naming - - name: package-comments - - name: range - - name: receiver-naming - - name: indent-error-flow +formatters: + enable: + - gofmt + - goimports + settings: + goimports: + local-prefixes: + - github.com/bluekeyes/go-gitdiff From 17bd72f0e2e89a2c6aa03de29fc2583a7ec07943 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Wed, 3 Sep 2025 17:26:05 -0700 Subject: [PATCH 23/23] Report short sources as conflicts during apply (#63) Previously, this returned an io.ErrUnexpectedEOF. This is not wrong, in that we did unexpectedly hit the end of the input file, but it is vague and implies a possible library bug rather than a problem with the patch or the input. This condition is really a conflict, as the changes described by the patch are not compatible with the state of the input. --- gitdiff/apply_test.go | 4 ++-- gitdiff/apply_text.go | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index f19153b..05915ba 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -31,14 +31,14 @@ func TestApplyTextFragment(t *testing.T) { Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src_before.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorShortSrc": { Files: applyFiles{ Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorContextConflict": { Files: applyFiles{ diff --git a/gitdiff/apply_text.go b/gitdiff/apply_text.go index 43af83a..8d0accb 100644 --- a/gitdiff/apply_text.go +++ b/gitdiff/apply_text.go @@ -1,6 +1,7 @@ package gitdiff import ( + "errors" "io" ) @@ -85,6 +86,11 @@ func (a *TextApplier) ApplyFragment(f *TextFragment) error { preimage := make([][]byte, fragEnd-start) n, err := a.lineSrc.ReadLinesAt(preimage, start) if err != nil { + // an EOF indicates that source file is shorter than the patch expects, + // which should be reported as a conflict rather than a generic error + if errors.Is(err, io.EOF) { + err = &Conflict{"src has fewer lines than required by fragment"} + } return applyError(err, lineNum(start+int64(n))) }