diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 558965e..c89fc7e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -9,18 +9,18 @@ jobs: name: Verify runs-on: ubuntu-latest steps: - - name: Set up Go 1.19 - uses: actions/setup-go@v3 + - name: Set up Go 1.21 + uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: 1.21 - name: Check out code into the Go module directory - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v7 with: - version: v1.49 + version: v2.0 - name: Test run: go test -v ./... diff --git a/.golangci.yml b/.golangci.yml index 82dbad2..655cb5a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,24 +1,51 @@ +version: "2" + run: tests: false linters: - disable-all: true + default: none enable: - - deadcode - errcheck - - gofmt - - goimports - - golint - govet - ineffassign - misspell - - typecheck + - revive - unconvert - - varcheck - -issues: - exclude-use-default: false + - unused + settings: + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) + revive: + rules: + - name: context-keys-type + - name: time-naming + - name: var-declaration + - name: unexported-return + - name: errorf + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: increment-decrement + - name: var-naming + - name: package-comments + - name: range + - name: receiver-naming + - name: indent-error-flow -linter-settings: - goimports: - local-prefixes: github.com/bluekeyes/go-gitdiff +formatters: + enable: + - gofmt + - goimports + settings: + goimports: + local-prefixes: + - github.com/bluekeyes/go-gitdiff diff --git a/README.md b/README.md index e5fe24f..0f65127 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A Go library for parsing and applying patches generated by `git diff`, `git show`, and `git format-patch`. It can also parse and apply unified diffs -generated by the standard `diff` tool. +generated by the standard GNU `diff` tool. It supports standard line-oriented text patches and Git binary patches, and aims to parse anything accepted by the `git apply` command. diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index dd076bb..05915ba 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -22,6 +22,7 @@ func TestApplyTextFragment(t *testing.T) { "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, @@ -30,14 +31,14 @@ func TestApplyTextFragment(t *testing.T) { Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src_before.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorShortSrc": { Files: applyFiles{ Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorContextConflict": { Files: applyFiles{ diff --git a/gitdiff/apply_text.go b/gitdiff/apply_text.go index 43af83a..8d0accb 100644 --- a/gitdiff/apply_text.go +++ b/gitdiff/apply_text.go @@ -1,6 +1,7 @@ package gitdiff import ( + "errors" "io" ) @@ -85,6 +86,11 @@ func (a *TextApplier) ApplyFragment(f *TextFragment) error { preimage := make([][]byte, fragEnd-start) n, err := a.lineSrc.ReadLinesAt(preimage, start) if err != nil { + // an EOF indicates that source file is shorter than the patch expects, + // which should be reported as a conflict rather than a generic error + if errors.Is(err, io.EOF) { + err = &Conflict{"src has fewer lines than required by fragment"} + } return applyError(err, lineNum(start+int64(n))) } diff --git a/gitdiff/base85.go b/gitdiff/base85.go index 385aa64..86db117 100644 --- a/gitdiff/base85.go +++ b/gitdiff/base85.go @@ -19,8 +19,8 @@ func init() { } // base85Decode decodes Base85-encoded data from src into dst. It uses the -// alphabet defined by base85.c in the Git source tree, which appears to be -// unique. src must contain at least len(dst) bytes of encoded data. +// alphabet defined by base85.c in the Git source tree. src must contain at +// least len(dst) bytes of encoded data. func base85Decode(dst, src []byte) error { var v uint32 var n, ndst int @@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error { } return nil } + +// base85Encode encodes src in Base85, writing the result to dst. It uses the +// alphabet defined by base85.c in the Git source tree. +func base85Encode(dst, src []byte) { + var di, si int + + encode := func(v uint32) { + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] + dst[di+1] = b85Alpha[(v/(85*85*85))%85] + dst[di+2] = b85Alpha[(v/(85*85))%85] + dst[di+3] = b85Alpha[(v/85)%85] + dst[di+4] = b85Alpha[v%85] + } + + n := (len(src) / 4) * 4 + for si < n { + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) + si += 4 + di += 5 + } + + var v uint32 + switch len(src) - si { + case 3: + v |= uint32(src[si+2]) << 8 + fallthrough + case 2: + v |= uint32(src[si+1]) << 16 + fallthrough + case 1: + v |= uint32(src[si+0]) << 24 + encode(v) + } +} + +// base85Len returns the length of n bytes of Base85 encoded data. +func base85Len(n int) int { + return (n + 3) / 4 * 5 +} diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index 068da63..672c471 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -1,6 +1,7 @@ package gitdiff import ( + "bytes" "testing" ) @@ -58,3 +59,60 @@ func TestBase85Decode(t *testing.T) { }) } } + +func TestBase85Encode(t *testing.T) { + tests := map[string]struct { + Input []byte + Output string + }{ + "zeroBytes": { + Input: []byte{}, + Output: "", + }, + "twoBytes": { + Input: []byte{0xCA, 0xFE}, + Output: "%KiWV", + }, + "fourBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, + Output: "007GV", + }, + "sixBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + Output: "007GV%KiWV", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + base85Encode(dst, test.Input) + for i, b := range test.Output { + if dst[i] != byte(b) { + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) + } + } + }) + } +} + +func FuzzBase85Roundtrip(f *testing.F) { + f.Add([]byte{0x2b, 0x0d}) + f.Add([]byte{0xbc, 0xb4, 0x3f}) + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) + + f.Fuzz(func(t *testing.T, in []byte) { + n := len(in) + dst := make([]byte, base85Len(n)) + out := make([]byte, n) + + base85Encode(dst, in) + if err := base85Decode(out, dst); err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) +} diff --git a/gitdiff/binary.go b/gitdiff/binary.go index c65a9a6..282e323 100644 --- a/gitdiff/binary.go +++ b/gitdiff/binary.go @@ -50,11 +50,11 @@ func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { } func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { - switch p.Line(0) { - case "GIT binary patch\n": + line := p.Line(0) + switch { + case line == "GIT binary patch\n": hasData = true - case "Binary files differ\n": - case "Files differ\n": + case isBinaryNoDataMarker(line): default: return false, false, nil } @@ -65,6 +65,13 @@ func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { return true, hasData, nil } +func isBinaryNoDataMarker(line string) bool { + if strings.HasSuffix(line, " differ\n") { + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") + } + return false +} + func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) if len(parts) < 2 { diff --git a/gitdiff/binary_test.go b/gitdiff/binary_test.go index a31a0e0..64db243 100644 --- a/gitdiff/binary_test.go +++ b/gitdiff/binary_test.go @@ -25,6 +25,16 @@ func TestParseBinaryMarker(t *testing.T) { IsBinary: true, HasData: false, }, + "binaryFileNoPatchPaths": { + Input: "Binary files a/foo.bin and b/foo.bin differ\n", + IsBinary: true, + HasData: false, + }, + "fileNoPatch": { + Input: "Files differ\n", + IsBinary: true, + HasData: false, + }, "textFile": { Input: "@@ -10,14 +22,31 @@\n", IsBinary: false, diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 77ceb5d..7ae4bc9 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -57,7 +57,7 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { return nil, "", err } } - return nil, "", nil + return nil, preamble.String(), nil } func (p *parser) ParseGitFileHeader() (*File, error) { diff --git a/gitdiff/format.go b/gitdiff/format.go new file mode 100644 index 0000000..d97aba9 --- /dev/null +++ b/gitdiff/format.go @@ -0,0 +1,281 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "strconv" +) + +type formatter struct { + w io.Writer + err error +} + +func newFormatter(w io.Writer) *formatter { + return &formatter{w: w} +} + +func (fm *formatter) Write(p []byte) (int, error) { + if fm.err != nil { + return len(p), nil + } + if _, err := fm.w.Write(p); err != nil { + fm.err = err + } + return len(p), nil +} + +func (fm *formatter) WriteString(s string) (int, error) { + fm.Write([]byte(s)) + return len(s), nil +} + +func (fm *formatter) WriteByte(c byte) error { + fm.Write([]byte{c}) + return nil +} + +func (fm *formatter) WriteQuotedName(s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + fm.WriteByte('"') + } + fm.WriteString(s[qpos:i]) + fm.Write(q) + qpos = i + 1 + } + } + fm.WriteString(s[qpos:]) + if qpos > 0 { + fm.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} + +func (fm *formatter) FormatFile(f *File) { + fm.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + fm.WriteQuotedName("a/" + aName) + fm.WriteByte(' ') + fm.WriteQuotedName("b/" + bName) + fm.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) + } else if f.NewMode != 0 { + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) + } else if f.OldMode != 0 { + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + fm.WriteString("copy from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("copy to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + fm.WriteString("rename from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("rename to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { + fmt.Fprintf(fm, " %o", f.OldMode) + } + + fm.WriteByte('\n') + } + + if f.IsBinary { + if f.BinaryFragment == nil { + fm.WriteString("Binary files ") + fm.WriteQuotedName("a/" + aName) + fm.WriteString(" and ") + fm.WriteQuotedName("b/" + bName) + fm.WriteString(" differ\n") + } else { + fm.WriteString("GIT binary patch\n") + fm.FormatBinaryFragment(f.BinaryFragment) + if f.ReverseBinaryFragment != nil { + fm.FormatBinaryFragment(f.ReverseBinaryFragment) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { + fm.WriteString("--- ") + if f.OldName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("a/" + f.OldName) + } + fm.WriteByte('\n') + + fm.WriteString("+++ ") + if f.NewName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("b/" + f.NewName) + } + fm.WriteByte('\n') + + for _, frag := range f.TextFragments { + fm.FormatTextFragment(frag) + } + } +} + +func (fm *formatter) FormatTextFragment(f *TextFragment) { + fm.FormatTextFragmentHeader(f) + fm.WriteByte('\n') + + for _, line := range f.Lines { + fm.WriteString(line.Op.String()) + fm.WriteString(line.Line) + if line.NoEOL() { + fm.WriteString("\n\\ No newline at end of file\n") + } + } +} + +func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + fm.WriteByte(' ') + fm.WriteString(f.Comment) + } +} + +func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { + const ( + maxBytesPerLine = 52 + ) + + switch f.Method { + case BinaryPatchDelta: + fm.WriteString("delta ") + case BinaryPatchLiteral: + fm.WriteString("literal ") + } + fm.Write(strconv.AppendInt(nil, f.Size, 10)) + fm.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + fm.WriteByte('z') + fm.Write(buf) + fm.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + fm.WriteByte(sizeChar) + fm.Write(buf) + fm.WriteByte('\n') + } + fm.WriteByte('\n') +} + +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw := zlib.NewWriter(&b) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/gitdiff/format_roundtrip_test.go b/gitdiff/format_roundtrip_test.go new file mode 100644 index 0000000..a230e91 --- /dev/null +++ b/gitdiff/format_roundtrip_test.go @@ -0,0 +1,157 @@ +package gitdiff + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "slices" + "testing" +) + +func TestFormatRoundtrip(t *testing.T) { + patches := []struct { + File string + SkipTextCompare bool + }{ + {File: "copy.patch"}, + {File: "copy_modify.patch"}, + {File: "delete.patch"}, + {File: "mode.patch"}, + {File: "mode_modify.patch"}, + {File: "modify.patch"}, + {File: "new.patch"}, + {File: "new_empty.patch"}, + {File: "new_mode.patch"}, + {File: "rename.patch"}, + {File: "rename_modify.patch"}, + + // Due to differences between Go's 'encoding/zlib' package and the zlib + // C library, binary patches cannot be compared directly as the patch + // data is slightly different when re-encoded by Go. + {File: "binary_modify.patch", SkipTextCompare: true}, + {File: "binary_new.patch", SkipTextCompare: true}, + {File: "binary_modify_nodata.patch"}, + } + + for _, patch := range patches { + t.Run(patch.File, func(t *testing.T) { + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) + if err != nil { + t.Fatalf("failed to read patch: %v", err) + } + + original := assertParseSingleFile(t, b, "patch") + str := original.String() + + if !patch.SkipTextCompare { + if string(b) != str { + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) + } + } + + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") + assertFilesEqual(t, original, reparsed) + }) + } +} + +func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { + files, _, err := Parse(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to parse %s: %v", kind, err) + } + if len(files) != 1 { + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) + } + return files[0] +} + +func assertFilesEqual(t *testing.T, expected, actual *File) { + assertEqual(t, expected.OldName, actual.OldName, "OldName") + assertEqual(t, expected.NewName, actual.NewName, "NewName") + + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") + + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") + + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") + assertEqual(t, expected.Score, actual.Score, "Score") + + if len(expected.TextFragments) == len(actual.TextFragments) { + for i := range expected.TextFragments { + prefix := fmt.Sprintf("TextFragments[%d].", i) + ef := expected.TextFragments[i] + af := actual.TextFragments[i] + + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") + + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") + + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") + + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") + + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") + + if !slices.Equal(ef.Lines, af.Lines) { + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) + } + } + } else { + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) + } + + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") + + if expected.BinaryFragment != nil { + if actual.BinaryFragment == nil { + t.Errorf("BinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.BinaryFragment + af := expected.BinaryFragment + + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.BinaryFragment != nil { + t.Errorf("BinaryFragment: expected nil, actual is non-nil") + } + + if expected.ReverseBinaryFragment != nil { + if actual.ReverseBinaryFragment == nil { + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.ReverseBinaryFragment + af := expected.ReverseBinaryFragment + + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.ReverseBinaryFragment != nil { + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") + } +} + +func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { + if expected != actual { + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) + } +} diff --git a/gitdiff/format_test.go b/gitdiff/format_test.go new file mode 100644 index 0000000..3325296 --- /dev/null +++ b/gitdiff/format_test.go @@ -0,0 +1,28 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestFormatter_WriteQuotedName(t *testing.T) { + tests := []struct { + Input string + Expected string + }{ + {"noquotes.txt", `noquotes.txt`}, + {"no quotes.txt", `no quotes.txt`}, + {"new\nline", `"new\nline"`}, + {"escape\x1B null\x00", `"escape\033 null\000"`}, + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, + {"\"already quoted\"", `"\"already quoted\""`}, + } + + for _, test := range tests { + var b strings.Builder + newFormatter(&b).WriteQuotedName(test.Input) + if b.String() != test.Expected { + t.Errorf("expected %q, got %q", test.Expected, b.String()) + } + } +} diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 18645bd..5365645 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "strings" ) // File describes changes to a single file. It can be either a text file or a @@ -38,6 +39,15 @@ type File struct { ReverseBinaryFragment *BinaryFragment } +// String returns a git diff representation of this file. The value can be +// parsed by this library to obtain the same File, but may not be the same as +// the original input. +func (f *File) String() string { + var diff strings.Builder + newFormatter(&diff).FormatFile(f) + return diff.String() +} + // TextFragment describes changed lines starting at a specific line in a text file. type TextFragment struct { Comment string @@ -57,9 +67,20 @@ type TextFragment struct { Lines []Line } -// Header returns the canonical header of this fragment. +// String returns a git diff format of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatTextFragment(f) + return diff.String() +} + +// Header returns a git diff header of this fragment. See [File.String] for +// more details on this format. func (f *TextFragment) Header() string { - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) + var hdr strings.Builder + newFormatter(&hdr).FormatTextFragmentHeader(f) + return hdr.String() } // Validate checks that the fragment is self-consistent and appliable. Validate @@ -197,3 +218,13 @@ const ( // BinaryPatchLiteral indicates the data is the exact file content BinaryPatchLiteral ) + +// String returns a git diff format of this fragment. Due to differences in +// zlib implementation between Go and Git, encoded binary data in the result +// will likely differ from what Git produces for the same input. See +// [File.String] for more details on this format. +func (f *BinaryFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatBinaryFragment(f) + return diff.String() +} diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d44465a..e8f8430 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -12,6 +12,10 @@ import ( // Parse parses a patch with changes to one or more files. Any content before // the first file is returned as the second value. If an error occurs while // parsing, it returns all files parsed before the error. +// +// Parse expects to receive a single patch. If the input may contain multiple +// patches (for example, if it is an mbox file), callers should split it into +// individual patches and call Parse on each one. func Parse(r io.Reader) ([]*File, string, error) { p := newParser(r) @@ -29,6 +33,9 @@ func Parse(r io.Reader) ([]*File, string, error) { if err != nil { return files, preamble, err } + if len(files) == 0 { + preamble = pre + } if file == nil { break } @@ -46,9 +53,6 @@ func Parse(r io.Reader) ([]*File, string, error) { } } - if len(files) == 0 { - preamble = pre - } files = append(files, file) } diff --git a/gitdiff/parser_test.go b/gitdiff/parser_test.go index 30f59f4..15a5d67 100644 --- a/gitdiff/parser_test.go +++ b/gitdiff/parser_test.go @@ -281,8 +281,13 @@ this is another line --- could this be a header? nope, it's just some dashes `, - Output: nil, - Preamble: "", + Output: nil, + Preamble: ` +this is a line +this is another line +--- could this be a header? +nope, it's just some dashes +`, }, "detatchedFragmentLike": { Input: ` @@ -290,6 +295,10 @@ a wild fragment appears? @@ -1,3 +1,4 ~1,5 @@ `, Output: nil, + Preamble: ` +a wild fragment appears? +@@ -1,3 +1,4 ~1,5 @@ +`, }, "detatchedFragment": { Input: ` @@ -426,6 +435,11 @@ Date: Tue Apr 2 22:55:40 2019 -0700 }, Preamble: textPreamble, }, + "noFiles": { + InputFile: "testdata/no_files.patch", + Output: nil, + Preamble: textPreamble, + }, "newBinaryFile": { InputFile: "testdata/new_binary_file.patch", Output: []*File{ diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index f935c6d..f047059 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -68,55 +68,6 @@ func (h *PatchHeader) Message() string { return msg.String() } -// PatchIdentity identifies a person who authored or committed a patch. -type PatchIdentity struct { - Name string - Email string -} - -func (i PatchIdentity) String() string { - name := i.Name - if name == "" { - name = `""` - } - return fmt.Sprintf("%s <%s>", name, i.Email) -} - -// ParsePatchIdentity parses a patch identity string. A valid string contains a -// non-empty name followed by an email address in angle brackets. Like Git, -// ParsePatchIdentity does not require that the email address is valid or -// properly formatted, only that it is non-empty. The name must not contain a -// left angle bracket, '<', and the email address must not contain a right -// angle bracket, '>'. -func ParsePatchIdentity(s string) (PatchIdentity, error) { - var emailStart, emailEnd int - for i, c := range s { - if c == '<' && emailStart == 0 { - emailStart = i + 1 - } - if c == '>' && emailStart > 0 { - emailEnd = i - break - } - } - if emailStart > 0 && emailEnd == 0 { - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) - } - - var name, email string - if emailStart > 0 { - name = strings.TrimSpace(s[:emailStart-1]) - } - if emailStart > 0 && emailEnd > 0 { - email = strings.TrimSpace(s[emailStart:emailEnd]) - } - if name == "" || email == "" { - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) - } - - return PatchIdentity{Name: name, Email: email}, nil -} - // ParsePatchDate parses a patch date string. It returns the parsed time or an // error if s has an unknown format. ParsePatchDate supports the iso, rfc, // short, raw, unix, and default formats (with local variants) used by the @@ -418,16 +369,13 @@ func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*Pa } } - addrs, err := msg.Header.AddressList("From") - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { - return nil, err - } - if len(addrs) > 0 { - addr := addrs[0] - if addr.Name == "" { - addr.Name = addr.Address + from := msg.Header.Get("From") + if from != "" { + u, err := ParsePatchIdentity(from) + if err != nil { + return nil, err } - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} + h.Author = &u } date := msg.Header.Get("Date") diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 003e8c7..c8559b0 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -5,65 +5,6 @@ import ( "time" ) -func TestParsePatchIdentity(t *testing.T) { - tests := map[string]struct { - Input string - Output PatchIdentity - Err interface{} - }{ - "simple": { - Input: "Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "extraWhitespace": { - Input: " Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "trailingCharacters": { - Input: "Morton Haypenny unrelated garbage", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "missingName": { - Input: "", - Err: "invalid identity", - }, - "missingEmail": { - Input: "Morton Haypenny", - Err: "invalid identity", - }, - "unclosedEmail": { - Input: "Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: &PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny diff --git a/gitdiff/patch_identity.go b/gitdiff/patch_identity.go new file mode 100644 index 0000000..018f80c --- /dev/null +++ b/gitdiff/patch_identity.go @@ -0,0 +1,166 @@ +package gitdiff + +import ( + "fmt" + "strings" +) + +// PatchIdentity identifies a person who authored or committed a patch. +type PatchIdentity struct { + Name string + Email string +} + +func (i PatchIdentity) String() string { + name := i.Name + if name == "" { + name = `""` + } + return fmt.Sprintf("%s <%s>", name, i.Email) +} + +// ParsePatchIdentity parses a patch identity string. A patch identity contains +// an email address and an optional name in [RFC 5322] format. This is either a +// plain email adddress or a name followed by an address in angle brackets: +// +// author@example.com +// Author Name +// +// If the input is not one of these formats, ParsePatchIdentity applies a +// heuristic to separate the name and email portions. If both the name and +// email are missing or empty, ParsePatchIdentity returns an error. It +// otherwise does not validate the result. +// +// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 +func ParsePatchIdentity(s string) (PatchIdentity, error) { + s = normalizeSpace(s) + s = unquotePairs(s) + + var name, email string + if at := strings.IndexByte(s, '@'); at >= 0 { + start, end := at, at + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { + start-- + } + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { + end++ + } + email = s[start+1 : end] + + // Adjust the boundaries so that we drop angle brackets, but keep + // spaces when removing the email to form the name. + if start < 0 || s[start] != '<' { + start++ + } + if end >= len(s) || s[end] != '>' { + end-- + } + name = s[:start] + s[end+1:] + } else { + start, end := 0, 0 + for i := 0; i < len(s); i++ { + if s[i] == '<' && start == 0 { + start = i + 1 + } + if s[i] == '>' && start > 0 { + end = i + break + } + } + if start > 0 && end >= start { + email = strings.TrimSpace(s[start:end]) + name = s[:start-1] + } + } + + // After extracting the email, the name might contain extra whitespace + // again and may be surrounded by comment characters. The git source gives + // these examples of when this can happen: + // + // "Name " + // "email@domain (Name)" + // "Name (Comment)" + // + name = normalizeSpace(name) + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { + name = name[1 : len(name)-1] + } + name = strings.TrimSpace(name) + + // If the name is empty or contains email-like characters, use the email + // instead (assuming one exists) + if name == "" || strings.ContainsAny(name, "@<>") { + name = email + } + + if name == "" && email == "" { + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) + } + return PatchIdentity{Name: name, Email: email}, nil +} + +// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to +// remove any "quoted-pairs" (backslash-espaced characters). It also removes +// the quotes from any quoted strings, but leaves the comment delimiters. +func unquotePairs(s string) string { + quote := false + comments := 0 + escaped := false + + var out strings.Builder + for i := 0; i < len(s); i++ { + if escaped { + escaped = false + } else { + switch s[i] { + case '\\': + // quoted-pair is only allowed in quoted-string/comment + if quote || comments > 0 { + escaped = true + continue // drop '\' character + } + + case '"': + if comments == 0 { + quote = !quote + continue // drop '"' character + } + + case '(': + if !quote { + comments++ + } + case ')': + if comments > 0 { + comments-- + } + } + } + out.WriteByte(s[i]) + } + return out.String() +} + +// normalizeSpace trims leading and trailing whitespace from s and converts +// inner sequences of one or more whitespace characters to single spaces. +func normalizeSpace(s string) string { + var sb strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + if !isRFC5332Space(c) { + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { + sb.WriteByte(' ') + } + sb.WriteByte(c) + } + } + return sb.String() +} + +func isRFC5332Space(c byte) bool { + switch c { + case '\t', '\n', '\r', ' ': + return true + } + return false +} diff --git a/gitdiff/patch_identity_test.go b/gitdiff/patch_identity_test.go new file mode 100644 index 0000000..f15fe38 --- /dev/null +++ b/gitdiff/patch_identity_test.go @@ -0,0 +1,127 @@ +package gitdiff + +import ( + "testing" +) + +func TestParsePatchIdentity(t *testing.T) { + tests := map[string]struct { + Input string + Output PatchIdentity + Err interface{} + }{ + "simple": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "extraWhitespace": { + Input: "\t Morton Haypenny \r\n ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "trailingCharacters": { + Input: "Morton Haypenny II", + Output: PatchIdentity{ + Name: "Morton Haypenny II", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmail": { + Input: "mhaypenny@example.com", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmailInBrackets": { + Input: "", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "rfc5322SpecialCharacters": { + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, + Output: PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + }, + "rfc5322QuotedPairs": { + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, + Output: PatchIdentity{ + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, + Email: "mhaypenny+[1900]@example.com", + }, + }, + "rfc5322QuotedPairsOutOfContext": { + Input: `Morton \\Backslash Haypenny `, + Output: PatchIdentity{ + Name: `Morton \\Backslash Haypenny`, + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, + }, + "unclosedEmail": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "bogusEmailWithWhitespace": { + Input: "Morton Haypenny < mhaypenny >", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "missingEmail": { + Input: "Morton Haypenny", + Err: "invalid identity", + }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + id, err := ParsePatchIdentity(test.Input) + if test.Err != nil { + assertError(t, test.Err, err, "parsing identity") + return + } + if err != nil { + t.Fatalf("unexpected error parsing identity: %v", err) + } + + if test.Output != id { + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) + } + }) + } +} diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.out b/gitdiff/testdata/apply/text_fragment_change_end_eol.out new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 \ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.patch b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch new file mode 100644 index 0000000..f1c9477 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch @@ -0,0 +1,10 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +index a92d664..8cf2f17 100644 +--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src ++++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +@@ -1,3 +1,3 @@ + line 1 + line 2 +-line 3 ++line 3 +\ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.src b/gitdiff/testdata/apply/text_fragment_change_end_eol.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/gitdiff/testdata/no_files.patch b/gitdiff/testdata/no_files.patch new file mode 100644 index 0000000..9eea12d --- /dev/null +++ b/gitdiff/testdata/no_files.patch @@ -0,0 +1,8 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + diff --git a/gitdiff/testdata/string/binary_modify.patch b/gitdiff/testdata/string/binary_modify.patch new file mode 100644 index 0000000..12ddad5 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify.patch @@ -0,0 +1,9 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 +GIT binary patch +delta 66 +QcmeZhVVvM$!$1K50C&Ox;s5{u + +delta 5 +McmZo+^qAlQ00i9urT_o{ + diff --git a/gitdiff/testdata/string/binary_modify_nodata.patch b/gitdiff/testdata/string/binary_modify_nodata.patch new file mode 100644 index 0000000..833a534 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify_nodata.patch @@ -0,0 +1,3 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d..bdc9a70 100644 +Binary files a/file.bin and b/file.bin differ diff --git a/gitdiff/testdata/string/binary_new.patch b/gitdiff/testdata/string/binary_new.patch new file mode 100644 index 0000000..c56f35e --- /dev/null +++ b/gitdiff/testdata/string/binary_new.patch @@ -0,0 +1,11 @@ +diff --git a/file.bin b/file.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c +GIT binary patch +literal 72 +zcmV-O0Jr~td-`u6JcK&{KDK=