From 13e8639a0b49cb49741b05f11ac52c530d2d38f0 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 25 Feb 2024 12:39:09 -0800 Subject: [PATCH 01/12] Clarify that Parse expects a single patch --- gitdiff/parser.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d44465a..d2b29cb 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -12,6 +12,10 @@ import ( // Parse parses a patch with changes to one or more files. Any content before // the first file is returned as the second value. If an error occurs while // parsing, it returns all files parsed before the error. +// +// Parse expects to receive a single patch. If the input may contain multiple +// patches (for example, if it is an mbox file), callers should split it into +// individual patches and call Parse on each one. func Parse(r io.Reader) ([]*File, string, error) { p := newParser(r) From 3f2ea5c16e0a9d685e1b91b6b8bfc0fbf4cb5d6a Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Wed, 6 Mar 2024 21:32:38 -0800 Subject: [PATCH 02/12] Accept empty emails in ParsePatchIdentity (#42) Git is actually more lenient here than I thought. As long as the identity contains the "<>" delimiters, Git will allow an empty email, so we should accept the same thing. I also discovered that an identity with only an email set will use the email as the name, so I've implemented that behavior as well. --- gitdiff/patch_header.go | 21 ++++++++++++++------- gitdiff/patch_header_test.go | 22 ++++++++++++++++++++-- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index f935c6d..af3293b 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -82,12 +82,15 @@ func (i PatchIdentity) String() string { return fmt.Sprintf("%s <%s>", name, i.Email) } -// ParsePatchIdentity parses a patch identity string. A valid string contains a -// non-empty name followed by an email address in angle brackets. Like Git, -// ParsePatchIdentity does not require that the email address is valid or -// properly formatted, only that it is non-empty. The name must not contain a -// left angle bracket, '<', and the email address must not contain a right -// angle bracket, '>'. +// ParsePatchIdentity parses a patch identity string. A valid string contains +// an optional name followed by an email address in angle brackets. The angle +// brackets must always exist, but may enclose an empty address. At least one +// of the name or the email address must be non-empty. If the string only +// contains an email address, that value is also used as the name. +// +// The name must not contain a left angle bracket, '<', and the email address +// must not contain a right angle bracket, '>'. Otherwise, there are no +// restrictions on the format of either field. func ParsePatchIdentity(s string) (PatchIdentity, error) { var emailStart, emailEnd int for i, c := range s { @@ -110,7 +113,11 @@ func ParsePatchIdentity(s string) (PatchIdentity, error) { if emailStart > 0 && emailEnd > 0 { email = strings.TrimSpace(s[emailStart:emailEnd]) } - if name == "" || email == "" { + if name == "" && email != "" { + name = email + } + + if name == "" && email == "" { return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 003e8c7..33ec209 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -32,14 +32,32 @@ func TestParsePatchIdentity(t *testing.T) { Email: "mhaypenny@example.com", }, }, - "missingName": { + "onlyEmail": { Input: "", - Err: "invalid identity", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, }, "missingEmail": { Input: "Morton Haypenny", Err: "invalid identity", }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, "unclosedEmail": { Input: "Morton Haypenny Date: Sun, 5 May 2024 23:16:19 -0400 Subject: [PATCH 03/12] Follow git logic when parsing patch identities (#44) When GitHub creates patches for Dependabot PRs, it generates a "From:" line that is not valid according to RFC 5322: the address spec contains unquoted special characters (the "[bot]" in "dependabot[bot]"). While the 'net/mail' parser makes some exceptions to the spec, this is not one of them, so parsing these patch headers fails. Git's 'mailinfo' command avoids this by only implementing the unquoting part of RFC 5322 and then applying a heuristic to separate the string in to name and email values that seem reasonable. This commit does two things: 1. Reimplements ParsePatchIdentity to follow Git's logic, so that it can accept a wider range of inputs, including quoted strings. Strings accepted by the previous implementation parse in the same way with one exception: inputs that contain whitespace inside the angle brackets for an email address now use the email address as the name and drop any separate name component. 2. When parsing mail-formatted patches, use ParsePatchIdentity to parse the "From:" line instead of the 'net/mail' function. --- gitdiff/patch_header.go | 71 ++------------ gitdiff/patch_header_test.go | 99 +++++--------------- gitdiff/patch_identity.go | 166 +++++++++++++++++++++++++++++++++ gitdiff/patch_identity_test.go | 127 +++++++++++++++++++++++++ 4 files changed, 321 insertions(+), 142 deletions(-) create mode 100644 gitdiff/patch_identity.go create mode 100644 gitdiff/patch_identity_test.go diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index af3293b..f047059 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -68,62 +68,6 @@ func (h *PatchHeader) Message() string { return msg.String() } -// PatchIdentity identifies a person who authored or committed a patch. -type PatchIdentity struct { - Name string - Email string -} - -func (i PatchIdentity) String() string { - name := i.Name - if name == "" { - name = `""` - } - return fmt.Sprintf("%s <%s>", name, i.Email) -} - -// ParsePatchIdentity parses a patch identity string. A valid string contains -// an optional name followed by an email address in angle brackets. The angle -// brackets must always exist, but may enclose an empty address. At least one -// of the name or the email address must be non-empty. If the string only -// contains an email address, that value is also used as the name. -// -// The name must not contain a left angle bracket, '<', and the email address -// must not contain a right angle bracket, '>'. Otherwise, there are no -// restrictions on the format of either field. -func ParsePatchIdentity(s string) (PatchIdentity, error) { - var emailStart, emailEnd int - for i, c := range s { - if c == '<' && emailStart == 0 { - emailStart = i + 1 - } - if c == '>' && emailStart > 0 { - emailEnd = i - break - } - } - if emailStart > 0 && emailEnd == 0 { - return PatchIdentity{}, fmt.Errorf("invalid identity string: unclosed email section: %s", s) - } - - var name, email string - if emailStart > 0 { - name = strings.TrimSpace(s[:emailStart-1]) - } - if emailStart > 0 && emailEnd > 0 { - email = strings.TrimSpace(s[emailStart:emailEnd]) - } - if name == "" && email != "" { - name = email - } - - if name == "" && email == "" { - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) - } - - return PatchIdentity{Name: name, Email: email}, nil -} - // ParsePatchDate parses a patch date string. It returns the parsed time or an // error if s has an unknown format. ParsePatchDate supports the iso, rfc, // short, raw, unix, and default formats (with local variants) used by the @@ -425,16 +369,13 @@ func parseHeaderMail(mailLine string, r io.Reader, opts patchHeaderOptions) (*Pa } } - addrs, err := msg.Header.AddressList("From") - if err != nil && !errors.Is(err, mail.ErrHeaderNotPresent) { - return nil, err - } - if len(addrs) > 0 { - addr := addrs[0] - if addr.Name == "" { - addr.Name = addr.Address + from := msg.Header.Get("From") + if from != "" { + u, err := ParsePatchIdentity(from) + if err != nil { + return nil, err } - h.Author = &PatchIdentity{Name: addr.Name, Email: addr.Address} + h.Author = &u } date := msg.Header.Get("Date") diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index 33ec209..c8559b0 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -5,83 +5,6 @@ import ( "time" ) -func TestParsePatchIdentity(t *testing.T) { - tests := map[string]struct { - Input string - Output PatchIdentity - Err interface{} - }{ - "simple": { - Input: "Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "extraWhitespace": { - Input: " Morton Haypenny ", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "trailingCharacters": { - Input: "Morton Haypenny unrelated garbage", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "mhaypenny@example.com", - }, - }, - "onlyEmail": { - Input: "", - Output: PatchIdentity{ - Name: "mhaypenny@example.com", - Email: "mhaypenny@example.com", - }, - }, - "emptyEmail": { - Input: "Morton Haypenny <>", - Output: PatchIdentity{ - Name: "Morton Haypenny", - Email: "", - }, - }, - "missingEmail": { - Input: "Morton Haypenny", - Err: "invalid identity", - }, - "missingNameAndEmptyEmail": { - Input: "<>", - Err: "invalid identity", - }, - "empty": { - Input: "", - Err: "invalid identity", - }, - "unclosedEmail": { - Input: "Morton Haypenny +Date: Sat, 11 Apr 2020 15:21:23 -0700 +Subject: [PATCH] A sample commit to test header parsing + +The medium format shows the body, which +may wrap on to multiple lines. + +Another body line. +`, + Header: PatchHeader{ + SHA: expectedSHA, + Author: &PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + AuthorDate: expectedDate, + Title: expectedTitle, + Body: expectedBody, + }, + }, "mailboxAppendix": { Input: `From 61f5cd90bed4d204ee3feb3aa41ee91d4734855b Mon Sep 17 00:00:00 2001 From: Morton Haypenny diff --git a/gitdiff/patch_identity.go b/gitdiff/patch_identity.go new file mode 100644 index 0000000..018f80c --- /dev/null +++ b/gitdiff/patch_identity.go @@ -0,0 +1,166 @@ +package gitdiff + +import ( + "fmt" + "strings" +) + +// PatchIdentity identifies a person who authored or committed a patch. +type PatchIdentity struct { + Name string + Email string +} + +func (i PatchIdentity) String() string { + name := i.Name + if name == "" { + name = `""` + } + return fmt.Sprintf("%s <%s>", name, i.Email) +} + +// ParsePatchIdentity parses a patch identity string. A patch identity contains +// an email address and an optional name in [RFC 5322] format. This is either a +// plain email adddress or a name followed by an address in angle brackets: +// +// author@example.com +// Author Name +// +// If the input is not one of these formats, ParsePatchIdentity applies a +// heuristic to separate the name and email portions. If both the name and +// email are missing or empty, ParsePatchIdentity returns an error. It +// otherwise does not validate the result. +// +// [RFC 5322]: https://datatracker.ietf.org/doc/html/rfc5322 +func ParsePatchIdentity(s string) (PatchIdentity, error) { + s = normalizeSpace(s) + s = unquotePairs(s) + + var name, email string + if at := strings.IndexByte(s, '@'); at >= 0 { + start, end := at, at + for start >= 0 && !isRFC5332Space(s[start]) && s[start] != '<' { + start-- + } + for end < len(s) && !isRFC5332Space(s[end]) && s[end] != '>' { + end++ + } + email = s[start+1 : end] + + // Adjust the boundaries so that we drop angle brackets, but keep + // spaces when removing the email to form the name. + if start < 0 || s[start] != '<' { + start++ + } + if end >= len(s) || s[end] != '>' { + end-- + } + name = s[:start] + s[end+1:] + } else { + start, end := 0, 0 + for i := 0; i < len(s); i++ { + if s[i] == '<' && start == 0 { + start = i + 1 + } + if s[i] == '>' && start > 0 { + end = i + break + } + } + if start > 0 && end >= start { + email = strings.TrimSpace(s[start:end]) + name = s[:start-1] + } + } + + // After extracting the email, the name might contain extra whitespace + // again and may be surrounded by comment characters. The git source gives + // these examples of when this can happen: + // + // "Name " + // "email@domain (Name)" + // "Name (Comment)" + // + name = normalizeSpace(name) + if strings.HasPrefix(name, "(") && strings.HasSuffix(name, ")") { + name = name[1 : len(name)-1] + } + name = strings.TrimSpace(name) + + // If the name is empty or contains email-like characters, use the email + // instead (assuming one exists) + if name == "" || strings.ContainsAny(name, "@<>") { + name = email + } + + if name == "" && email == "" { + return PatchIdentity{}, fmt.Errorf("invalid identity string %q", s) + } + return PatchIdentity{Name: name, Email: email}, nil +} + +// unquotePairs process the RFC5322 tokens "quoted-string" and "comment" to +// remove any "quoted-pairs" (backslash-espaced characters). It also removes +// the quotes from any quoted strings, but leaves the comment delimiters. +func unquotePairs(s string) string { + quote := false + comments := 0 + escaped := false + + var out strings.Builder + for i := 0; i < len(s); i++ { + if escaped { + escaped = false + } else { + switch s[i] { + case '\\': + // quoted-pair is only allowed in quoted-string/comment + if quote || comments > 0 { + escaped = true + continue // drop '\' character + } + + case '"': + if comments == 0 { + quote = !quote + continue // drop '"' character + } + + case '(': + if !quote { + comments++ + } + case ')': + if comments > 0 { + comments-- + } + } + } + out.WriteByte(s[i]) + } + return out.String() +} + +// normalizeSpace trims leading and trailing whitespace from s and converts +// inner sequences of one or more whitespace characters to single spaces. +func normalizeSpace(s string) string { + var sb strings.Builder + for i := 0; i < len(s); i++ { + c := s[i] + if !isRFC5332Space(c) { + if sb.Len() > 0 && isRFC5332Space(s[i-1]) { + sb.WriteByte(' ') + } + sb.WriteByte(c) + } + } + return sb.String() +} + +func isRFC5332Space(c byte) bool { + switch c { + case '\t', '\n', '\r', ' ': + return true + } + return false +} diff --git a/gitdiff/patch_identity_test.go b/gitdiff/patch_identity_test.go new file mode 100644 index 0000000..f15fe38 --- /dev/null +++ b/gitdiff/patch_identity_test.go @@ -0,0 +1,127 @@ +package gitdiff + +import ( + "testing" +) + +func TestParsePatchIdentity(t *testing.T) { + tests := map[string]struct { + Input string + Output PatchIdentity + Err interface{} + }{ + "simple": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "extraWhitespace": { + Input: "\t Morton Haypenny \r\n ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + }, + "trailingCharacters": { + Input: "Morton Haypenny II", + Output: PatchIdentity{ + Name: "Morton Haypenny II", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmail": { + Input: "mhaypenny@example.com", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "onlyEmailInBrackets": { + Input: "", + Output: PatchIdentity{ + Name: "mhaypenny@example.com", + Email: "mhaypenny@example.com", + }, + }, + "rfc5322SpecialCharacters": { + Input: `"dependabot[bot]" <12345+dependabot[bot]@users.noreply.github.com>`, + Output: PatchIdentity{ + Name: "dependabot[bot]", + Email: "12345+dependabot[bot]@users.noreply.github.com", + }, + }, + "rfc5322QuotedPairs": { + Input: `"Morton \"Old-Timer\" Haypenny" <"mhaypenny\+[1900]"@example.com> (III \(PhD\))`, + Output: PatchIdentity{ + Name: `Morton "Old-Timer" Haypenny (III (PhD))`, + Email: "mhaypenny+[1900]@example.com", + }, + }, + "rfc5322QuotedPairsOutOfContext": { + Input: `Morton \\Backslash Haypenny `, + Output: PatchIdentity{ + Name: `Morton \\Backslash Haypenny`, + Email: "mhaypenny@example.com", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, + }, + "unclosedEmail": { + Input: "Morton Haypenny ", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "bogusEmailWithWhitespace": { + Input: "Morton Haypenny < mhaypenny >", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny", + }, + }, + "missingEmail": { + Input: "Morton Haypenny", + Err: "invalid identity", + }, + "missingNameAndEmptyEmail": { + Input: "<>", + Err: "invalid identity", + }, + "empty": { + Input: "", + Err: "invalid identity", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + id, err := ParsePatchIdentity(test.Input) + if test.Err != nil { + assertError(t, test.Err, err, "parsing identity") + return + } + if err != nil { + t.Fatalf("unexpected error parsing identity: %v", err) + } + + if test.Output != id { + t.Errorf("incorrect identity: expected %#v, actual %#v", test.Output, id) + } + }) + } +} From 0a4e55f9a190b500835e10d187ca4f88a96917e2 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 14 Jul 2024 23:44:16 -0400 Subject: [PATCH 04/12] Return preamble when a patch has no files (#46) While empty patches with only a header were parsable, the parser discarded the preamble content. This meant callers had to handle this case specially. Now, if we reach the end of the input without finding a file, Parse() returns the full content of the patch as the preamble. --- gitdiff/file_header.go | 2 +- gitdiff/parser.go | 6 +++--- gitdiff/parser_test.go | 18 ++++++++++++++++-- gitdiff/testdata/no_files.patch | 8 ++++++++ 4 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 gitdiff/testdata/no_files.patch diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 77ceb5d..7ae4bc9 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -57,7 +57,7 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { return nil, "", err } } - return nil, "", nil + return nil, preamble.String(), nil } func (p *parser) ParseGitFileHeader() (*File, error) { diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d2b29cb..e8f8430 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -33,6 +33,9 @@ func Parse(r io.Reader) ([]*File, string, error) { if err != nil { return files, preamble, err } + if len(files) == 0 { + preamble = pre + } if file == nil { break } @@ -50,9 +53,6 @@ func Parse(r io.Reader) ([]*File, string, error) { } } - if len(files) == 0 { - preamble = pre - } files = append(files, file) } diff --git a/gitdiff/parser_test.go b/gitdiff/parser_test.go index 30f59f4..15a5d67 100644 --- a/gitdiff/parser_test.go +++ b/gitdiff/parser_test.go @@ -281,8 +281,13 @@ this is another line --- could this be a header? nope, it's just some dashes `, - Output: nil, - Preamble: "", + Output: nil, + Preamble: ` +this is a line +this is another line +--- could this be a header? +nope, it's just some dashes +`, }, "detatchedFragmentLike": { Input: ` @@ -290,6 +295,10 @@ a wild fragment appears? @@ -1,3 +1,4 ~1,5 @@ `, Output: nil, + Preamble: ` +a wild fragment appears? +@@ -1,3 +1,4 ~1,5 @@ +`, }, "detatchedFragment": { Input: ` @@ -426,6 +435,11 @@ Date: Tue Apr 2 22:55:40 2019 -0700 }, Preamble: textPreamble, }, + "noFiles": { + InputFile: "testdata/no_files.patch", + Output: nil, + Preamble: textPreamble, + }, "newBinaryFile": { InputFile: "testdata/new_binary_file.patch", Output: []*File{ diff --git a/gitdiff/testdata/no_files.patch b/gitdiff/testdata/no_files.patch new file mode 100644 index 0000000..9eea12d --- /dev/null +++ b/gitdiff/testdata/no_files.patch @@ -0,0 +1,8 @@ +commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + From 9e0997ef65150db9392595ab9f2a1df593d48d22 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 14:39:23 -0400 Subject: [PATCH 05/12] Update Go and golangci-lint versions (#49) The minimum Go version for the package is now Go 1.21. This is because a future change will use the 'slices' package in test code. Note that non-test code in the package should still be compatible with older versions of Go. As part of this, also update the golangci-lint version to one that works with Go 1.21, which required replacing some deprecated linters. --- .github/workflows/go.yml | 12 ++++++------ .golangci.yml | 28 ++++++++++++++++++++++++---- go.mod | 2 +- 3 files changed, 31 insertions(+), 11 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index 558965e..a59475a 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -9,18 +9,18 @@ jobs: name: Verify runs-on: ubuntu-latest steps: - - name: Set up Go 1.19 - uses: actions/setup-go@v3 + - name: Set up Go 1.21 + uses: actions/setup-go@v5 with: - go-version: 1.19 + go-version: 1.21 - name: Check out code into the Go module directory - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: Lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v6 with: - version: v1.49 + version: v1.59 - name: Test run: go test -v ./... diff --git a/.golangci.yml b/.golangci.yml index 82dbad2..30b37f8 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -4,21 +4,41 @@ run: linters: disable-all: true enable: - - deadcode - errcheck - gofmt - goimports - - golint - govet - ineffassign - misspell + - revive - typecheck - unconvert - - varcheck + - unused issues: exclude-use-default: false -linter-settings: +linters-settings: goimports: local-prefixes: github.com/bluekeyes/go-gitdiff + revive: + rules: + # enable all rules from golint + - name: context-keys-type + - name: time-naming + - name: var-declaration + - name: unexported-return + - name: errorf + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: increment-decrement + - name: var-naming + - name: package-comments + - name: range + - name: receiver-naming + - name: indent-error-flow diff --git a/go.mod b/go.mod index f35826e..27c3738 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,3 @@ module github.com/bluekeyes/go-gitdiff -go 1.13 +go 1.21 From 8584cd59afe0fd5af926a98ea3ef650fa4b1f952 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sun, 11 Aug 2024 14:57:07 -0400 Subject: [PATCH 06/12] Add String() methods to parsed types (#48) This enables clients to move back and forth between parsed objects and text patches. The generated patches are semantically equal to the parsed object and should re-parse to the same object, but may not be byte-for-byte identical to the original input. In my testing, formatted text patches are usually identical to the input, but there may be cases where this is not true. Binary patches always differ. This is because Go's 'compress/flate' package ends streams with an empty block instead of adding the end-of-stream flag to the last non-empty block, like Git's C implementation. Since the streams will always be different for this reason, I chose to also enable default compression (the test patches I generated with Git used no compression.) The main tests for this feature involve parsing, formatting, and then re-parsing a patch to make sure we get equal objects. Formatting is handled by a new internal formatter type, which allows writing all data to the same stream. This isn't exposed publicly right now, but will be useful if there's a need for more flexible formatting functions in the future, like formatting to a user-provided io.Writer. --- .golangci.yml | 6 + gitdiff/base85.go | 43 ++- gitdiff/base85_test.go | 58 ++++ gitdiff/format.go | 277 ++++++++++++++++++++ gitdiff/format_roundtrip_test.go | 156 +++++++++++ gitdiff/format_test.go | 28 ++ gitdiff/gitdiff.go | 35 ++- gitdiff/testdata/string/binary_modify.patch | 9 + gitdiff/testdata/string/binary_new.patch | 11 + gitdiff/testdata/string/copy.patch | 4 + gitdiff/testdata/string/copy_modify.patch | 21 ++ gitdiff/testdata/string/delete.patch | 16 ++ gitdiff/testdata/string/mode.patch | 3 + gitdiff/testdata/string/mode_modify.patch | 10 + gitdiff/testdata/string/modify.patch | 16 ++ gitdiff/testdata/string/new.patch | 16 ++ gitdiff/testdata/string/new_empty.patch | 3 + gitdiff/testdata/string/new_mode.patch | 16 ++ gitdiff/testdata/string/rename.patch | 4 + gitdiff/testdata/string/rename_modify.patch | 18 ++ 20 files changed, 746 insertions(+), 4 deletions(-) create mode 100644 gitdiff/format.go create mode 100644 gitdiff/format_roundtrip_test.go create mode 100644 gitdiff/format_test.go create mode 100644 gitdiff/testdata/string/binary_modify.patch create mode 100644 gitdiff/testdata/string/binary_new.patch create mode 100644 gitdiff/testdata/string/copy.patch create mode 100644 gitdiff/testdata/string/copy_modify.patch create mode 100644 gitdiff/testdata/string/delete.patch create mode 100644 gitdiff/testdata/string/mode.patch create mode 100644 gitdiff/testdata/string/mode_modify.patch create mode 100644 gitdiff/testdata/string/modify.patch create mode 100644 gitdiff/testdata/string/new.patch create mode 100644 gitdiff/testdata/string/new_empty.patch create mode 100644 gitdiff/testdata/string/new_mode.patch create mode 100644 gitdiff/testdata/string/rename.patch create mode 100644 gitdiff/testdata/string/rename_modify.patch diff --git a/.golangci.yml b/.golangci.yml index 30b37f8..192c556 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -19,6 +19,12 @@ issues: exclude-use-default: false linters-settings: + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) goimports: local-prefixes: github.com/bluekeyes/go-gitdiff revive: diff --git a/gitdiff/base85.go b/gitdiff/base85.go index 385aa64..86db117 100644 --- a/gitdiff/base85.go +++ b/gitdiff/base85.go @@ -19,8 +19,8 @@ func init() { } // base85Decode decodes Base85-encoded data from src into dst. It uses the -// alphabet defined by base85.c in the Git source tree, which appears to be -// unique. src must contain at least len(dst) bytes of encoded data. +// alphabet defined by base85.c in the Git source tree. src must contain at +// least len(dst) bytes of encoded data. func base85Decode(dst, src []byte) error { var v uint32 var n, ndst int @@ -50,3 +50,42 @@ func base85Decode(dst, src []byte) error { } return nil } + +// base85Encode encodes src in Base85, writing the result to dst. It uses the +// alphabet defined by base85.c in the Git source tree. +func base85Encode(dst, src []byte) { + var di, si int + + encode := func(v uint32) { + dst[di+0] = b85Alpha[(v/(85*85*85*85))%85] + dst[di+1] = b85Alpha[(v/(85*85*85))%85] + dst[di+2] = b85Alpha[(v/(85*85))%85] + dst[di+3] = b85Alpha[(v/85)%85] + dst[di+4] = b85Alpha[v%85] + } + + n := (len(src) / 4) * 4 + for si < n { + encode(uint32(src[si+0])<<24 | uint32(src[si+1])<<16 | uint32(src[si+2])<<8 | uint32(src[si+3])) + si += 4 + di += 5 + } + + var v uint32 + switch len(src) - si { + case 3: + v |= uint32(src[si+2]) << 8 + fallthrough + case 2: + v |= uint32(src[si+1]) << 16 + fallthrough + case 1: + v |= uint32(src[si+0]) << 24 + encode(v) + } +} + +// base85Len returns the length of n bytes of Base85 encoded data. +func base85Len(n int) int { + return (n + 3) / 4 * 5 +} diff --git a/gitdiff/base85_test.go b/gitdiff/base85_test.go index 068da63..672c471 100644 --- a/gitdiff/base85_test.go +++ b/gitdiff/base85_test.go @@ -1,6 +1,7 @@ package gitdiff import ( + "bytes" "testing" ) @@ -58,3 +59,60 @@ func TestBase85Decode(t *testing.T) { }) } } + +func TestBase85Encode(t *testing.T) { + tests := map[string]struct { + Input []byte + Output string + }{ + "zeroBytes": { + Input: []byte{}, + Output: "", + }, + "twoBytes": { + Input: []byte{0xCA, 0xFE}, + Output: "%KiWV", + }, + "fourBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE}, + Output: "007GV", + }, + "sixBytes": { + Input: []byte{0x0, 0x0, 0xCA, 0xFE, 0xCA, 0xFE}, + Output: "007GV%KiWV", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + dst := make([]byte, len(test.Output)) + base85Encode(dst, test.Input) + for i, b := range test.Output { + if dst[i] != byte(b) { + t.Errorf("incorrect character at index %d: expected '%c', actual '%c'", i, b, dst[i]) + } + } + }) + } +} + +func FuzzBase85Roundtrip(f *testing.F) { + f.Add([]byte{0x2b, 0x0d}) + f.Add([]byte{0xbc, 0xb4, 0x3f}) + f.Add([]byte{0xfa, 0x62, 0x05, 0x83, 0x24, 0x39, 0xd5, 0x25}) + f.Add([]byte{0x31, 0x59, 0x02, 0xa0, 0x61, 0x12, 0xd9, 0x43, 0xb8, 0x23, 0x1a, 0xb4, 0x02, 0xae, 0xfa, 0xcc, 0x22, 0xad, 0x41, 0xb9, 0xb8}) + + f.Fuzz(func(t *testing.T, in []byte) { + n := len(in) + dst := make([]byte, base85Len(n)) + out := make([]byte, n) + + base85Encode(dst, in) + if err := base85Decode(out, dst); err != nil { + t.Fatalf("unexpected error decoding base85 data: %v", err) + } + if !bytes.Equal(in, out) { + t.Errorf("decoded data differed from input data:\n input: %x\n output: %x\nencoding: %s\n", in, out, string(dst)) + } + }) +} diff --git a/gitdiff/format.go b/gitdiff/format.go new file mode 100644 index 0000000..5653f6f --- /dev/null +++ b/gitdiff/format.go @@ -0,0 +1,277 @@ +package gitdiff + +import ( + "bytes" + "compress/zlib" + "fmt" + "io" + "strconv" +) + +type formatter struct { + w io.Writer + err error +} + +func newFormatter(w io.Writer) *formatter { + return &formatter{w: w} +} + +func (fm *formatter) Write(p []byte) (int, error) { + if fm.err != nil { + return len(p), nil + } + if _, err := fm.w.Write(p); err != nil { + fm.err = err + } + return len(p), nil +} + +func (fm *formatter) WriteString(s string) (int, error) { + fm.Write([]byte(s)) + return len(s), nil +} + +func (fm *formatter) WriteByte(c byte) error { + fm.Write([]byte{c}) + return nil +} + +func (fm *formatter) WriteQuotedName(s string) { + qpos := 0 + for i := 0; i < len(s); i++ { + ch := s[i] + if q, quoted := quoteByte(ch); quoted { + if qpos == 0 { + fm.WriteByte('"') + } + fm.WriteString(s[qpos:i]) + fm.Write(q) + qpos = i + 1 + } + } + fm.WriteString(s[qpos:]) + if qpos > 0 { + fm.WriteByte('"') + } +} + +var quoteEscapeTable = map[byte]byte{ + '\a': 'a', + '\b': 'b', + '\t': 't', + '\n': 'n', + '\v': 'v', + '\f': 'f', + '\r': 'r', + '"': '"', + '\\': '\\', +} + +func quoteByte(b byte) ([]byte, bool) { + if q, ok := quoteEscapeTable[b]; ok { + return []byte{'\\', q}, true + } + if b < 0x20 || b >= 0x7F { + return []byte{ + '\\', + '0' + (b>>6)&0o3, + '0' + (b>>3)&0o7, + '0' + (b>>0)&0o7, + }, true + } + return nil, false +} + +func (fm *formatter) FormatFile(f *File) { + fm.WriteString("diff --git ") + + var aName, bName string + switch { + case f.OldName == "": + aName = f.NewName + bName = f.NewName + + case f.NewName == "": + aName = f.OldName + bName = f.OldName + + default: + aName = f.OldName + bName = f.NewName + } + + fm.WriteQuotedName("a/" + aName) + fm.WriteByte(' ') + fm.WriteQuotedName("b/" + bName) + fm.WriteByte('\n') + + if f.OldMode != 0 { + if f.IsDelete { + fmt.Fprintf(fm, "deleted file mode %o\n", f.OldMode) + } else if f.NewMode != 0 { + fmt.Fprintf(fm, "old mode %o\n", f.OldMode) + } + } + + if f.NewMode != 0 { + if f.IsNew { + fmt.Fprintf(fm, "new file mode %o\n", f.NewMode) + } else if f.OldMode != 0 { + fmt.Fprintf(fm, "new mode %o\n", f.NewMode) + } + } + + if f.Score > 0 { + if f.IsCopy || f.IsRename { + fmt.Fprintf(fm, "similarity index %d%%\n", f.Score) + } else { + fmt.Fprintf(fm, "dissimilarity index %d%%\n", f.Score) + } + } + + if f.IsCopy { + if f.OldName != "" { + fm.WriteString("copy from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("copy to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.IsRename { + if f.OldName != "" { + fm.WriteString("rename from ") + fm.WriteQuotedName(f.OldName) + fm.WriteByte('\n') + } + if f.NewName != "" { + fm.WriteString("rename to ") + fm.WriteQuotedName(f.NewName) + fm.WriteByte('\n') + } + } + + if f.OldOIDPrefix != "" && f.NewOIDPrefix != "" { + fmt.Fprintf(fm, "index %s..%s", f.OldOIDPrefix, f.NewOIDPrefix) + + // Mode is only included on the index line when it is not changing + if f.OldMode != 0 && ((f.NewMode == 0 && !f.IsDelete) || f.OldMode == f.NewMode) { + fmt.Fprintf(fm, " %o", f.OldMode) + } + + fm.WriteByte('\n') + } + + if f.IsBinary { + if f.BinaryFragment == nil { + fm.WriteString("Binary files fmer\n") + } else { + fm.WriteString("GIT binary patch\n") + fm.FormatBinaryFragment(f.BinaryFragment) + if f.ReverseBinaryFragment != nil { + fm.FormatBinaryFragment(f.ReverseBinaryFragment) + } + } + } + + // The "---" and "+++" lines only appear for text patches with fragments + if len(f.TextFragments) > 0 { + fm.WriteString("--- ") + if f.OldName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("a/" + f.OldName) + } + fm.WriteByte('\n') + + fm.WriteString("+++ ") + if f.NewName == "" { + fm.WriteString("/dev/null") + } else { + fm.WriteQuotedName("b/" + f.NewName) + } + fm.WriteByte('\n') + + for _, frag := range f.TextFragments { + fm.FormatTextFragment(frag) + } + } +} + +func (fm *formatter) FormatTextFragment(f *TextFragment) { + fm.FormatTextFragmentHeader(f) + fm.WriteByte('\n') + + for _, line := range f.Lines { + fm.WriteString(line.Op.String()) + fm.WriteString(line.Line) + if line.NoEOL() { + fm.WriteString("\n\\ No newline at end of file\n") + } + } +} + +func (fm *formatter) FormatTextFragmentHeader(f *TextFragment) { + fmt.Fprintf(fm, "@@ -%d,%d +%d,%d @@", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines) + if f.Comment != "" { + fm.WriteByte(' ') + fm.WriteString(f.Comment) + } +} + +func (fm *formatter) FormatBinaryFragment(f *BinaryFragment) { + const ( + maxBytesPerLine = 52 + ) + + switch f.Method { + case BinaryPatchDelta: + fm.WriteString("delta ") + case BinaryPatchLiteral: + fm.WriteString("literal ") + } + fm.Write(strconv.AppendInt(nil, f.Size, 10)) + fm.WriteByte('\n') + + data := deflateBinaryChunk(f.Data) + n := (len(data) / maxBytesPerLine) * maxBytesPerLine + + buf := make([]byte, base85Len(maxBytesPerLine)) + for i := 0; i < n; i += maxBytesPerLine { + base85Encode(buf, data[i:i+maxBytesPerLine]) + fm.WriteByte('z') + fm.Write(buf) + fm.WriteByte('\n') + } + if remainder := len(data) - n; remainder > 0 { + buf = buf[0:base85Len(remainder)] + + sizeChar := byte(remainder) + if remainder <= 26 { + sizeChar = 'A' + sizeChar - 1 + } else { + sizeChar = 'a' + sizeChar - 27 + } + + base85Encode(buf, data[n:]) + fm.WriteByte(sizeChar) + fm.Write(buf) + fm.WriteByte('\n') + } + fm.WriteByte('\n') +} + +func deflateBinaryChunk(data []byte) []byte { + var b bytes.Buffer + + zw := zlib.NewWriter(&b) + _, _ = zw.Write(data) + _ = zw.Close() + + return b.Bytes() +} diff --git a/gitdiff/format_roundtrip_test.go b/gitdiff/format_roundtrip_test.go new file mode 100644 index 0000000..58cff97 --- /dev/null +++ b/gitdiff/format_roundtrip_test.go @@ -0,0 +1,156 @@ +package gitdiff + +import ( + "bytes" + "fmt" + "os" + "path/filepath" + "slices" + "testing" +) + +func TestFormatRoundtrip(t *testing.T) { + patches := []struct { + File string + SkipTextCompare bool + }{ + {File: "copy.patch"}, + {File: "copy_modify.patch"}, + {File: "delete.patch"}, + {File: "mode.patch"}, + {File: "mode_modify.patch"}, + {File: "modify.patch"}, + {File: "new.patch"}, + {File: "new_empty.patch"}, + {File: "new_mode.patch"}, + {File: "rename.patch"}, + {File: "rename_modify.patch"}, + + // Due to differences between Go's 'encoding/zlib' package and the zlib + // C library, binary patches cannot be compared directly as the patch + // data is slightly different when re-encoded by Go. + {File: "binary_modify.patch", SkipTextCompare: true}, + {File: "binary_new.patch", SkipTextCompare: true}, + } + + for _, patch := range patches { + t.Run(patch.File, func(t *testing.T) { + b, err := os.ReadFile(filepath.Join("testdata", "string", patch.File)) + if err != nil { + t.Fatalf("failed to read patch: %v", err) + } + + original := assertParseSingleFile(t, b, "patch") + str := original.String() + + if !patch.SkipTextCompare { + if string(b) != str { + t.Errorf("incorrect patch text\nexpected: %q\n actual: %q\n", string(b), str) + } + } + + reparsed := assertParseSingleFile(t, []byte(str), "formatted patch") + assertFilesEqual(t, original, reparsed) + }) + } +} + +func assertParseSingleFile(t *testing.T, b []byte, kind string) *File { + files, _, err := Parse(bytes.NewReader(b)) + if err != nil { + t.Fatalf("failed to parse %s: %v", kind, err) + } + if len(files) != 1 { + t.Fatalf("expected %s to contain a single files, but found %d", kind, len(files)) + } + return files[0] +} + +func assertFilesEqual(t *testing.T, expected, actual *File) { + assertEqual(t, expected.OldName, actual.OldName, "OldName") + assertEqual(t, expected.NewName, actual.NewName, "NewName") + + assertEqual(t, expected.IsNew, actual.IsNew, "IsNew") + assertEqual(t, expected.IsDelete, actual.IsDelete, "IsDelete") + assertEqual(t, expected.IsCopy, actual.IsCopy, "IsCopy") + assertEqual(t, expected.IsRename, actual.IsRename, "IsRename") + + assertEqual(t, expected.OldMode, actual.OldMode, "OldMode") + assertEqual(t, expected.NewMode, actual.NewMode, "NewMode") + + assertEqual(t, expected.OldOIDPrefix, actual.OldOIDPrefix, "OldOIDPrefix") + assertEqual(t, expected.NewOIDPrefix, actual.NewOIDPrefix, "NewOIDPrefix") + assertEqual(t, expected.Score, actual.Score, "Score") + + if len(expected.TextFragments) == len(actual.TextFragments) { + for i := range expected.TextFragments { + prefix := fmt.Sprintf("TextFragments[%d].", i) + ef := expected.TextFragments[i] + af := actual.TextFragments[i] + + assertEqual(t, ef.Comment, af.Comment, prefix+"Comment") + + assertEqual(t, ef.OldPosition, af.OldPosition, prefix+"OldPosition") + assertEqual(t, ef.OldLines, af.OldLines, prefix+"OldLines") + + assertEqual(t, ef.NewPosition, af.NewPosition, prefix+"NewPosition") + assertEqual(t, ef.NewLines, af.NewLines, prefix+"NewLines") + + assertEqual(t, ef.LinesAdded, af.LinesAdded, prefix+"LinesAdded") + assertEqual(t, ef.LinesDeleted, af.LinesDeleted, prefix+"LinesDeleted") + + assertEqual(t, ef.LeadingContext, af.LeadingContext, prefix+"LeadingContext") + assertEqual(t, ef.TrailingContext, af.TrailingContext, prefix+"TrailingContext") + + if !slices.Equal(ef.Lines, af.Lines) { + t.Errorf("%sLines: expected %#v, actual %#v", prefix, ef.Lines, af.Lines) + } + } + } else { + t.Errorf("TextFragments: expected length %d, actual length %d", len(expected.TextFragments), len(actual.TextFragments)) + } + + assertEqual(t, expected.IsBinary, actual.IsBinary, "IsBinary") + + if expected.BinaryFragment != nil { + if actual.BinaryFragment == nil { + t.Errorf("BinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.BinaryFragment + af := expected.BinaryFragment + + assertEqual(t, ef.Method, af.Method, "BinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "BinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("BinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.BinaryFragment != nil { + t.Errorf("BinaryFragment: expected nil, actual is non-nil") + } + + if expected.ReverseBinaryFragment != nil { + if actual.ReverseBinaryFragment == nil { + t.Errorf("ReverseBinaryFragment: expected non-nil, actual is nil") + } else { + ef := expected.ReverseBinaryFragment + af := expected.ReverseBinaryFragment + + assertEqual(t, ef.Method, af.Method, "ReverseBinaryFragment.Method") + assertEqual(t, ef.Size, af.Size, "ReverseBinaryFragment.Size") + + if !slices.Equal(ef.Data, af.Data) { + t.Errorf("ReverseBinaryFragment.Data: expected %#v, actual %#v", ef.Data, af.Data) + } + } + } else if actual.ReverseBinaryFragment != nil { + t.Errorf("ReverseBinaryFragment: expected nil, actual is non-nil") + } +} + +func assertEqual[T comparable](t *testing.T, expected, actual T, name string) { + if expected != actual { + t.Errorf("%s: expected %#v, actual %#v", name, expected, actual) + } +} diff --git a/gitdiff/format_test.go b/gitdiff/format_test.go new file mode 100644 index 0000000..3325296 --- /dev/null +++ b/gitdiff/format_test.go @@ -0,0 +1,28 @@ +package gitdiff + +import ( + "strings" + "testing" +) + +func TestFormatter_WriteQuotedName(t *testing.T) { + tests := []struct { + Input string + Expected string + }{ + {"noquotes.txt", `noquotes.txt`}, + {"no quotes.txt", `no quotes.txt`}, + {"new\nline", `"new\nline"`}, + {"escape\x1B null\x00", `"escape\033 null\000"`}, + {"snowman \u2603 snowman", `"snowman \342\230\203 snowman"`}, + {"\"already quoted\"", `"\"already quoted\""`}, + } + + for _, test := range tests { + var b strings.Builder + newFormatter(&b).WriteQuotedName(test.Input) + if b.String() != test.Expected { + t.Errorf("expected %q, got %q", test.Expected, b.String()) + } + } +} diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 18645bd..5365645 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "strings" ) // File describes changes to a single file. It can be either a text file or a @@ -38,6 +39,15 @@ type File struct { ReverseBinaryFragment *BinaryFragment } +// String returns a git diff representation of this file. The value can be +// parsed by this library to obtain the same File, but may not be the same as +// the original input. +func (f *File) String() string { + var diff strings.Builder + newFormatter(&diff).FormatFile(f) + return diff.String() +} + // TextFragment describes changed lines starting at a specific line in a text file. type TextFragment struct { Comment string @@ -57,9 +67,20 @@ type TextFragment struct { Lines []Line } -// Header returns the canonical header of this fragment. +// String returns a git diff format of this fragment. See [File.String] for +// more details on this format. +func (f *TextFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatTextFragment(f) + return diff.String() +} + +// Header returns a git diff header of this fragment. See [File.String] for +// more details on this format. func (f *TextFragment) Header() string { - return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) + var hdr strings.Builder + newFormatter(&hdr).FormatTextFragmentHeader(f) + return hdr.String() } // Validate checks that the fragment is self-consistent and appliable. Validate @@ -197,3 +218,13 @@ const ( // BinaryPatchLiteral indicates the data is the exact file content BinaryPatchLiteral ) + +// String returns a git diff format of this fragment. Due to differences in +// zlib implementation between Go and Git, encoded binary data in the result +// will likely differ from what Git produces for the same input. See +// [File.String] for more details on this format. +func (f *BinaryFragment) String() string { + var diff strings.Builder + newFormatter(&diff).FormatBinaryFragment(f) + return diff.String() +} diff --git a/gitdiff/testdata/string/binary_modify.patch b/gitdiff/testdata/string/binary_modify.patch new file mode 100644 index 0000000..12ddad5 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify.patch @@ -0,0 +1,9 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d6975ec021016c02b6d58345ebf434f38c..bdc9a70f055892146612dcdb413f0e339faaa0df 100644 +GIT binary patch +delta 66 +QcmeZhVVvM$!$1K50C&Ox;s5{u + +delta 5 +McmZo+^qAlQ00i9urT_o{ + diff --git a/gitdiff/testdata/string/binary_new.patch b/gitdiff/testdata/string/binary_new.patch new file mode 100644 index 0000000..c56f35e --- /dev/null +++ b/gitdiff/testdata/string/binary_new.patch @@ -0,0 +1,11 @@ +diff --git a/file.bin b/file.bin +new file mode 100644 +index 0000000000000000000000000000000000000000..a7f4d5d6975ec021016c02b6d58345ebf434f38c +GIT binary patch +literal 72 +zcmV-O0Jr~td-`u6JcK&{KDK= Date: Tue, 7 Jan 2025 19:27:03 -0800 Subject: [PATCH 07/12] Parse binary headers with file paths (#55) Some patches may include one or more file paths as part of the binary header when there is no binary data. Git accounts for this by only checking the prefix and suffix of the line, but I missed that logic when implementing this originally. --- gitdiff/binary.go | 15 +++++++++++---- gitdiff/binary_test.go | 10 ++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/gitdiff/binary.go b/gitdiff/binary.go index c65a9a6..282e323 100644 --- a/gitdiff/binary.go +++ b/gitdiff/binary.go @@ -50,11 +50,11 @@ func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { } func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { - switch p.Line(0) { - case "GIT binary patch\n": + line := p.Line(0) + switch { + case line == "GIT binary patch\n": hasData = true - case "Binary files differ\n": - case "Files differ\n": + case isBinaryNoDataMarker(line): default: return false, false, nil } @@ -65,6 +65,13 @@ func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { return true, hasData, nil } +func isBinaryNoDataMarker(line string) bool { + if strings.HasSuffix(line, " differ\n") { + return strings.HasPrefix(line, "Binary files ") || strings.HasPrefix(line, "Files ") + } + return false +} + func (p *parser) ParseBinaryFragmentHeader() (*BinaryFragment, error) { parts := strings.SplitN(strings.TrimSuffix(p.Line(0), "\n"), " ", 2) if len(parts) < 2 { diff --git a/gitdiff/binary_test.go b/gitdiff/binary_test.go index a31a0e0..64db243 100644 --- a/gitdiff/binary_test.go +++ b/gitdiff/binary_test.go @@ -25,6 +25,16 @@ func TestParseBinaryMarker(t *testing.T) { IsBinary: true, HasData: false, }, + "binaryFileNoPatchPaths": { + Input: "Binary files a/foo.bin and b/foo.bin differ\n", + IsBinary: true, + HasData: false, + }, + "fileNoPatch": { + Input: "Files differ\n", + IsBinary: true, + HasData: false, + }, "textFile": { Input: "@@ -10,14 +22,31 @@\n", IsBinary: false, From fffa3cca9bd29785ec7f99ea6921de60e32a724a Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Tue, 7 Jan 2025 19:35:14 -0800 Subject: [PATCH 08/12] Fix binary headers in formatted patches (#56) Include file names in the header (now that we can actually parse them) and fix a bad find-and-replace that changed "differ" to "fmer". Add a new test to verify that binary files without data format correctly. --- gitdiff/format.go | 6 +++++- gitdiff/format_roundtrip_test.go | 1 + gitdiff/testdata/string/binary_modify_nodata.patch | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 gitdiff/testdata/string/binary_modify_nodata.patch diff --git a/gitdiff/format.go b/gitdiff/format.go index 5653f6f..d97aba9 100644 --- a/gitdiff/format.go +++ b/gitdiff/format.go @@ -169,7 +169,11 @@ func (fm *formatter) FormatFile(f *File) { if f.IsBinary { if f.BinaryFragment == nil { - fm.WriteString("Binary files fmer\n") + fm.WriteString("Binary files ") + fm.WriteQuotedName("a/" + aName) + fm.WriteString(" and ") + fm.WriteQuotedName("b/" + bName) + fm.WriteString(" differ\n") } else { fm.WriteString("GIT binary patch\n") fm.FormatBinaryFragment(f.BinaryFragment) diff --git a/gitdiff/format_roundtrip_test.go b/gitdiff/format_roundtrip_test.go index 58cff97..a230e91 100644 --- a/gitdiff/format_roundtrip_test.go +++ b/gitdiff/format_roundtrip_test.go @@ -31,6 +31,7 @@ func TestFormatRoundtrip(t *testing.T) { // data is slightly different when re-encoded by Go. {File: "binary_modify.patch", SkipTextCompare: true}, {File: "binary_new.patch", SkipTextCompare: true}, + {File: "binary_modify_nodata.patch"}, } for _, patch := range patches { diff --git a/gitdiff/testdata/string/binary_modify_nodata.patch b/gitdiff/testdata/string/binary_modify_nodata.patch new file mode 100644 index 0000000..833a534 --- /dev/null +++ b/gitdiff/testdata/string/binary_modify_nodata.patch @@ -0,0 +1,3 @@ +diff --git a/file.bin b/file.bin +index a7f4d5d..bdc9a70 100644 +Binary files a/file.bin and b/file.bin differ From 823d31db22199232e1400cddc39f06d890380055 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Tue, 4 Mar 2025 20:14:23 -0800 Subject: [PATCH 09/12] Add a test for removing the last newline in a file (#59) This worked, but completes the test coverage in combination with the test that adds a final newline and one that leaves the missing newline in place. --- gitdiff/apply_test.go | 1 + .../testdata/apply/text_fragment_change_end_eol.out | 3 +++ .../testdata/apply/text_fragment_change_end_eol.patch | 10 ++++++++++ .../testdata/apply/text_fragment_change_end_eol.src | 3 +++ 4 files changed, 17 insertions(+) create mode 100644 gitdiff/testdata/apply/text_fragment_change_end_eol.out create mode 100644 gitdiff/testdata/apply/text_fragment_change_end_eol.patch create mode 100644 gitdiff/testdata/apply/text_fragment_change_end_eol.src diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index dd076bb..f19153b 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -22,6 +22,7 @@ func TestApplyTextFragment(t *testing.T) { "changeStart": {Files: getApplyFiles("text_fragment_change_start")}, "changeMiddle": {Files: getApplyFiles("text_fragment_change_middle")}, "changeEnd": {Files: getApplyFiles("text_fragment_change_end")}, + "changeEndEOL": {Files: getApplyFiles("text_fragment_change_end_eol")}, "changeExact": {Files: getApplyFiles("text_fragment_change_exact")}, "changeSingleNoEOL": {Files: getApplyFiles("text_fragment_change_single_noeol")}, diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.out b/gitdiff/testdata/apply/text_fragment_change_end_eol.out new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 \ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.patch b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch new file mode 100644 index 0000000..f1c9477 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.patch @@ -0,0 +1,10 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +index a92d664..8cf2f17 100644 +--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src ++++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +@@ -1,3 +1,3 @@ + line 1 + line 2 +-line 3 ++line 3 +\ No newline at end of file diff --git a/gitdiff/testdata/apply/text_fragment_change_end_eol.src b/gitdiff/testdata/apply/text_fragment_change_end_eol.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/gitdiff/testdata/apply/text_fragment_change_end_eol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 From 0896f01926d529e4c0781f72c7957aa942e2068f Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Sat, 29 Mar 2025 13:27:19 -0700 Subject: [PATCH 10/12] Clarify that we only support GNU diff In issue #57, it was reported that the library could not apply some patches generated by BSD `diff` because of the way that variant reports changes in files without trailing newlines. Since the library behavior matches Git, I don't consider this a bug, but update the README to mention that the support for standard unified diffs is only for the GNU variant of the `diff` tool. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e5fe24f..0f65127 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ A Go library for parsing and applying patches generated by `git diff`, `git show`, and `git format-patch`. It can also parse and apply unified diffs -generated by the standard `diff` tool. +generated by the standard GNU `diff` tool. It supports standard line-oriented text patches and Git binary patches, and aims to parse anything accepted by the `git apply` command. From 7413262a184380473b5cad4b12a2a061ecbc86c9 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Mon, 31 Mar 2025 18:37:50 -0700 Subject: [PATCH 11/12] Update golangci-lint to v2 (#61) --- .github/workflows/go.yml | 4 +-- .golangci.yml | 75 ++++++++++++++++++++-------------------- 2 files changed, 40 insertions(+), 39 deletions(-) diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a59475a..c89fc7e 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -18,9 +18,9 @@ jobs: uses: actions/checkout@v4 - name: Lint - uses: golangci/golangci-lint-action@v6 + uses: golangci/golangci-lint-action@v7 with: - version: v1.59 + version: v2.0 - name: Test run: go test -v ./... diff --git a/.golangci.yml b/.golangci.yml index 192c556..655cb5a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -1,50 +1,51 @@ +version: "2" + run: tests: false linters: - disable-all: true + default: none enable: - errcheck - - gofmt - - goimports - govet - ineffassign - misspell - revive - - typecheck - unconvert - unused + settings: + errcheck: + exclude-functions: + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString + - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte + - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) + revive: + rules: + - name: context-keys-type + - name: time-naming + - name: var-declaration + - name: unexported-return + - name: errorf + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-return + - name: error-strings + - name: error-naming + - name: exported + - name: increment-decrement + - name: var-naming + - name: package-comments + - name: range + - name: receiver-naming + - name: indent-error-flow -issues: - exclude-use-default: false - -linters-settings: - errcheck: - exclude-functions: - - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).Write - - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteString - - (*github.com/bluekeyes/go-gitdiff/gitdiff.formatter).WriteByte - - fmt.Fprintf(*github.com/bluekeyes/go-gitdiff/gitdiff.formatter) - goimports: - local-prefixes: github.com/bluekeyes/go-gitdiff - revive: - rules: - # enable all rules from golint - - name: context-keys-type - - name: time-naming - - name: var-declaration - - name: unexported-return - - name: errorf - - name: blank-imports - - name: context-as-argument - - name: dot-imports - - name: error-return - - name: error-strings - - name: error-naming - - name: exported - - name: increment-decrement - - name: var-naming - - name: package-comments - - name: range - - name: receiver-naming - - name: indent-error-flow +formatters: + enable: + - gofmt + - goimports + settings: + goimports: + local-prefixes: + - github.com/bluekeyes/go-gitdiff From 17bd72f0e2e89a2c6aa03de29fc2583a7ec07943 Mon Sep 17 00:00:00 2001 From: Billy Keyes Date: Wed, 3 Sep 2025 17:26:05 -0700 Subject: [PATCH 12/12] Report short sources as conflicts during apply (#63) Previously, this returned an io.ErrUnexpectedEOF. This is not wrong, in that we did unexpectedly hit the end of the input file, but it is vague and implies a possible library bug rather than a problem with the patch or the input. This condition is really a conflict, as the changes described by the patch are not compatible with the state of the input. --- gitdiff/apply_test.go | 4 ++-- gitdiff/apply_text.go | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index f19153b..05915ba 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -31,14 +31,14 @@ func TestApplyTextFragment(t *testing.T) { Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src_before.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorShortSrc": { Files: applyFiles{ Src: "text_fragment_error.src", Patch: "text_fragment_error_short_src.patch", }, - Err: io.ErrUnexpectedEOF, + Err: &Conflict{}, }, "errorContextConflict": { Files: applyFiles{ diff --git a/gitdiff/apply_text.go b/gitdiff/apply_text.go index 43af83a..8d0accb 100644 --- a/gitdiff/apply_text.go +++ b/gitdiff/apply_text.go @@ -1,6 +1,7 @@ package gitdiff import ( + "errors" "io" ) @@ -85,6 +86,11 @@ func (a *TextApplier) ApplyFragment(f *TextFragment) error { preimage := make([][]byte, fragEnd-start) n, err := a.lineSrc.ReadLinesAt(preimage, start) if err != nil { + // an EOF indicates that source file is shorter than the patch expects, + // which should be reported as a conflict rather than a generic error + if errors.Is(err, io.EOF) { + err = &Conflict{"src has fewer lines than required by fragment"} + } return applyError(err, lineNum(start+int64(n))) }