diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9f11b75 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +.idea/ diff --git a/.golangci.yml b/.golangci.yml index 12cdbd2..05db235 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -20,4 +20,4 @@ issues: linter-settings: goimports: - local-prefixes: github.com/bluekeyes/go-gitdiff + local-prefixes: github.com/gitleaks/go-gitdiff diff --git a/README.md b/README.md index 8f9671b..1879ef3 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # go-gitdiff -[![PkgGoDev](https://pkg.go.dev/badge/github.com/bluekeyes/go-gitdiff/gitdiff)](https://pkg.go.dev/github.com/bluekeyes/go-gitdiff/gitdiff) [![Go Report Card](https://goreportcard.com/badge/github.com/bluekeyes/go-gitdiff)](https://goreportcard.com/report/github.com/bluekeyes/go-gitdiff) +[![PkgGoDev](https://pkg.go.dev/badge/github.com/gitleaks/go-gitdiff/gitdiff)](https://pkg.go.dev/github.com/gitleaks/go-gitdiff/gitdiff) [![Go Report Card](https://goreportcard.com/badge/github.com/gitleaks/go-gitdiff)](https://goreportcard.com/report/github.com/gitleaks/go-gitdiff) A Go library for parsing and applying patches generated by `git diff`, `git show`, and `git format-patch`. It can also parse and apply unified diffs diff --git a/gitdiff/apply_test.go b/gitdiff/apply_test.go index d981e96..a34ea56 100644 --- a/gitdiff/apply_test.go +++ b/gitdiff/apply_test.go @@ -231,10 +231,14 @@ type applyTest struct { func (at applyTest) run(t *testing.T, apply func(io.Writer, *Applier, *File) error) { src, patch, out := at.Files.Load(t) - files, _, err := Parse(bytes.NewReader(patch)) + fileChan, err := Parse(io.NopCloser(bytes.NewReader(patch))) if err != nil { t.Fatalf("failed to parse patch file: %v", err) } + var files []*File + for file := range fileChan { + files = append(files, file) + } if len(files) != 1 { t.Fatalf("patch should contain exactly one file, but it has %d", len(files)) } diff --git a/gitdiff/binary.go b/gitdiff/binary.go index c65a9a6..0a49a5e 100644 --- a/gitdiff/binary.go +++ b/gitdiff/binary.go @@ -6,10 +6,13 @@ import ( "fmt" "io" "io/ioutil" + "regexp" "strconv" "strings" ) +var binaryRegexp = regexp.MustCompile(`^Binary files (/dev/null|a/(.+)|"a/(.+)") and (/dev/null|b/(.+)|"b/(.+)") differ\s*$`) + func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { isBinary, hasData, err := p.ParseBinaryMarker() if err != nil || !isBinary { @@ -50,13 +53,17 @@ func (p *parser) ParseBinaryFragments(f *File) (n int, err error) { } func (p *parser) ParseBinaryMarker() (isBinary bool, hasData bool, err error) { - switch p.Line(0) { - case "GIT binary patch\n": + line := p.Line(0) + switch { + case line == "GIT binary patch\n": hasData = true - case "Binary files differ\n": - case "Files differ\n": + case line == "Binary files differ\n": + case line == "Files differ\n": + case strings.HasPrefix(line, "Binary files ") && strings.HasSuffix(line, "differ\n"): default: - return false, false, nil + if !binaryRegexp.MatchString(p.Line(0)) { + return false, false, nil + } } if err = p.Next(); err != nil && err != io.EOF { diff --git a/gitdiff/binary_test.go b/gitdiff/binary_test.go index a31a0e0..9b00cd7 100644 --- a/gitdiff/binary_test.go +++ b/gitdiff/binary_test.go @@ -30,6 +30,26 @@ func TestParseBinaryMarker(t *testing.T) { IsBinary: false, HasData: false, }, + "binaryPatchCreated": { + Input: "Binary files /dev/null and b/path/to/file.ext differ\n", + IsBinary: true, + HasData: false, + }, + "binaryPatchModified": { + Input: "Binary files a/path/to/file.ext and b/path/to/file.ext differ\n", + IsBinary: true, + HasData: false, + }, + "binaryPatchModifiedQuoted": { + Input: "Binary files \"a/path/to/file.ext\" and \"b/path/to/file.ext\" differ\n", + IsBinary: true, + HasData: false, + }, + "binaryPatchDeleted": { + Input: "Binary files a/path/to/file.ext and /dev/null differ\n", + IsBinary: true, + HasData: false, + }, } for name, test := range tests { diff --git a/gitdiff/file_header.go b/gitdiff/file_header.go index 58904b4..1962e64 100644 --- a/gitdiff/file_header.go +++ b/gitdiff/file_header.go @@ -30,6 +30,12 @@ func (p *parser) ParseNextFileHeader() (*File, string, error) { return nil, "", p.Errorf(-1, "patch fragment without file header: %s", frag.Header()) } + // check for end of merge header, and start of a new header + if strings.HasPrefix(p.Line(0), commitPrefix) { + preamble.Reset() + goto NextLine + } + // check for a git-generated patch file, err = p.ParseGitFileHeader() if err != nil { diff --git a/gitdiff/gitdiff.go b/gitdiff/gitdiff.go index 18645bd..2dbfc4c 100644 --- a/gitdiff/gitdiff.go +++ b/gitdiff/gitdiff.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "os" + "strings" ) // File describes changes to a single file. It can be either a text file or a @@ -24,6 +25,8 @@ type File struct { NewOIDPrefix string Score int + PatchHeader *PatchHeader + // TextFragments contains the fragments describing changes to a text file. It // may be empty if the file is empty or if only the mode changes. TextFragments []*TextFragment @@ -57,6 +60,16 @@ type TextFragment struct { Lines []Line } +func (f *TextFragment) Raw(op LineOp) string { + sb := strings.Builder{} + for _, l := range f.Lines { + if l.Op == op { + sb.WriteString(l.Line) + } + } + return sb.String() +} + // Header returns the canonical header of this fragment. func (f *TextFragment) Header() string { return fmt.Sprintf("@@ -%d,%d +%d,%d @@ %s", f.OldPosition, f.OldLines, f.NewPosition, f.NewLines, f.Comment) diff --git a/gitdiff/parser.go b/gitdiff/parser.go index d44465a..5ffa9bd 100644 --- a/gitdiff/parser.go +++ b/gitdiff/parser.go @@ -7,52 +7,67 @@ import ( "bufio" "fmt" "io" + "strings" ) +const commitPrefix = "commit" + // Parse parses a patch with changes to one or more files. Any content before // the first file is returned as the second value. If an error occurs while // parsing, it returns all files parsed before the error. -func Parse(r io.Reader) ([]*File, string, error) { +func Parse(r io.Reader) (<-chan *File, error) { p := newParser(r) + out := make(chan *File) if err := p.Next(); err != nil { + close(out) if err == io.EOF { - return nil, "", nil + return out, nil } - return nil, "", err + return out, err } - var preamble string - var files []*File - for { - file, pre, err := p.ParseNextFileHeader() - if err != nil { - return files, preamble, err - } - if file == nil { - break - } + go func(out chan *File, r io.Reader) { + defer close(out) - for _, fn := range []func(*File) (int, error){ - p.ParseTextFragments, - p.ParseBinaryFragments, - } { - n, err := fn(file) + ph := &PatchHeader{} + for { + file, pre, err := p.ParseNextFileHeader() if err != nil { - return files, preamble, err + if err == io.EOF { + return + } + p.Next() + continue + } + + if strings.Contains(pre, commitPrefix) { + ph, _ = ParsePatchHeader(pre) } - if n > 0 { + + if file == nil { break } - } - if len(files) == 0 { - preamble = pre + for _, fn := range []func(*File) (int, error){ + p.ParseTextFragments, + p.ParseBinaryFragments, + } { + n, err := fn(file) + if err != nil { + return + } + if n > 0 { + break + } + } + + file.PatchHeader = ph + out <- file } - files = append(files, file) - } + }(out, r) - return files, preamble, nil + return out, nil } // TODO(bkeyes): consider exporting the parser type with configuration diff --git a/gitdiff/parser_test.go b/gitdiff/parser_test.go index 30f59f4..2e7a91a 100644 --- a/gitdiff/parser_test.go +++ b/gitdiff/parser_test.go @@ -4,10 +4,13 @@ import ( "bytes" "encoding/binary" "encoding/json" + "fmt" "io" "os" "reflect" + "strings" "testing" + "time" ) func TestLineOperations(t *testing.T) { @@ -298,6 +301,42 @@ a wild fragment appears? `, Err: true, }, + "mergeHeaderFollowedByNormalHeader": { + Input: `commit f6ded7a51cf917bdb44097066fab608c0facde5b +Merge: b2cf1cd0de 0254477421 +Author: BoloniniD +Date: Thu Apr 7 01:07:38 2022 +0300 + + Merge branch 'BLAKE3' of github.com:BoloniniD/ClickHouse into BLAKE3 + +commit 645e156af6b362145fad82d714f8e70a5b5a55a8 +Author: Meena Renganathan +Date: Wed Apr 6 14:50:10 2022 -0700 + + Updated the boringssl-cmake to match the latest broingssl module update + +diff --git a/.gitmodules b/.gitmodules +index 6c9e66f9cb..9cee5f697e 100644 +--- a/.gitmodules ++++ b/.gitmodules +@@ -207 +206,0 @@ +- branch = MergeWithUpstream +`, + Output: &File{ + OldName: ".gitmodules", + NewName: ".gitmodules", + OldMode: os.FileMode(0100644), + OldOIDPrefix: "6c9e66f9cb", + NewOIDPrefix: "9cee5f697e", + }, + Preamble: `commit 645e156af6b362145fad82d714f8e70a5b5a55a8 +Author: Meena Renganathan +Date: Wed Apr 6 14:50:10 2022 -0700 + + Updated the boringssl-cmake to match the latest broingssl module update + +`, + }, } for name, test := range tests { @@ -394,6 +433,16 @@ Date: Tue Apr 2 22:55:40 2019 -0700 InputFile: "testdata/one_file.patch", Output: []*File{ { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A file with multiple fragments.", + Body: "The content is arbitrary.", + }, OldName: "dir/file1.txt", NewName: "dir/file1.txt", OldMode: os.FileMode(0100644), @@ -408,6 +457,16 @@ Date: Tue Apr 2 22:55:40 2019 -0700 InputFile: "testdata/two_files.patch", Output: []*File{ { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A file with multiple fragments.", + Body: "The content is arbitrary.", + }, OldName: "dir/file1.txt", NewName: "dir/file1.txt", OldMode: os.FileMode(0100644), @@ -416,6 +475,16 @@ Date: Tue Apr 2 22:55:40 2019 -0700 TextFragments: textFragments, }, { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A file with multiple fragments.", + Body: "The content is arbitrary.", + }, OldName: "dir/file2.txt", NewName: "dir/file2.txt", OldMode: os.FileMode(0100644), @@ -430,6 +499,15 @@ Date: Tue Apr 2 22:55:40 2019 -0700 InputFile: "testdata/new_binary_file.patch", Output: []*File{ { + PatchHeader: &PatchHeader{ + SHA: "5d9790fec7d95aa223f3d20936340bf55ff3dcbe", + Author: &PatchIdentity{ + Name: "Morton Haypenny", + Email: "mhaypenny@example.com", + }, + AuthorDate: asTime("2019-04-02T22:55:40-07:00"), + Title: "A binary file with the first 10 fibonacci numbers.", + }, OldName: "", NewName: "dir/ten.bin", NewMode: os.FileMode(0100644), @@ -460,7 +538,7 @@ Date: Tue Apr 2 22:55:40 2019 -0700 t.Fatalf("unexpected error opening input file: %v", err) } - files, pre, err := Parse(f) + fileChan, err := Parse(f) if test.Err { if err == nil || err == io.EOF { t.Fatalf("expected error parsing patch, but got %v", err) @@ -470,13 +548,14 @@ Date: Tue Apr 2 22:55:40 2019 -0700 if err != nil { t.Fatalf("unexpected error parsing patch: %v", err) } + var files []*File + for file := range fileChan { + files = append(files, file) + } if len(test.Output) != len(files) { t.Fatalf("incorrect number of parsed files: expected %d, actual %d", len(test.Output), len(files)) } - if test.Preamble != pre { - t.Errorf("incorrect preamble\nexpected: %q\n actual: %q", test.Preamble, pre) - } for i := range test.Output { if !reflect.DeepEqual(test.Output[i], files[i]) { exp, _ := json.MarshalIndent(test.Output[i], "", " ") @@ -488,6 +567,61 @@ Date: Tue Apr 2 22:55:40 2019 -0700 } } +func BenchmarkParse(b *testing.B) { + var inputDiff string + { + builder := strings.Builder{} + builder.WriteString(`commit 5d9790fec7d95aa223f3d20936340bf55ff3dcbe +Author: Morton Haypenny +Date: Tue Apr 2 22:55:40 2019 -0700 + + A file with multiple fragments. + + The content is arbitrary. + +`) + fileDiff := func(i int) string { + return fmt.Sprintf(`diff --git a/dir/file%[1]d.txt b/dir/file%[1]d.txt +index ebe9fa54..fe103e1d 100644 +--- a/dir/file%[1]d.txt ++++ b/dir/file%[1]d.txt +@@ -3,6 +3,8 @@ fragment 1 + context line +-old line 1 +-old line 2 + context line ++new line 1 ++new line 2 ++new line 3 + context line +-old line 3 ++new line 4 ++new line 5 +@@ -31,2 +33,2 @@ fragment 2 + context line +-old line 4 ++new line 6 +`, i) + } + for i := 0; i < 1000; i++ { + _, err := builder.WriteString(fileDiff(i)) + if err != nil { + panic(err) + } + } + inputDiff = builder.String() + } + for i := 0; i < b.N; i++ { + reader := io.NopCloser(strings.NewReader(inputDiff)) + ch, err := Parse(reader) + if err != nil { + panic(err) + } + for range ch { + } + } +} + func newTestParser(input string, init bool) *parser { p := newParser(bytes.NewBufferString(input)) if init { @@ -495,3 +629,11 @@ func newTestParser(input string, init bool) *parser { } return p } + +func asTime(s string) time.Time { + t, err := time.Parse(time.RFC3339, s) + if err != nil { + panic(err) + } + return t +} diff --git a/gitdiff/patch_header.go b/gitdiff/patch_header.go index c3c387d..a4671b7 100644 --- a/gitdiff/patch_header.go +++ b/gitdiff/patch_header.go @@ -82,9 +82,9 @@ func (i PatchIdentity) String() string { } // ParsePatchIdentity parses a patch identity string. A valid string contains a -// non-empty name followed by an email address in angle brackets. Like Git, +// name followed by an email address in angle brackets. // ParsePatchIdentity does not require that the email address is valid or -// properly formatted, only that it is non-empty. The name must not contain a +// properly formatted. The name must not contain a // left angle bracket, '<', and the email address must not contain a right // angle bracket, '>'. func ParsePatchIdentity(s string) (PatchIdentity, error) { @@ -109,9 +109,6 @@ func ParsePatchIdentity(s string) (PatchIdentity, error) { if emailStart > 0 && emailEnd > 0 { email = strings.TrimSpace(s[emailStart:emailEnd]) } - if name == "" || email == "" { - return PatchIdentity{}, fmt.Errorf("invalid identity string: %s", s) - } return PatchIdentity{Name: name, Email: email}, nil } diff --git a/gitdiff/patch_header_test.go b/gitdiff/patch_header_test.go index bda91fe..9181b5c 100644 --- a/gitdiff/patch_header_test.go +++ b/gitdiff/patch_header_test.go @@ -34,11 +34,38 @@ func TestParsePatchIdentity(t *testing.T) { }, "missingName": { Input: "", - Err: "invalid identity", + Output: PatchIdentity{ + Name: "", + Email: "mhaypenny@example.com", + }, }, "missingEmail": { Input: "Morton Haypenny", - Err: "invalid identity", + Output: PatchIdentity{ + Name: "", + Email: "", + }, + }, + "emptyEmail": { + Input: "Morton Haypenny <>", + Output: PatchIdentity{ + Name: "Morton Haypenny", + Email: "", + }, + }, + "missingNameAndEmail": { + Input: "", + Output: PatchIdentity{ + Name: "", + Email: "", + }, + }, + "emptyNameAndEmail": { + Input: " <>", + Output: PatchIdentity{ + Name: "", + Email: "", + }, }, "unclosedEmail": { Input: "Morton Haypenny