Skip to content

Commit cf528e7

Browse files
authored
Merge pull request #21 from tbrittain/6-repo-indexing-performance-optimizations
Implement native git repository handling for improved performance
2 parents 7a1b765 + 039adf7 commit cf528e7

3 files changed

Lines changed: 371 additions & 1 deletion

File tree

app.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (a *App) OpenRepository(path string) error {
8585
a.db = nil
8686
}
8787

88-
repo, err := git.Open(path)
88+
repo, err := git.NativeOpen(path)
8989
if err != nil {
9090
return fmt.Errorf("opening repository: %w", err)
9191
}

internal/git/native.go

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
package git
2+
3+
import (
4+
"bufio"
5+
"fmt"
6+
"os/exec"
7+
"path/filepath"
8+
"strconv"
9+
"strings"
10+
"time"
11+
)
12+
13+
// nativeRepo is the Repository implementation that delegates every operation
// to the git command-line tool instead of an in-process git library. It was
// introduced for speed on large repositories, where native git's packfile
// handling and memory-mapped I/O outperform go-git.
type nativeRepo struct {
	// path is the repository location passed to git via its -C flag.
	path string
}
19+
20+
// NativeOpen opens an existing git repository using the native git CLI.
21+
func NativeOpen(path string) (Repository, error) {
22+
cmd := exec.Command("git", "-C", path, "rev-parse", "--git-dir")
23+
if out, err := cmd.CombinedOutput(); err != nil {
24+
return nil, fmt.Errorf("not a git repository (or git not installed): %s", strings.TrimSpace(string(out)))
25+
}
26+
return &nativeRepo{path: path}, nil
27+
}
28+
29+
func (r *nativeRepo) RepoName() string {
30+
return filepath.Base(r.path)
31+
}
32+
33+
func (r *nativeRepo) CurrentBranch() string {
34+
cmd := exec.Command("git", "-C", r.path, "symbolic-ref", "--short", "HEAD")
35+
out, err := cmd.Output()
36+
if err != nil {
37+
return "HEAD"
38+
}
39+
return strings.TrimSpace(string(out))
40+
}
41+
42+
func (r *nativeRepo) HeadHash() (string, error) {
43+
cmd := exec.Command("git", "-C", r.path, "rev-parse", "HEAD")
44+
out, err := cmd.Output()
45+
if err != nil {
46+
return "", fmt.Errorf("rev-parse HEAD: %w", err)
47+
}
48+
return strings.TrimSpace(string(out)), nil
49+
}
50+
51+
func (r *nativeRepo) Log(sinceHash string) (CommitIter, error) {
52+
args := []string{
53+
"-C", r.path, "log",
54+
"--format=GITANALYTICS_COMMIT%n%H%n%aN%n%aE%n%aI%n%s",
55+
"--numstat",
56+
}
57+
if sinceHash != "" {
58+
args = append(args, sinceHash+"..HEAD")
59+
}
60+
61+
cmd := exec.Command("git", args...)
62+
stdout, err := cmd.StdoutPipe()
63+
if err != nil {
64+
return nil, fmt.Errorf("creating stdout pipe: %w", err)
65+
}
66+
if err := cmd.Start(); err != nil {
67+
return nil, fmt.Errorf("starting git log: %w", err)
68+
}
69+
70+
return &nativeCommitIter{
71+
scanner: bufio.NewScanner(stdout),
72+
cmd: cmd,
73+
}, nil
74+
}
75+
76+
func (r *nativeRepo) Close() error {
77+
return nil
78+
}
79+
80+
// nativeCommitIter turns the line-oriented output of `git log --numstat`
// into commits, one per call to Next. It can push a single line back so that
// the start of the following record is detected without being consumed.
type nativeCommitIter struct {
	scanner *bufio.Scanner // line reader over the git output
	cmd     *exec.Cmd      // the git process; killed and reaped by Close

	peeked   bool   // set while peekLine holds a pushed-back line
	peekLine string // the pushed-back line; meaningful only while peeked is set

	exhausted bool // set once the output has been read to its end
}
88+
89+
func (it *nativeCommitIter) nextLine() (string, bool) {
90+
if it.peeked {
91+
it.peeked = false
92+
return it.peekLine, true
93+
}
94+
if it.scanner.Scan() {
95+
return it.scanner.Text(), true
96+
}
97+
return "", false
98+
}
99+
100+
func (it *nativeCommitIter) unread(line string) {
101+
it.peeked = true
102+
it.peekLine = line
103+
}
104+
105+
func (it *nativeCommitIter) Next() (*Commit, error) {
106+
if it.exhausted {
107+
return nil, nil
108+
}
109+
110+
// Scan until we find the sentinel line.
111+
for {
112+
line, ok := it.nextLine()
113+
if !ok {
114+
it.exhausted = true
115+
return nil, nil
116+
}
117+
if line == "GITANALYTICS_COMMIT" {
118+
break
119+
}
120+
}
121+
122+
// Read 5 metadata lines: hash, name, email, date, subject.
123+
meta := make([]string, 5)
124+
for i := range meta {
125+
line, ok := it.nextLine()
126+
if !ok {
127+
return nil, fmt.Errorf("unexpected end of git log output (expected metadata line %d)", i)
128+
}
129+
meta[i] = line
130+
}
131+
132+
date, err := time.Parse(time.RFC3339, meta[3])
133+
if err != nil {
134+
return nil, fmt.Errorf("parsing date %q: %w", meta[3], err)
135+
}
136+
137+
// Read numstat lines until next sentinel or EOF.
138+
var files []FileStat
139+
for {
140+
line, ok := it.nextLine()
141+
if !ok {
142+
it.exhausted = true
143+
break
144+
}
145+
if line == "GITANALYTICS_COMMIT" {
146+
it.unread(line)
147+
break
148+
}
149+
if line == "" {
150+
continue
151+
}
152+
153+
fs, err := parseNumstatLine(line)
154+
if err != nil {
155+
continue // skip unparseable lines
156+
}
157+
files = append(files, fs)
158+
}
159+
160+
return &Commit{
161+
Hash: meta[0],
162+
AuthorName: meta[1],
163+
AuthorEmail: meta[2],
164+
Date: date,
165+
Message: meta[4],
166+
FilesChanged: files,
167+
}, nil
168+
}
169+
170+
func (it *nativeCommitIter) Close() {
171+
if it.cmd != nil && it.cmd.Process != nil {
172+
it.cmd.Process.Kill()
173+
it.cmd.Wait()
174+
}
175+
}
176+
177+
// parseNumstatLine parses a single --numstat output line.
178+
// Format: "additions\tdeletions\tpath"
179+
// Binary files show "-\t-\tpath" — treated as 0/0.
180+
func parseNumstatLine(line string) (FileStat, error) {
181+
parts := strings.SplitN(line, "\t", 3)
182+
if len(parts) != 3 {
183+
return FileStat{}, fmt.Errorf("expected 3 tab-separated fields, got %d", len(parts))
184+
}
185+
186+
var additions, deletions int
187+
if parts[0] != "-" {
188+
var err error
189+
additions, err = strconv.Atoi(parts[0])
190+
if err != nil {
191+
return FileStat{}, fmt.Errorf("parsing additions %q: %w", parts[0], err)
192+
}
193+
}
194+
if parts[1] != "-" {
195+
var err error
196+
deletions, err = strconv.Atoi(parts[1])
197+
if err != nil {
198+
return FileStat{}, fmt.Errorf("parsing deletions %q: %w", parts[1], err)
199+
}
200+
}
201+
202+
return FileStat{
203+
Path: parts[2],
204+
Additions: additions,
205+
Deletions: deletions,
206+
}, nil
207+
}

internal/git/native_test.go

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
package git_test
2+
3+
import (
4+
"testing"
5+
6+
"git-analytics/internal/git"
7+
)
8+
9+
func TestNativeOpenAndLog(t *testing.T) {
10+
repoPath := initTestRepo(t)
11+
12+
repo, err := git.NativeOpen(repoPath)
13+
if err != nil {
14+
t.Fatalf("NativeOpen: %v", err)
15+
}
16+
defer repo.Close()
17+
18+
headHash, err := repo.HeadHash()
19+
if err != nil {
20+
t.Fatalf("HeadHash: %v", err)
21+
}
22+
if headHash == "" {
23+
t.Fatal("HeadHash returned empty string")
24+
}
25+
26+
iter, err := repo.Log("")
27+
if err != nil {
28+
t.Fatalf("Log: %v", err)
29+
}
30+
defer iter.Close()
31+
32+
var commits []git.Commit
33+
for {
34+
c, err := iter.Next()
35+
if err != nil {
36+
t.Fatalf("Next: %v", err)
37+
}
38+
if c == nil {
39+
break
40+
}
41+
commits = append(commits, *c)
42+
}
43+
44+
// We created 2 commits in initTestRepo.
45+
if len(commits) != 2 {
46+
t.Fatalf("expected 2 commits, got %d", len(commits))
47+
}
48+
49+
// Commits should be in reverse chronological order.
50+
second := commits[0]
51+
first := commits[1]
52+
53+
if first.AuthorName != "Test User" {
54+
t.Errorf("expected author 'Test User', got %q", first.AuthorName)
55+
}
56+
if first.AuthorEmail != "test@example.com" {
57+
t.Errorf("expected email 'test@example.com', got %q", first.AuthorEmail)
58+
}
59+
if first.Message != "first commit\n" && first.Message != "first commit" {
60+
t.Errorf("unexpected message %q", first.Message)
61+
}
62+
if first.Date.IsZero() {
63+
t.Error("expected non-zero date")
64+
}
65+
66+
// First commit should have 1 file added.
67+
if len(first.FilesChanged) != 1 {
68+
t.Fatalf("first commit: expected 1 file changed, got %d", len(first.FilesChanged))
69+
}
70+
if first.FilesChanged[0].Path != "hello.txt" {
71+
t.Errorf("expected file 'hello.txt', got %q", first.FilesChanged[0].Path)
72+
}
73+
if first.FilesChanged[0].Additions != 1 {
74+
t.Errorf("expected 1 addition, got %d", first.FilesChanged[0].Additions)
75+
}
76+
77+
// Second commit should modify hello.txt.
78+
if second.Message != "second commit\n" && second.Message != "second commit" {
79+
t.Errorf("unexpected message %q", second.Message)
80+
}
81+
if len(second.FilesChanged) != 1 {
82+
t.Fatalf("second commit: expected 1 file changed, got %d", len(second.FilesChanged))
83+
}
84+
}
85+
86+
func TestNativeLogSinceHash(t *testing.T) {
87+
repoPath := initTestRepo(t)
88+
89+
repo, err := git.NativeOpen(repoPath)
90+
if err != nil {
91+
t.Fatalf("NativeOpen: %v", err)
92+
}
93+
defer repo.Close()
94+
95+
// Get all commits to find the first commit's hash.
96+
iter, err := repo.Log("")
97+
if err != nil {
98+
t.Fatalf("Log: %v", err)
99+
}
100+
var allCommits []git.Commit
101+
for {
102+
c, err := iter.Next()
103+
if err != nil {
104+
t.Fatalf("Next: %v", err)
105+
}
106+
if c == nil {
107+
break
108+
}
109+
allCommits = append(allCommits, *c)
110+
}
111+
iter.Close()
112+
113+
if len(allCommits) != 2 {
114+
t.Fatalf("expected 2 commits, got %d", len(allCommits))
115+
}
116+
117+
// Log since the first commit — should only return the second commit.
118+
firstHash := allCommits[1].Hash // oldest commit
119+
iter2, err := repo.Log(firstHash)
120+
if err != nil {
121+
t.Fatalf("Log(sinceHash): %v", err)
122+
}
123+
defer iter2.Close()
124+
125+
var newCommits []git.Commit
126+
for {
127+
c, err := iter2.Next()
128+
if err != nil {
129+
t.Fatalf("Next: %v", err)
130+
}
131+
if c == nil {
132+
break
133+
}
134+
newCommits = append(newCommits, *c)
135+
}
136+
137+
if len(newCommits) != 1 {
138+
t.Fatalf("expected 1 new commit, got %d", len(newCommits))
139+
}
140+
if newCommits[0].Hash != allCommits[0].Hash {
141+
t.Errorf("expected hash %s, got %s", allCommits[0].Hash, newCommits[0].Hash)
142+
}
143+
}
144+
145+
func TestNativeHeadHash(t *testing.T) {
146+
repoPath := initTestRepo(t)
147+
148+
repo, err := git.NativeOpen(repoPath)
149+
if err != nil {
150+
t.Fatalf("NativeOpen: %v", err)
151+
}
152+
defer repo.Close()
153+
154+
hash, err := repo.HeadHash()
155+
if err != nil {
156+
t.Fatalf("HeadHash: %v", err)
157+
}
158+
159+
// SHA1 hex is 40 chars.
160+
if len(hash) != 40 {
161+
t.Errorf("expected 40-char hash, got %d chars: %q", len(hash), hash)
162+
}
163+
}

0 commit comments

Comments
 (0)