diff --git a/README.md b/README.md index a94e626..7bb6fa1 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ know what they are looking for. | MCP | `mcp` | JSON host configs: `mcp.json`, `.mcp.json`, `claude_desktop_config.json`, `mcp_config.json`, `mcp_settings.json`, `cline_mcp_settings.json`, plus `~/.gemini/settings.json` (Gemini CLI / Code Assist). Non-JSON configs (Codex `config.toml`, Continue YAML) are not parsed in v0.1. | | Editor extensions | `editor-extension` | VS Code, Cursor, Windsurf, VSCodium manifests | | Browser extensions | `browser-extension` | Chromium-family (`manifest.json`) and Firefox (`extensions.json`) per profile | +| Homebrew | `homebrew` | Formula `INSTALL_RECEIPT.json` files and cask `.metadata` install markers | Per-ecosystem detail: [docs/inventory-sources.md](docs/inventory-sources.md). diff --git a/cmd/bumblebee/main.go b/cmd/bumblebee/main.go index a65845c..7569a7a 100644 --- a/cmd/bumblebee/main.go +++ b/cmd/bumblebee/main.go @@ -131,7 +131,7 @@ func registerScanFlags(fs *flag.FlagSet, o *scanOpts) { "scan profile: baseline (bounded known package/tool roots), project (configured developer/project roots), or deep (incident-response exposure scan; may include user home roots)") fs.Var(&o.roots, "root", "directory to scan (repeatable or comma-separated; unrelated to running as root). 
Required for deep; optional for baseline/project.") fs.Var(&o.excludes, "exclude", "additional directory name or suffix path to exclude (repeatable)") - fs.Var(&o.ecosystems, "ecosystem", "limit scanning to emitted ecosystem values (repeatable or comma-separated): npm,pypi,go,rubygems,packagist,mcp,editor-extension,browser-extension") + fs.Var(&o.ecosystems, "ecosystem", "limit scanning to emitted ecosystem values (repeatable or comma-separated): npm,pypi,go,rubygems,packagist,mcp,editor-extension,browser-extension,homebrew") fs.Int64Var(&o.maxFileSize, "max-file-size", 5*1024*1024, "max bytes to read from any single metadata file") fs.DurationVar(&o.maxDuration, "max-duration", 0, "max wall-clock duration for the whole scan (0 = unbounded)") fs.IntVar(&o.concurrency, "concurrency", 4, "number of concurrent file parsers") diff --git a/cmd/bumblebee/main_test.go b/cmd/bumblebee/main_test.go index 7e9c1d1..39dac56 100644 --- a/cmd/bumblebee/main_test.go +++ b/cmd/bumblebee/main_test.go @@ -326,6 +326,23 @@ func TestClassifyRootEditorExtension(t *testing.T) { } } +func TestClassifyRootHomebrewCellarAndCaskroom(t *testing.T) { + for _, p := range []string{ + "/opt/homebrew/Cellar", + "/opt/homebrew/Caskroom", + "/usr/local/Cellar", + "/usr/local/Caskroom", + "/home/linuxbrew/.linuxbrew/Cellar", + "/home/linuxbrew/.linuxbrew/Caskroom", + "/custom/prefix/Cellar", + "/custom/prefix/Caskroom", + } { + if got := classifyRoot(p, model.ProfileBaseline); got != model.RootKindHomebrew { + t.Errorf("classifyRoot(%q) = %q, want %q", p, got, model.RootKindHomebrew) + } + } +} + func TestIsLikelyUserHomeName(t *testing.T) { keep := []string{"alice", "bob", "Alice", "user1", "first.last"} drop := []string{"", ".", "..", ".DS_Store", ".localized", "Shared", "shared", "Guest", "guest", "root", "Deleted Users"} diff --git a/cmd/bumblebee/roots.go b/cmd/bumblebee/roots.go index bde8205..d8886b5 100644 --- a/cmd/bumblebee/roots.go +++ b/cmd/bumblebee/roots.go @@ -141,7 +141,11 @@ func 
classifyRoot(path, profile string) string { strings.HasSuffix(p, "/.config/Claude Code") || strings.HasSuffix(p, "/.continue"): return model.RootKindMCPConfig - case p == "/opt/homebrew/lib" || p == "/usr/local/lib" || strings.HasSuffix(p, "/Library/Python"): + case p == "/opt/homebrew/lib" || + p == "/usr/local/lib" || + strings.HasSuffix(p, "/Cellar") || + strings.HasSuffix(p, "/Caskroom") || + strings.HasSuffix(p, "/Library/Python"): return model.RootKindHomebrew case isBroadHomeRoot(path): return model.RootKindDeepHome @@ -289,13 +293,22 @@ func projectHomeCandidates(home string) []scanner.Root { func systemRoots() []scanner.Root { switch runtime.GOOS { case "darwin": - return []scanner.Root{ + roots := []scanner.Root{ + {Path: "/opt/homebrew/Cellar", Kind: model.RootKindHomebrew}, + {Path: "/opt/homebrew/Caskroom", Kind: model.RootKindHomebrew}, {Path: "/opt/homebrew/lib", Kind: model.RootKindHomebrew}, + {Path: "/usr/local/Cellar", Kind: model.RootKindHomebrew}, + {Path: "/usr/local/Caskroom", Kind: model.RootKindHomebrew}, {Path: "/usr/local/lib", Kind: model.RootKindHomebrew}, {Path: "/Library/Python", Kind: model.RootKindHomebrew}, } + return roots case "linux": - roots := []scanner.Root{{Path: "/usr/local/lib", Kind: model.RootKindGlobalPackage}} + roots := []scanner.Root{ + {Path: "/usr/local/lib", Kind: model.RootKindGlobalPackage}, + {Path: "/home/linuxbrew/.linuxbrew/Cellar", Kind: model.RootKindHomebrew}, + {Path: "/home/linuxbrew/.linuxbrew/Caskroom", Kind: model.RootKindHomebrew}, + } for _, pattern := range []string{"/usr/lib/python*"} { for _, p := range globExisting(pattern) { roots = append(roots, scanner.Root{Path: p, Kind: model.RootKindGlobalPackage}) diff --git a/cmd/bumblebee/selftest.go b/cmd/bumblebee/selftest.go index 5167bc2..063bd62 100644 --- a/cmd/bumblebee/selftest.go +++ b/cmd/bumblebee/selftest.go @@ -18,17 +18,17 @@ import ( "github.com/perplexityai/bumblebee/internal/scanner" ) -//go:embed selftest/fixtures 
selftest/catalog.json +//go:embed all:selftest/fixtures selftest/catalog.json var selftestFS embed.FS // expectedSelftestFindings is the count of catalog-matched findings the // embedded fixtures must produce. One npm package-lock.json entry, one -// PyPI dist-info METADATA file, and one MCP config naming a pinned -// docker image — each matched against the embedded catalog: three -// findings. The MCP fixture guards against regressions in the MCP -// parser/scanner integration (basename dispatch, docker tag split, -// catalog matching for the mcp ecosystem). -const expectedSelftestFindings = 3 +// PyPI dist-info METADATA file, one MCP config naming a pinned docker +// image, one Homebrew formula receipt, and one Homebrew cask metadata +// marker — each matched against the embedded catalog: five findings. The +// MCP and Homebrew fixtures guard scanner integration for basename/path +// dispatch and catalog matching. +const expectedSelftestFindings = 5 // runSelftest extracts the embedded fixture tree to a temp directory, // runs the scanner with the embedded exposure catalog, and asserts the diff --git a/cmd/bumblebee/selftest/catalog.json b/cmd/bumblebee/selftest/catalog.json index d44e58c..f2702aa 100644 --- a/cmd/bumblebee/selftest/catalog.json +++ b/cmd/bumblebee/selftest/catalog.json @@ -28,6 +28,24 @@ "versions": ["0.0.0"], "severity": "critical", "source": "bumblebee selftest" + }, + { + "id": "selftest-homebrew-formula-evil", + "name": "bumblebee selftest fixture (homebrew formula)", + "ecosystem": "homebrew", + "package": "bumblebee-selftest-brew", + "versions": ["0.0.0"], + "severity": "critical", + "source": "bumblebee selftest" + }, + { + "id": "selftest-homebrew-cask-evil", + "name": "bumblebee selftest fixture (homebrew cask)", + "ecosystem": "homebrew", + "package": "bumblebee-selftest-cask", + "versions": ["0.0.0"], + "severity": "critical", + "source": "bumblebee selftest" } ] } diff --git 
a/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Caskroom/bumblebee-selftest-cask/.metadata/0.0.0/20260523010203.004/Casks/bumblebee-selftest-cask.json b/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Caskroom/bumblebee-selftest-cask/.metadata/0.0.0/20260523010203.004/Casks/bumblebee-selftest-cask.json new file mode 100644 index 0000000..6243fd8 --- /dev/null +++ b/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Caskroom/bumblebee-selftest-cask/.metadata/0.0.0/20260523010203.004/Casks/bumblebee-selftest-cask.json @@ -0,0 +1,4 @@ +{ + "token": "bumblebee-selftest-cask", + "version": "0.0.0" +} diff --git a/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Caskroom/bumblebee-selftest-cask/.metadata/INSTALL_RECEIPT.json b/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Caskroom/bumblebee-selftest-cask/.metadata/INSTALL_RECEIPT.json new file mode 100644 index 0000000..cb604b4 --- /dev/null +++ b/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Caskroom/bumblebee-selftest-cask/.metadata/INSTALL_RECEIPT.json @@ -0,0 +1,7 @@ +{ + "installed_on_request": true, + "source": { + "tap": "homebrew/cask", + "version": "0.0.0" + } +} diff --git a/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Cellar/bumblebee-selftest-brew/0.0.0/INSTALL_RECEIPT.json b/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Cellar/bumblebee-selftest-brew/0.0.0/INSTALL_RECEIPT.json new file mode 100644 index 0000000..121f886 --- /dev/null +++ b/cmd/bumblebee/selftest/fixtures/homebrew-fixture/Cellar/bumblebee-selftest-brew/0.0.0/INSTALL_RECEIPT.json @@ -0,0 +1,7 @@ +{ + "installed_on_request": true, + "source": { + "tap": "homebrew/core", + "spec": "stable" + } +} diff --git a/docs/inventory-sources.md b/docs/inventory-sources.md index a2dfb81..2cb6327 100644 --- a/docs/inventory-sources.md +++ b/docs/inventory-sources.md @@ -29,13 +29,54 @@ Each scan profile reads from a different slice of the sources below: | Profile | Sources walked | 
|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `baseline` | Homebrew lib prefixes; `/Library/Python`; Linux system Python (`/usr/lib/python*`, plus `/usr/local/lib`); user Python (`~/.local/lib/python*`, `~/.local/share/pipx/venvs`, `pyenv`); language version managers (`asdf`, `nvm`, `rbenv`, `rvm`); `~/.cargo`; `~/go`; editor-extension trees; MCP config locations; per-profile browser-extension trees (Chromium-family + Firefox-family, including common snap/flatpak paths). No project trees. | +| `baseline` | Homebrew `Cellar` / `Caskroom` install metadata and lib prefixes; `/Library/Python`; Linux system Python (`/usr/lib/python*`, plus `/usr/local/lib`); user Python (`~/.local/lib/python*`, `~/.local/share/pipx/venvs`, `pyenv`); language version managers (`asdf`, `nvm`, `rbenv`, `rvm`); `~/.cargo`; `~/go`; editor-extension trees; MCP config locations; per-profile browser-extension trees (Chromium-family + Firefox-family, including common snap/flatpak paths). No project trees. | | `project` | Configured developer/project roots (`~/code`, `~/src`, `~/Developer`, `~/Projects`, `~/workspace`, and any explicit `--root`). All ecosystem parsers below apply within those trees. | | `deep` | Operator-supplied roots, typically a bare home directory during a campaign. Same ecosystem parsers; recommended only in combination with `--exposure-catalog` to emit `record_type=finding` records. | The `source_type` values emitted are the same across profiles. What changes is the population of files the walker visits. +## Homebrew + +Files read: + +- Formulae: `/Cellar///INSTALL_RECEIPT.json`. + The formula name and installed version are derived from the Cellar path, + matching Homebrew's own filesystem listing behavior. 
The receipt is read + only for small install metadata such as `installed_on_request`, which is + emitted as `direct_dependency` when present. +- Casks: `<prefix>/Caskroom/<token>/.metadata/<version>/<timestamp>/Casks/<token>.{internal.json,json,rb}` + as the installed-cask marker, plus the optional cask-level + `<prefix>/Caskroom/<token>/.metadata/INSTALL_RECEIPT.json` for + `installed_on_request`. JSON marker files are not used as a broad + metadata source in v0.1, and `.rb` cask definitions are never opened; + a `.rb` marker only proves Homebrew saved an installed cask snapshot. + +Captured fields emitted on the record: `package_name` (formula rack name +or cask token), `version`, `package_manager=homebrew`, +`source_type` (`homebrew-formula-receipt` or `homebrew-cask-metadata`), +and `direct_dependency` when Homebrew's receipt records +`installed_on_request`. + +Baseline defaults include Apple Silicon macOS (`/opt/homebrew/Cellar`, +`/opt/homebrew/Caskroom`), Intel macOS (`/usr/local/Cellar`, +`/usr/local/Caskroom`), Linuxbrew (`/home/linuxbrew/.linuxbrew/Cellar`, +`/home/linuxbrew/.linuxbrew/Caskroom`). Custom Homebrew prefixes are not +inferred from environment variables; pass their `Cellar` and `Caskroom` +paths with `--root` when they need coverage. + +We do not run `brew list`, `brew info`, or any other Homebrew command. We +do not read formula Ruby files, cask Ruby definitions, installed payload +files, app bundles, linked `opt` symlinks, pinned symlinks, or tap source +trees. Tap names, bottle/source build flags, install timestamps, runtime +dependencies, and artifact lists may exist in Homebrew metadata but are +not emitted in v0.1's slim schema. + +References: + +- Homebrew formula tab metadata: +- Homebrew installation paths: + ## npm Files read: @@ -506,7 +547,8 @@ strong installed-state correlation tooling today. ## What this collector deliberately does not do - No package-manager command execution. No `npm ls`, no `pnpm list`, no - `pip show`, no `go list`, no `bundle list`, no `composer show`.
+ `pip show`, no `go list`, no `bundle list`, no `composer show`, no + `brew list`. - No source-file reading. Only the metadata files listed above. The walker visits directories; the scanners open only the targeted files. - No bundled threat intelligence. Bumblebee ships no built-in advisory diff --git a/docs/schema/v0.1.0/exposure-catalog.schema.json b/docs/schema/v0.1.0/exposure-catalog.schema.json index f34877d..e661e05 100644 --- a/docs/schema/v0.1.0/exposure-catalog.schema.json +++ b/docs/schema/v0.1.0/exposure-catalog.schema.json @@ -17,7 +17,7 @@ "id": { "type": "string" }, "name": { "type": "string" }, "ecosystem": { - "enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension"] + "enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension", "homebrew"] }, "package": { "type": "string" }, "versions": { diff --git a/docs/schema/v0.1.0/package-record.schema.json b/docs/schema/v0.1.0/package-record.schema.json index 93b5131..0098d0e 100644 --- a/docs/schema/v0.1.0/package-record.schema.json +++ b/docs/schema/v0.1.0/package-record.schema.json @@ -34,7 +34,7 @@ "endpoint": { "$ref": "#/$defs/endpoint" }, "profile": { "enum": ["baseline", "project", "deep"] }, "ecosystem": { - "enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension"] + "enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension", "homebrew"] }, "package_name": { "type": "string" }, "normalized_name": { "type": "string" }, diff --git a/internal/ecosystem/homebrew/homebrew.go b/internal/ecosystem/homebrew/homebrew.go new file mode 100644 index 0000000..54fd4dd --- /dev/null +++ b/internal/ecosystem/homebrew/homebrew.go @@ -0,0 +1,264 @@ +// Package homebrew scans installed Homebrew formula and cask metadata. +// +// Homebrew records are derived from install metadata path shapes only. 
The +// scanner never executes `brew`; for casks installed from Ruby definitions it +// treats the saved .rb caskfile as an existence marker and does not open it. +package homebrew + +import ( + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + + "github.com/perplexityai/bumblebee/internal/model" +) + +const Ecosystem = model.EcosystemHomebrew + +const ( + receiptFile = "INSTALL_RECEIPT.json" + packageManager = "homebrew" + formulaSourceType = "homebrew-formula-receipt" + caskSourceType = "homebrew-cask-metadata" +) + +type Scanner struct { + MaxFileSize int64 + Emit func(model.Record) + Diag func(level, path, msg string) +} + +// IsFormulaReceipt reports whether path is a Homebrew formula receipt under +// .../Cellar/<name>/<version>/INSTALL_RECEIPT.json. +func IsFormulaReceipt(path string) (ok bool, name, version, cellarDir string) { + if filepath.Base(path) != receiptFile { + return false, "", "", "" + } + kegDir := filepath.Dir(path) + rackDir := filepath.Dir(kegDir) + cellarDir = filepath.Dir(rackDir) + if filepath.Base(cellarDir) != "Cellar" { + return false, "", "", "" + } + name = filepath.Base(rackDir) + version = filepath.Base(kegDir) + if invalidPathSegment(name) || invalidPathSegment(version) { + return false, "", "", "" + } + return true, name, version, cellarDir +} + +// IsCaskMetadataMarker reports whether path is the preferred installed-cask +// marker for .../Caskroom/<token>/.metadata/<version>/<timestamp>/Casks/<marker>. +func IsCaskMetadataMarker(path string) (ok bool, token, version, caskroomDir string) { + m, ok := parseCaskMetadataPath(path) + if !ok { + return false, "", "", "" + } + preferred, ok := preferredCaskMarker(m.tokenDir, m.version, m.token) + if !ok || filepath.Clean(preferred) != m.cleanPath { + return false, "", "", "" + } + return true, m.token, m.version, m.caskroomDir +} + +// LooksLikeCaskMetadataMarker reports whether path has the installed-cask +// metadata marker shape, before applying latest-timestamp/preferred-file +// selection.
+func LooksLikeCaskMetadataMarker(path string) bool { + _, ok := parseCaskMetadataPath(path) + return ok +} + +type caskMetadataPath struct { + cleanPath string + token string + version string + tokenDir string + caskroomDir string +} + +func parseCaskMetadataPath(path string) (caskMetadataPath, bool) { + clean := filepath.Clean(path) + base := filepath.Base(clean) + markerToken, ok := caskTokenFromMarker(base) + if !ok { + return caskMetadataPath{}, false + } + casksDir := filepath.Dir(clean) + timestampDir := filepath.Dir(casksDir) + versionDir := filepath.Dir(timestampDir) + metadataDir := filepath.Dir(versionDir) + tokenDir := filepath.Dir(metadataDir) + caskroomDir := filepath.Dir(tokenDir) + + if filepath.Base(casksDir) != "Casks" || + filepath.Base(metadataDir) != ".metadata" || + filepath.Base(caskroomDir) != "Caskroom" { + return caskMetadataPath{}, false + } + token := filepath.Base(tokenDir) + version := filepath.Base(versionDir) + if invalidPathSegment(token) || invalidPathSegment(version) || invalidPathSegment(filepath.Base(timestampDir)) { + return caskMetadataPath{}, false + } + if markerToken != token { + return caskMetadataPath{}, false + } + return caskMetadataPath{ + cleanPath: clean, + token: token, + version: version, + tokenDir: tokenDir, + caskroomDir: caskroomDir, + }, true +} + +func (s *Scanner) ScanFormulaReceipt(path, name, version, cellarDir string, base model.Record) error { + data, err := s.readBounded(path) + if err != nil { + return err + } + var receipt installReceipt + if err := json.Unmarshal(data, &receipt); err != nil { + if s.Diag != nil { + s.Diag("warn", path, "skipping receipt fields: "+err.Error()) + } + receipt = installReceipt{} + } + + r := base + r.Ecosystem = Ecosystem + r.PackageName = name + r.NormalizedName = strings.ToLower(strings.TrimSpace(name)) + r.Version = version + r.ProjectPath = cellarDir + r.RootKind = model.RootKindHomebrew + r.PackageManager = packageManager + r.SourceType = formulaSourceType + 
r.SourceFile = path + r.DirectDependency = receipt.InstalledOnRequest + r.Confidence = "high" + s.Emit(r) + return nil +} + +func (s *Scanner) ScanCaskMetadata(path, token, version, caskroomDir string, base model.Record) error { + tokenDir := filepath.Join(caskroomDir, token) + receipt := s.readCaskReceipt(filepath.Join(tokenDir, ".metadata", receiptFile)) + + r := base + r.Ecosystem = Ecosystem + r.PackageName = token + r.NormalizedName = strings.ToLower(strings.TrimSpace(token)) + r.Version = version + r.ProjectPath = caskroomDir + r.RootKind = model.RootKindHomebrew + r.PackageManager = packageManager + r.SourceType = caskSourceType + r.SourceFile = path + r.DirectDependency = receipt.InstalledOnRequest + r.Confidence = "high" + s.Emit(r) + return nil +} + +type installReceipt struct { + InstalledOnRequest *bool `json:"installed_on_request"` +} + +func invalidPathSegment(s string) bool { + return s == "" || s == "." || s == ".." +} + +func caskTokenFromMarker(base string) (string, bool) { + for _, suffix := range []string{".internal.json", ".json", ".rb"} { + if strings.HasSuffix(base, suffix) { + token := strings.TrimSuffix(base, suffix) + if token != "" { + return token, true + } + } + } + return "", false +} + +func preferredCaskMarker(tokenDir, version, token string) (string, bool) { + versionDir := filepath.Join(tokenDir, ".metadata", version) + entries, err := os.ReadDir(versionDir) + if err != nil { + return "", false + } + var timestamps []string + for _, e := range entries { + if e.IsDir() && !invalidPathSegment(e.Name()) { + timestamps = append(timestamps, e.Name()) + } + } + sort.Strings(timestamps) + for i := len(timestamps) - 1; i >= 0; i-- { + casksDir := filepath.Join(versionDir, timestamps[i], "Casks") + for _, suffix := range []string{".internal.json", ".json", ".rb"} { + candidate := filepath.Join(casksDir, token+suffix) + if fileExists(candidate) { + return candidate, true + } + } + } + return "", false +} + +func fileExists(path string) bool 
{ + info, err := os.Stat(path) + return err == nil && !info.IsDir() +} + +func (s *Scanner) readCaskReceipt(path string) installReceipt { + data, ok := s.readOptional(path) + if !ok { + return installReceipt{} + } + var receipt installReceipt + if err := json.Unmarshal(data, &receipt); err != nil { + if s.Diag != nil { + s.Diag("warn", path, "skipping receipt fields: "+err.Error()) + } + receipt = installReceipt{} + } + return receipt +} + +func (s *Scanner) readBounded(path string) ([]byte, error) { + f, err := os.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + info, err := f.Stat() + if err != nil { + return nil, err + } + if !info.Mode().IsRegular() { + return nil, errors.New("not a regular file") + } + if s.MaxFileSize > 0 && info.Size() > s.MaxFileSize { + if s.Diag != nil { + s.Diag("warn", path, fmt.Sprintf("skipping: size %d exceeds max %d", info.Size(), s.MaxFileSize)) + } + return nil, fmt.Errorf("file %s exceeds max size %d", path, s.MaxFileSize) + } + return io.ReadAll(f) +} + +func (s *Scanner) readOptional(path string) ([]byte, bool) { + data, err := s.readBounded(path) + if err != nil { + return nil, false + } + return data, true +} diff --git a/internal/ecosystem/homebrew/homebrew_test.go b/internal/ecosystem/homebrew/homebrew_test.go new file mode 100644 index 0000000..fab7ab7 --- /dev/null +++ b/internal/ecosystem/homebrew/homebrew_test.go @@ -0,0 +1,208 @@ +package homebrew + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/perplexityai/bumblebee/internal/model" +) + +func TestScanFormulaReceipt(t *testing.T) { + root := t.TempDir() + receipt := filepath.Join(root, "Cellar", "wget", "1.21.4", receiptFile) + writeFile(t, receipt, `{"installed_on_request":true,"source":{"tap":"homebrew/core"}}`) + + ok, name, version, cellarDir := IsFormulaReceipt(receipt) + if !ok { + t.Fatalf("IsFormulaReceipt(%q) = false", receipt) + } + var out []model.Record + s := &Scanner{ + MaxFileSize: 1024, + Emit: func(r 
model.Record) { out = append(out, r) }, + Diag: func(string, string, string) {}, + } + if err := s.ScanFormulaReceipt(receipt, name, version, cellarDir, model.Record{}); err != nil { + t.Fatalf("ScanFormulaReceipt: %v", err) + } + if len(out) != 1 { + t.Fatalf("records = %d, want 1", len(out)) + } + r := out[0] + if r.Ecosystem != model.EcosystemHomebrew || r.PackageName != "wget" || r.NormalizedName != "wget" || r.Version != "1.21.4" { + t.Fatalf("unexpected record identity: %+v", r) + } + if r.ProjectPath != filepath.Join(root, "Cellar") { + t.Errorf("ProjectPath = %q, want Cellar dir", r.ProjectPath) + } + if r.PackageManager != "homebrew" || r.SourceType != "homebrew-formula-receipt" || r.Confidence != "high" { + t.Errorf("unexpected source fields: %+v", r) + } + if r.DirectDependency == nil || !*r.DirectDependency { + t.Fatalf("DirectDependency = %v, want true", r.DirectDependency) + } +} + +func TestScanFormulaReceiptWarnsOnMalformedReceiptFields(t *testing.T) { + root := t.TempDir() + receipt := filepath.Join(root, "Cellar", "broken", "1.0.0", receiptFile) + writeFile(t, receipt, `{"installed_on_request":"not-a-bool"}`) + + ok, name, version, cellarDir := IsFormulaReceipt(receipt) + if !ok { + t.Fatalf("IsFormulaReceipt(%q) = false", receipt) + } + var out []model.Record + var diagnostics []string + s := &Scanner{ + MaxFileSize: 1024, + Emit: func(r model.Record) { out = append(out, r) }, + Diag: func(_, _, msg string) { diagnostics = append(diagnostics, msg) }, + } + if err := s.ScanFormulaReceipt(receipt, name, version, cellarDir, model.Record{}); err != nil { + t.Fatalf("ScanFormulaReceipt: %v", err) + } + if len(out) != 1 { + t.Fatalf("records = %d, want 1", len(out)) + } + if out[0].DirectDependency != nil { + t.Fatalf("DirectDependency = %v, want nil", out[0].DirectDependency) + } + if len(diagnostics) == 0 || !strings.Contains(diagnostics[0], "skipping receipt fields") { + t.Fatalf("expected malformed receipt diagnostic, got %v", diagnostics) + } +} + 
+func TestCaskMetadataMarkerPreference(t *testing.T) { + root := t.TempDir() + casksDir := filepath.Join(root, "Caskroom", "foo", ".metadata", "1.2.3", "20260523010203.004", "Casks") + internalPath := filepath.Join(casksDir, "foo.internal.json") + jsonPath := filepath.Join(casksDir, "foo.json") + rbPath := filepath.Join(casksDir, "foo.rb") + writeFile(t, internalPath, `{"token":"foo","version":"1.2.3"}`) + writeFile(t, jsonPath, `{"token":"foo","version":"1.2.3"}`) + writeFile(t, rbPath, `cask "foo" do`) + + if ok, token, version, caskroomDir := IsCaskMetadataMarker(internalPath); !ok || token != "foo" || version != "1.2.3" || caskroomDir != filepath.Join(root, "Caskroom") { + t.Fatalf("internal marker = (%v,%q,%q,%q), want foo 1.2.3 Caskroom", ok, token, version, caskroomDir) + } + for _, p := range []string{jsonPath, rbPath} { + if !LooksLikeCaskMetadataMarker(p) { + t.Errorf("LooksLikeCaskMetadataMarker(%q) = false, want true", p) + } + if ok, _, _, _ := IsCaskMetadataMarker(p); ok { + t.Fatalf("IsCaskMetadataMarker(%q) = true, want false because internal.json is preferred", p) + } + } +} + +func TestScanCaskMetadataFromJSONAndRubyMarkers(t *testing.T) { + root := t.TempDir() + caskroom := filepath.Join(root, "Caskroom") + jsonMarker := filepath.Join(caskroom, "json-only", ".metadata", "2.0.0", "20260523010203.004", "Casks", "json-only.json") + rbMarker := filepath.Join(caskroom, "ruby-only", ".metadata", "latest", "20260523010203.004", "Casks", "ruby-only.rb") + writeFile(t, jsonMarker, `{"token":"json-only","version":"2.0.0"}`) + writeFile(t, rbMarker, `cask "ruby-only" do`) + writeFile(t, filepath.Join(caskroom, "ruby-only", ".metadata", receiptFile), `{"installed_on_request":false}`) + + var out []model.Record + s := &Scanner{ + MaxFileSize: 1024, + Emit: func(r model.Record) { out = append(out, r) }, + Diag: func(string, string, string) {}, + } + for _, path := range []string{jsonMarker, rbMarker} { + ok, token, version, caskroomDir := 
IsCaskMetadataMarker(path) + if !ok { + t.Fatalf("IsCaskMetadataMarker(%q) = false", path) + } + if err := s.ScanCaskMetadata(path, token, version, caskroomDir, model.Record{}); err != nil { + t.Fatalf("ScanCaskMetadata(%q): %v", path, err) + } + } + if len(out) != 2 { + t.Fatalf("records = %d, want 2", len(out)) + } + got := map[string]model.Record{} + for _, r := range out { + got[r.PackageName] = r + if r.Ecosystem != model.EcosystemHomebrew || r.PackageManager != "homebrew" || r.SourceType != "homebrew-cask-metadata" { + t.Errorf("unexpected cask record: %+v", r) + } + } + if got["json-only"].Version != "2.0.0" { + t.Errorf("json-only version = %q", got["json-only"].Version) + } + if got["json-only"].DirectDependency != nil { + t.Errorf("json-only DirectDependency = %v, want nil", got["json-only"].DirectDependency) + } + ruby := got["ruby-only"] + if ruby.Version != "latest" { + t.Errorf("ruby-only version = %q", ruby.Version) + } + if ruby.DirectDependency == nil || *ruby.DirectDependency { + t.Fatalf("ruby-only DirectDependency = %v, want false", ruby.DirectDependency) + } +} + +func TestCaskMetadataMarkerWithoutTimestampIsNotPreferred(t *testing.T) { + root := t.TempDir() + versionDir := filepath.Join(root, "Caskroom", "foo", ".metadata", "1.2.3") + if err := os.MkdirAll(versionDir, 0o755); err != nil { + t.Fatal(err) + } + if _, ok := preferredCaskMarker(filepath.Join(root, "Caskroom", "foo"), "1.2.3", "foo"); ok { + t.Fatal("preferredCaskMarker found a marker without timestamp subdirs") + } +} + +func TestRejectsUnrelatedHomebrewPaths(t *testing.T) { + root := t.TempDir() + if ok, _, _, _ := IsFormulaReceipt(filepath.Join(root, "not-cellar", "pkg", "1.0.0", receiptFile)); ok { + t.Fatal("unrelated INSTALL_RECEIPT.json matched as formula") + } + unrelated := filepath.Join(root, "Caskroom", "foo", ".metadata", "1.0.0", "Casks", "foo.json") + writeFile(t, unrelated, `{}`) + if ok, _, _, _ := IsCaskMetadataMarker(unrelated); ok { + t.Fatal("metadata path 
without timestamp matched as cask") + } +} + +func TestFormulaReceiptMaxFileSize(t *testing.T) { + root := t.TempDir() + receipt := filepath.Join(root, "Cellar", "big", "1.0.0", receiptFile) + writeFile(t, receipt, strings.Repeat("x", 64)) + + ok, name, version, cellarDir := IsFormulaReceipt(receipt) + if !ok { + t.Fatalf("IsFormulaReceipt(%q) = false", receipt) + } + var emitted bool + var diagnostics []string + s := &Scanner{ + MaxFileSize: 8, + Emit: func(model.Record) { emitted = true }, + Diag: func(_, _, msg string) { diagnostics = append(diagnostics, msg) }, + } + if err := s.ScanFormulaReceipt(receipt, name, version, cellarDir, model.Record{}); err == nil { + t.Fatal("expected max-size error") + } + if emitted { + t.Fatal("oversized receipt emitted a record") + } + if len(diagnostics) == 0 || !strings.Contains(diagnostics[0], "exceeds max") { + t.Fatalf("expected max-size diagnostic, got %v", diagnostics) + } +} + +func writeFile(t *testing.T, path, body string) { + t.Helper() + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(path, []byte(body), 0o644); err != nil { + t.Fatal(err) + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 2bbd436..adff388 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -43,6 +43,7 @@ const ( EcosystemMCP = "mcp" EcosystemEditorExtension = "editor-extension" EcosystemBrowserExtension = "browser-extension" + EcosystemHomebrew = "homebrew" ) var supportedEcosystems = map[string]struct{}{ @@ -54,6 +55,7 @@ var supportedEcosystems = map[string]struct{}{ EcosystemMCP: {}, EcosystemEditorExtension: {}, EcosystemBrowserExtension: {}, + EcosystemHomebrew: {}, } var supportedEcosystemOrder = []string{ @@ -65,6 +67,7 @@ var supportedEcosystemOrder = []string{ EcosystemMCP, EcosystemEditorExtension, EcosystemBrowserExtension, + EcosystemHomebrew, } // SupportedEcosystems returns the emitted ecosystem values supported by v0.1. 
diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go index 29c8c7c..f5aef3f 100644 --- a/internal/scanner/scanner.go +++ b/internal/scanner/scanner.go @@ -23,6 +23,7 @@ import ( "github.com/perplexityai/bumblebee/internal/ecosystem/composer" "github.com/perplexityai/bumblebee/internal/ecosystem/editorext" "github.com/perplexityai/bumblebee/internal/ecosystem/gomod" + "github.com/perplexityai/bumblebee/internal/ecosystem/homebrew" "github.com/perplexityai/bumblebee/internal/ecosystem/mcp" "github.com/perplexityai/bumblebee/internal/ecosystem/npm" "github.com/perplexityai/bumblebee/internal/ecosystem/pnpm" @@ -249,6 +250,7 @@ func Run(ctx context.Context, cfg Config) (Result, error) { mcpS := &mcp.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} extS := &editorext.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} bxS := &browserext.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} + hbS := &homebrew.Scanner{MaxFileSize: cfg.MaxFileSize, Emit: emit, Diag: diag} type job struct { kind string @@ -314,6 +316,10 @@ func Run(ctx context.Context, cfg Config) (Result, error) { err = bxS.ScanChromiumExtension(j.path, j.extra1, j.extra2, j.projectPath, cfg.BaseRecord) case "firefox-ext": err = bxS.ScanFirefoxExtensions(j.path, cfg.BaseRecord) + case "homebrew-formula": + err = hbS.ScanFormulaReceipt(j.path, j.extra1, j.extra2, j.projectPath, cfg.BaseRecord) + case "homebrew-cask": + err = hbS.ScanCaskMetadata(j.path, j.extra1, j.extra2, j.projectPath, cfg.BaseRecord) } if err != nil { cfg.Emitter.Diag("error", j.path, err.Error()) @@ -428,6 +434,19 @@ func Run(ctx context.Context, cfg Config) (Result, error) { if browserext.IsFirefoxExtensionsJSON(path) { send(job{kind: "firefox-ext", path: path}) } + case enabled(model.EcosystemHomebrew) && base == "INSTALL_RECEIPT.json": + if ok, name, version, cellarDir := homebrew.IsFormulaReceipt(path); ok { + send(job{kind: "homebrew-formula", path: path, projectPath: cellarDir, 
extra1: name, extra2: version}) + } + case enabled(model.EcosystemHomebrew) && homebrew.LooksLikeCaskMetadataMarker(path): + // This one intentionally does a tiny sibling check in the walker + // so a cask with .internal.json, .json, and .rb markers emits only + // Homebrew's preferred installed-cask snapshot. That adds serial + // I/O to cask marker dispatch, but typical Caskroom cardinality is + // small and avoiding duplicate records keeps downstream state clean. + if ok, token, version, caskroomDir := homebrew.IsCaskMetadataMarker(path); ok { + send(job{kind: "homebrew-cask", path: path, projectPath: caskroomDir, extra1: token, extra2: version}) + } case base == "package.json": // Prefer extension match over node_modules. if enabled(model.EcosystemEditorExtension) { diff --git a/internal/scanner/scanner_integration_test.go b/internal/scanner/scanner_integration_test.go index 3cf9dec..ac9a0b9 100644 --- a/internal/scanner/scanner_integration_test.go +++ b/internal/scanner/scanner_integration_test.go @@ -107,6 +107,14 @@ PLATFORMS filepath.Join(root, ".vscode", "extensions", "ms-python.python-2024.0.0", "package.json"), `{"name":"python","version":"2024.0.0","publisher":"ms-python"}`) + // Homebrew formula receipt + cask metadata marker. + writeFile(t, + filepath.Join(root, "Cellar", "wget", "1.21.4", "INSTALL_RECEIPT.json"), + `{"installed_on_request":true,"source":{"tap":"homebrew/core"}}`) + writeFile(t, + filepath.Join(root, "Caskroom", "sample-cask", ".metadata", "2.0.0", "20260523010203.004", "Casks", "sample-cask.json"), + `{"token":"sample-cask","version":"2.0.0"}`) + // Chromium-family browser extension (Chrome layout). 
We create the // fixture under a fresh tempdir so the walker's default home-tree // excludes (which suffix-match Library/Application Support/Google/Chrome, @@ -185,13 +193,15 @@ PLATFORMS "mcp-config", "editor-extension", "browser-extension", + "homebrew-formula-receipt", + "homebrew-cask-metadata", } for _, st := range wantSourceTypes { if !gotSource[st] { t.Errorf("missing source_type %q", st) } } - wantEcosystems := []string{"npm", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension"} + wantEcosystems := []string{"npm", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension", "homebrew"} for _, e := range wantEcosystems { if !gotEcosystem[e] { t.Errorf("missing ecosystem %q", e) @@ -210,6 +220,8 @@ PLATFORMS "editor-extension:ms-python.python", "browser-extension:" + chromeExtID, "browser-extension:sample@example.com", + "homebrew:wget", + "homebrew:sample-cask", } for _, p := range wantPkgs { if !gotPkg[p] {