Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ know what they are looking for.
| MCP | `mcp` | JSON host configs: `mcp.json`, `.mcp.json`, `claude_desktop_config.json`, `mcp_config.json`, `mcp_settings.json`, `cline_mcp_settings.json`, plus `~/.gemini/settings.json` (Gemini CLI / Code Assist). Non-JSON configs (Codex `config.toml`, Continue YAML) are not parsed in v0.1. |
| Editor extensions | `editor-extension` | VS Code, Cursor, Windsurf, VSCodium manifests |
| Browser extensions | `browser-extension` | Chromium-family (`manifest.json`) and Firefox (`extensions.json`) per profile |
| Homebrew | `homebrew` | Formula `INSTALL_RECEIPT.json` files and cask `.metadata` install markers |

Per-ecosystem detail: [docs/inventory-sources.md](docs/inventory-sources.md).

Expand Down
2 changes: 1 addition & 1 deletion cmd/bumblebee/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ func registerScanFlags(fs *flag.FlagSet, o *scanOpts) {
"scan profile: baseline (bounded known package/tool roots), project (configured developer/project roots), or deep (incident-response exposure scan; may include user home roots)")
fs.Var(&o.roots, "root", "directory to scan (repeatable or comma-separated; unrelated to running as root). Required for deep; optional for baseline/project.")
fs.Var(&o.excludes, "exclude", "additional directory name or suffix path to exclude (repeatable)")
fs.Var(&o.ecosystems, "ecosystem", "limit scanning to emitted ecosystem values (repeatable or comma-separated): npm,pypi,go,rubygems,packagist,mcp,editor-extension,browser-extension")
fs.Var(&o.ecosystems, "ecosystem", "limit scanning to emitted ecosystem values (repeatable or comma-separated): npm,pypi,go,rubygems,packagist,mcp,editor-extension,browser-extension,homebrew")
fs.Int64Var(&o.maxFileSize, "max-file-size", 5*1024*1024, "max bytes to read from any single metadata file")
fs.DurationVar(&o.maxDuration, "max-duration", 0, "max wall-clock duration for the whole scan (0 = unbounded)")
fs.IntVar(&o.concurrency, "concurrency", 4, "number of concurrent file parsers")
Expand Down
17 changes: 17 additions & 0 deletions cmd/bumblebee/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,23 @@ func TestClassifyRootEditorExtension(t *testing.T) {
}
}

// TestClassifyRootHomebrewCellarAndCaskroom verifies that classifyRoot,
// called with the baseline profile, reports the Homebrew root kind for
// each Cellar and Caskroom path listed below, including paths under a
// non-default prefix.
func TestClassifyRootHomebrewCellarAndCaskroom(t *testing.T) {
	homebrewRoots := []string{
		"/opt/homebrew/Cellar",
		"/opt/homebrew/Caskroom",
		"/usr/local/Cellar",
		"/usr/local/Caskroom",
		"/home/linuxbrew/.linuxbrew/Cellar",
		"/home/linuxbrew/.linuxbrew/Caskroom",
		"/custom/prefix/Cellar",
		"/custom/prefix/Caskroom",
	}

	for _, root := range homebrewRoots {
		kind := classifyRoot(root, model.ProfileBaseline)
		if kind == model.RootKindHomebrew {
			continue
		}
		// Report every misclassified path rather than stopping at the first.
		t.Errorf("classifyRoot(%q) = %q, want %q", root, kind, model.RootKindHomebrew)
	}
}

func TestIsLikelyUserHomeName(t *testing.T) {
keep := []string{"alice", "bob", "Alice", "user1", "first.last"}
drop := []string{"", ".", "..", ".DS_Store", ".localized", "Shared", "shared", "Guest", "guest", "root", "Deleted Users"}
Expand Down
19 changes: 16 additions & 3 deletions cmd/bumblebee/roots.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,11 @@ func classifyRoot(path, profile string) string {
strings.HasSuffix(p, "/.config/Claude Code") ||
strings.HasSuffix(p, "/.continue"):
return model.RootKindMCPConfig
case p == "/opt/homebrew/lib" || p == "/usr/local/lib" || strings.HasSuffix(p, "/Library/Python"):
case p == "/opt/homebrew/lib" ||
p == "/usr/local/lib" ||
strings.HasSuffix(p, "/Cellar") ||
strings.HasSuffix(p, "/Caskroom") ||
strings.HasSuffix(p, "/Library/Python"):
return model.RootKindHomebrew
case isBroadHomeRoot(path):
return model.RootKindDeepHome
Expand Down Expand Up @@ -289,13 +293,22 @@ func projectHomeCandidates(home string) []scanner.Root {
func systemRoots() []scanner.Root {
switch runtime.GOOS {
case "darwin":
return []scanner.Root{
roots := []scanner.Root{
{Path: "/opt/homebrew/Cellar", Kind: model.RootKindHomebrew},
{Path: "/opt/homebrew/Caskroom", Kind: model.RootKindHomebrew},
{Path: "/opt/homebrew/lib", Kind: model.RootKindHomebrew},
{Path: "/usr/local/Cellar", Kind: model.RootKindHomebrew},
{Path: "/usr/local/Caskroom", Kind: model.RootKindHomebrew},
{Path: "/usr/local/lib", Kind: model.RootKindHomebrew},
{Path: "/Library/Python", Kind: model.RootKindHomebrew},
}
return roots
case "linux":
roots := []scanner.Root{{Path: "/usr/local/lib", Kind: model.RootKindGlobalPackage}}
roots := []scanner.Root{
{Path: "/usr/local/lib", Kind: model.RootKindGlobalPackage},
{Path: "/home/linuxbrew/.linuxbrew/Cellar", Kind: model.RootKindHomebrew},
{Path: "/home/linuxbrew/.linuxbrew/Caskroom", Kind: model.RootKindHomebrew},
}
for _, pattern := range []string{"/usr/lib/python*"} {
for _, p := range globExisting(pattern) {
roots = append(roots, scanner.Root{Path: p, Kind: model.RootKindGlobalPackage})
Expand Down
14 changes: 7 additions & 7 deletions cmd/bumblebee/selftest.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ import (
"github.com/perplexityai/bumblebee/internal/scanner"
)

//go:embed selftest/fixtures selftest/catalog.json
//go:embed all:selftest/fixtures selftest/catalog.json
var selftestFS embed.FS

// expectedSelftestFindings is the count of catalog-matched findings the
// embedded fixtures must produce. One npm package-lock.json entry, one
// PyPI dist-info METADATA file, and one MCP config naming a pinned
// docker image — each matched against the embedded catalog: three
// findings. The MCP fixture guards against regressions in the MCP
// parser/scanner integration (basename dispatch, docker tag split,
// catalog matching for the mcp ecosystem).
const expectedSelftestFindings = 3
// PyPI dist-info METADATA file, one MCP config naming a pinned docker
// image, one Homebrew formula receipt, and one Homebrew cask metadata
// marker — each matched against the embedded catalog: five findings. The
// MCP and Homebrew fixtures guard scanner integration for basename/path
// dispatch and catalog matching.
const expectedSelftestFindings = 5

// runSelftest extracts the embedded fixture tree to a temp directory,
// runs the scanner with the embedded exposure catalog, and asserts the
Expand Down
18 changes: 18 additions & 0 deletions cmd/bumblebee/selftest/catalog.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,24 @@
"versions": ["0.0.0"],
"severity": "critical",
"source": "bumblebee selftest"
},
{
"id": "selftest-homebrew-formula-evil",
"name": "bumblebee selftest fixture (homebrew formula)",
"ecosystem": "homebrew",
"package": "bumblebee-selftest-brew",
"versions": ["0.0.0"],
"severity": "critical",
"source": "bumblebee selftest"
},
{
"id": "selftest-homebrew-cask-evil",
"name": "bumblebee selftest fixture (homebrew cask)",
"ecosystem": "homebrew",
"package": "bumblebee-selftest-cask",
"versions": ["0.0.0"],
"severity": "critical",
"source": "bumblebee selftest"
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"token": "bumblebee-selftest-cask",
"version": "0.0.0"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"installed_on_request": true,
"source": {
"tap": "homebrew/cask",
"version": "0.0.0"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"installed_on_request": true,
"source": {
"tap": "homebrew/core",
"spec": "stable"
}
}
46 changes: 44 additions & 2 deletions docs/inventory-sources.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,54 @@ Each scan profile reads from a different slice of the sources below:

| Profile | Sources walked |
|-------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `baseline` | Homebrew lib prefixes; `/Library/Python`; Linux system Python (`/usr/lib/python*`, plus `/usr/local/lib`); user Python (`~/.local/lib/python*`, `~/.local/share/pipx/venvs`, `pyenv`); language version managers (`asdf`, `nvm`, `rbenv`, `rvm`); `~/.cargo`; `~/go`; editor-extension trees; MCP config locations; per-profile browser-extension trees (Chromium-family + Firefox-family, including common snap/flatpak paths). No project trees. |
| `baseline` | Homebrew `Cellar` / `Caskroom` install metadata and lib prefixes; `/Library/Python`; Linux system Python (`/usr/lib/python*`, plus `/usr/local/lib`); user Python (`~/.local/lib/python*`, `~/.local/share/pipx/venvs`, `pyenv`); language version managers (`asdf`, `nvm`, `rbenv`, `rvm`); `~/.cargo`; `~/go`; editor-extension trees; MCP config locations; per-profile browser-extension trees (Chromium-family + Firefox-family, including common snap/flatpak paths). No project trees. |
| `project` | Configured developer/project roots (`~/code`, `~/src`, `~/Developer`, `~/Projects`, `~/workspace`, and any explicit `--root`). All ecosystem parsers below apply within those trees. |
| `deep` | Operator-supplied roots, typically a bare home directory during a campaign. Same ecosystem parsers; recommended only in combination with `--exposure-catalog` to emit `record_type=finding` records. |

The `source_type` values emitted are the same across profiles. What
changes is the population of files the walker visits.

## Homebrew

Files read:

- Formulae: `<prefix>/Cellar/<formula>/<version>/INSTALL_RECEIPT.json`.
The formula name and installed version are derived from the Cellar path,
matching Homebrew's own filesystem listing behavior. The receipt is read
only for small install metadata such as `installed_on_request`, which is
emitted as `direct_dependency` when present.
- Casks: `<prefix>/Caskroom/<token>/.metadata/<version>/<timestamp>/Casks/<token>.{internal.json,json,rb}`
as the installed-cask marker, plus the optional cask-level
`<prefix>/Caskroom/<token>/.metadata/INSTALL_RECEIPT.json` for
`installed_on_request`. JSON marker files are not used as a broad
metadata source in v0.1, and `.rb` cask definitions are never opened;
a `.rb` marker only proves Homebrew saved an installed cask snapshot.

Captured fields emitted on the record: `package_name` (formula rack name
or cask token), `version`, `package_manager=homebrew`,
`source_type` (`homebrew-formula-receipt` or `homebrew-cask-metadata`),
and `direct_dependency` when Homebrew's receipt records
`installed_on_request`.

Baseline defaults include Apple Silicon macOS (`/opt/homebrew/Cellar`,
`/opt/homebrew/Caskroom`), Intel macOS (`/usr/local/Cellar`,
`/usr/local/Caskroom`), and Linuxbrew (`/home/linuxbrew/.linuxbrew/Cellar`,
`/home/linuxbrew/.linuxbrew/Caskroom`). Custom Homebrew prefixes are not
inferred from environment variables; pass their `Cellar` and `Caskroom`
paths with `--root` when they need coverage.

We do not run `brew list`, `brew info`, or any other Homebrew command. We
do not read formula Ruby files, cask Ruby definitions, installed payload
files, app bundles, linked `opt` symlinks, pinned symlinks, or tap source
trees. Tap names, bottle/source build flags, install timestamps, runtime
dependencies, and artifact lists may exist in Homebrew metadata but are
not emitted in v0.1's slim schema.

References:

- Homebrew formula tab metadata: <https://docs.brew.sh/Formula-Cookbook>
- Homebrew installation paths: <https://docs.brew.sh/Installation>

## npm

Files read:
Expand Down Expand Up @@ -506,7 +547,8 @@ strong installed-state correlation tooling today.
## What this collector deliberately does not do

- No package-manager command execution. No `npm ls`, no `pnpm list`, no
`pip show`, no `go list`, no `bundle list`, no `composer show`.
`pip show`, no `go list`, no `bundle list`, no `composer show`, no
`brew list`.
- No source-file reading. Only the metadata files listed above. The
walker visits directories; the scanners open only the targeted files.
- No bundled threat intelligence. Bumblebee ships no built-in advisory
Expand Down
2 changes: 1 addition & 1 deletion docs/schema/v0.1.0/exposure-catalog.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
"id": { "type": "string" },
"name": { "type": "string" },
"ecosystem": {
"enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension"]
"enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension", "homebrew"]
},
"package": { "type": "string" },
"versions": {
Expand Down
2 changes: 1 addition & 1 deletion docs/schema/v0.1.0/package-record.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"endpoint": { "$ref": "#/$defs/endpoint" },
"profile": { "enum": ["baseline", "project", "deep"] },
"ecosystem": {
"enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension"]
"enum": ["npm", "pypi", "go", "rubygems", "packagist", "mcp", "editor-extension", "browser-extension", "homebrew"]
},
"package_name": { "type": "string" },
"normalized_name": { "type": "string" },
Expand Down
Loading