Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion cmd/modelfile/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,16 @@ Full URLs with domain names will auto-detect the provider.`,
modctl modelfile generate ./my-model-dir --output ./output/modelfile.yaml

# Generate with metadata overrides
modctl modelfile generate ./my-model-dir --name my-custom-model --family llama3`,
modctl modelfile generate ./my-model-dir --name my-custom-model --family llama3

# Include hidden files at any depth
modctl modelfile generate ./my-model-dir --include "**/.*"

# Include a specific hidden directory
modctl modelfile generate ./my-model-dir --include ".weights/**"

# Include hidden files but exclude sensitive ones
modctl modelfile generate ./my-model-dir --include "**/.*" --exclude "**/.env"`,
Args: cobra.MaximumNArgs(1),
DisableAutoGenTag: true,
SilenceUsage: true,
Expand Down Expand Up @@ -112,6 +121,10 @@ func init() {
flags.StringVarP(&generateConfig.Provider, "provider", "p", "", "explicitly specify the provider for short-form URLs (huggingface, modelscope)")
flags.StringVar(&generateConfig.DownloadDir, "download-dir", "", "custom directory for downloading models (default: system temp directory)")
flags.StringArrayVar(&generateConfig.ExcludePatterns, "exclude", []string{}, "specify glob patterns to exclude files/directories (e.g. *.log, checkpoints/*)")
flags.StringArrayVar(&generateConfig.IncludePatterns, "include", []string{},
"glob patterns to include files/directories that are normally skipped (e.g. hidden files).\n"+
"Uses doublestar syntax (*, **, ?, [...], {a,b}), matching against relative paths from workspace root.\n"+
"Note: broad patterns like **/.* may include large directories (.git) or sensitive files (.env)")

// Mark the ignore-unrecognized-file-types flag as deprecated and hidden
flags.MarkDeprecated("ignore-unrecognized-file-types", "this flag will be removed in the next release")
Expand Down
2 changes: 2 additions & 0 deletions pkg/config/modelfile/modelfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type GenerateConfig struct {
Provider string // Explicit provider for short-form URLs (e.g., "huggingface", "modelscope")
DownloadDir string // Custom directory for downloading models (optional)
ExcludePatterns []string
IncludePatterns []string
}

func NewGenerateConfig() *GenerateConfig {
Expand All @@ -63,6 +64,7 @@ func NewGenerateConfig() *GenerateConfig {
Provider: "",
DownloadDir: "",
ExcludePatterns: []string{},
IncludePatterns: []string{},
}
}

Expand Down
88 changes: 44 additions & 44 deletions pkg/modelfile/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ var (
// PyTorch formats.
"*.bin", // General binary format
"*.bin.*", // Sharded binary files (e.g., model.bin.1)
"*.pt", // PyTorch model
"*.pth", // PyTorch model (alternative extension)
"*.mar", // PyTorch Model Archive
"*.pte", // PyTorch ExecuTorch format
"*.pt2", // PyTorch 2.0 export format
"*.ptl", // PyTorch Mobile format
"*.pt", // PyTorch model
"*.pth", // PyTorch model (alternative extension)
"*.mar", // PyTorch Model Archive
"*.pte", // PyTorch ExecuTorch format
"*.pt2", // PyTorch 2.0 export format
"*.ptl", // PyTorch Mobile format

// TensorFlow formats.
"*.tflite", // TensorFlow Lite
Expand All @@ -85,16 +85,16 @@ var (
// GGML formats.
"*.gguf", // GGML Universal Format
"*.gguf.*", // Partitioned GGUF files
"*.ggml", // GGML format (legacy)
"*.ggmf", // GGMF format (deprecated)
"*.ggjt", // GGJT format (deprecated)
"*.q4_0", // GGML Q4_0 quantization
"*.q4_1", // GGML Q4_1 quantization
"*.q5_0", // GGML Q5_0 quantization
"*.q5_1", // GGML Q5_1 quantization
"*.q8_0", // GGML Q8_0 quantization
"*.f16", // GGML F16 format
"*.f32", // GGML F32 format
"*.ggml", // GGML format (legacy)
"*.ggmf", // GGMF format (deprecated)
"*.ggjt", // GGJT format (deprecated)
"*.q4_0", // GGML Q4_0 quantization
"*.q4_1", // GGML Q4_1 quantization
"*.q5_0", // GGML Q5_0 quantization
"*.q5_1", // GGML Q5_1 quantization
"*.q8_0", // GGML Q8_0 quantization
"*.f16", // GGML F16 format
"*.f32", // GGML F32 format

// Checkpoint formats.
"*.ckpt", // Checkpoint format
Expand All @@ -109,37 +109,37 @@ var (
"*.vocab", // Vocabulary files (when binary)

// Other ML frameworks.
"*.ot", // OpenVINO format
"*.engine", // TensorRT format
"*.trt", // TensorRT format (alternative extension)
"*.onnx", // Open Neural Network Exchange format
"*.msgpack", // MessagePack serialization
"*.model", // Some NLP frameworks
"*.pkl", // Pickle format
"*.pickle", // Pickle format (alternative extension)
"*.keras", // Keras native format
"*.joblib", // Joblib serialization (scikit-learn)
"*.npy", // NumPy array format
"*.npz", // NumPy compressed archive
"*.nc", // NetCDF format
"*.mlmodel", // Apple Core ML format
"*.coreml", // Apple Core ML format (alternative)
"*.mleap", // MLeap format (Spark ML)
"*.surml", // SurrealML format
"*.llamafile", // Llamafile format
"*.ot", // OpenVINO format
"*.engine", // TensorRT format
"*.trt", // TensorRT format (alternative extension)
"*.onnx", // Open Neural Network Exchange format
"*.msgpack", // MessagePack serialization
"*.model", // Some NLP frameworks
"*.pkl", // Pickle format
"*.pickle", // Pickle format (alternative extension)
"*.keras", // Keras native format
"*.joblib", // Joblib serialization (scikit-learn)
"*.npy", // NumPy array format
"*.npz", // NumPy compressed archive
"*.nc", // NetCDF format
"*.mlmodel", // Apple Core ML format
"*.coreml", // Apple Core ML format (alternative)
"*.mleap", // MLeap format (Spark ML)
"*.surml", // SurrealML format
"*.llamafile", // Llamafile format
"*.llamafile.*", // Llamafile variants
"*.caffemodel", // Caffe model format
"*.prototxt", // Caffe model definition
"*.dlc", // Qualcomm Deep Learning Container
"*.circle", // Samsung Circle format
"*.nb", // Neural Network Binary format
"*.caffemodel", // Caffe model format
"*.prototxt", // Caffe model definition
"*.dlc", // Qualcomm Deep Learning Container
"*.circle", // Samsung Circle format
"*.nb", // Neural Network Binary format

// Data and dataset formats.
"*.arrow", // Apache Arrow columnar format
"*.parquet", // Apache Parquet columnar format
"*.ftz", // FastText compressed model
"*.ark", // Kaldi ark format (speech/audio models)
"*.db", // Database files (LMDB, etc.)
"*.arrow", // Apache Arrow columnar format
"*.parquet", // Apache Parquet columnar format
"*.ftz", // FastText compressed model
"*.ark", // Kaldi ark format (speech/audio models)
"*.db", // Database files (LMDB, etc.)
}

// Code file patterns - supported script and notebook files.
Expand Down
28 changes: 24 additions & 4 deletions pkg/modelfile/modelfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ func (mf *modelfile) generateByWorkspace(config *configmodelfile.GenerateConfig)
var totalSize int64

// Initialize the path filter from the exclude and include patterns
filter, err := NewPathFilter(config.ExcludePatterns...)
filter, err := NewPathFilter(config.ExcludePatterns, config.IncludePatterns)
if err != nil {
return err
}
Expand All @@ -277,12 +277,32 @@ func (mf *modelfile) generateByWorkspace(config *configmodelfile.GenerateConfig)
return err
}

// Skip hidden, skippable, and excluded files/directories.
if isSkippable(filename) || filter.Match(relPath) {
// Directory exclude is absolute — cannot be reversed by --include.
if info.IsDir() && filter.Match(relPath) {
return filepath.SkipDir
}

// Check skipPatterns — include can rescue skippable entries.
if isSkippable(filename) {
if info.IsDir() {
return filepath.SkipDir
if filter.ShouldDescend(relPath) {
// Rescued by --include, enter directory
} else {
return filepath.SkipDir
}
} else {
if !filter.MatchInclude(relPath) {
return nil
}
// Rescued file still goes through exclude check below
}
}
Comment on lines +286 to +299
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The logic for handling skippable files and directories can be simplified to improve readability. By inverting conditions and removing the empty if block, the code becomes less nested and easier to follow.

                if isSkippable(filename) {
                        if info.IsDir() {
                                if !filter.ShouldDescend(relPath) {
                                        return filepath.SkipDir
                                }
                        } else { // is a file
                                if !filter.MatchInclude(relPath) {
                                        return nil
                                }
                        }
                }


// Exclude check for non-skippable files (and include-rescued files).
if filter.Match(relPath) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}

Expand Down
135 changes: 135 additions & 0 deletions pkg/modelfile/modelfile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -676,6 +676,141 @@ func TestNewModelfileByWorkspace(t *testing.T) {
expectCodes: []string{"valid_dir/model.py"},
expectName: "skip-test",
},
{
name: "include all hidden files with recursive pattern",
setupFiles: map[string]string{
"config.json": "",
"model.bin": "",
".hidden_config.json": "",
".hidden_dir/model.bin": "",
".hidden_dir/.nested.py": "",
"normal_dir/.hidden_code.py": "",
"normal_dir/visible.py": "",
},
setupDirs: []string{
".hidden_dir",
"normal_dir",
},
config: &configmodelfile.GenerateConfig{
Name: "include-all-hidden",
IncludePatterns: []string{"**/.*"},
},
expectError: false,
expectConfigs: []string{"config.json", ".hidden_config.json"},
expectModels: []string{"model.bin", ".hidden_dir/model.bin"},
expectCodes: []string{".hidden_dir/.nested.py", "normal_dir/.hidden_code.py", "normal_dir/visible.py"},
expectName: "include-all-hidden",
},
{
name: "include specific hidden directory",
setupFiles: map[string]string{
"config.json": "",
"model.bin": "",
".weights/extra.bin": "",
".weights/data.bin": "",
".other/secret.py": "",
},
setupDirs: []string{
".weights",
".other",
},
config: &configmodelfile.GenerateConfig{
Name: "include-weights-dir",
IncludePatterns: []string{".weights/**"},
},
expectError: false,
expectConfigs: []string{"config.json"},
expectModels: []string{"model.bin", ".weights/extra.bin", ".weights/data.bin"},
expectCodes: []string{},
expectName: "include-weights-dir",
},
{
name: "include with exclude override",
setupFiles: map[string]string{
"config.json": "",
"model.bin": "",
".hidden.py": "",
".env": "",
"sub/.secret.yml": "",
},
setupDirs: []string{
"sub",
},
config: &configmodelfile.GenerateConfig{
Name: "include-exclude",
IncludePatterns: []string{"**/.*"},
ExcludePatterns: []string{"**/.env"},
},
expectError: false,
expectConfigs: []string{"config.json", "sub/.secret.yml"},
expectModels: []string{"model.bin"},
expectCodes: []string{".hidden.py"},
expectName: "include-exclude",
},
{
name: "no include patterns regression",
setupFiles: map[string]string{
"config.json": "",
"model.bin": "",
".hidden_file": "",
".hidden_dir/x.py": "",
},
setupDirs: []string{
".hidden_dir",
},
config: &configmodelfile.GenerateConfig{
Name: "no-include-regression",
},
expectError: false,
expectConfigs: []string{"config.json"},
expectModels: []string{"model.bin"},
expectCodes: []string{},
expectName: "no-include-regression",
},
{
name: "multiple include patterns",
setupFiles: map[string]string{
"config.json": "",
"model.bin": "",
".hidden.py": "",
"__pycache__/cache.pyc": "",
},
setupDirs: []string{
"__pycache__",
},
config: &configmodelfile.GenerateConfig{
Name: "multi-include",
IncludePatterns: []string{".*", "**/__pycache__/**"},
},
expectError: false,
expectConfigs: []string{"config.json"},
expectModels: []string{"model.bin"},
expectCodes: []string{".hidden.py", "__pycache__/cache.pyc"},
expectName: "multi-include",
},
{
name: "skippable dirs not matching include are still skipped",
setupFiles: map[string]string{
"config.json": "",
"model.bin": "",
".git/objects/pack": "",
".weights/model.bin": "",
},
setupDirs: []string{
".git",
".git/objects",
".weights",
},
config: &configmodelfile.GenerateConfig{
Name: "selective-include",
IncludePatterns: []string{".weights/**"},
},
expectError: false,
expectConfigs: []string{"config.json"},
expectModels: []string{"model.bin", ".weights/model.bin"},
expectCodes: []string{},
expectName: "selective-include",
},
}

assert := assert.New(t)
Expand Down
Loading
Loading