diff --git a/go.mod b/go.mod index f7e0d25..02a313c 100644 --- a/go.mod +++ b/go.mod @@ -2,9 +2,9 @@ module mediakit-cli go 1.22 -require github.com/spf13/cobra v1.10.2 - require ( - github.com/inconshreveable/mousetrap v1.1.0 // indirect - github.com/spf13/pflag v1.0.9 // indirect + github.com/spf13/cobra v1.10.2 + github.com/spf13/pflag v1.0.9 ) + +require github.com/inconshreveable/mousetrap v1.1.0 // indirect diff --git a/internal/cloud/api_info.go b/internal/cloud/api_info.go index 62bd87c..9a02fc6 100644 --- a/internal/cloud/api_info.go +++ b/internal/cloud/api_info.go @@ -6,18 +6,43 @@ type APIInfo struct { } var apiInfoRegistry = map[string]APIInfo{ - "erase-video-subtitle-pro": {Method: "POST", Path: "/api/v1/tools/erase-video-subtitle-pro"}, - "image-to-video": {Method: "POST", Path: "/api/v1/tools/image-to-video"}, - "extract-audio": {Method: "POST", Path: "/api/v1/tools/extract-audio"}, - "add-image-to-video": {Method: "POST", Path: "/api/v1/tools/add-image-to-video"}, - "add-subtitle-to-video": {Method: "POST", Path: "/api/v1/tools/add-subtitle-to-video"}, - "mux-audio-video": {Method: "POST", Path: "/api/v1/tools/mux-audio-video"}, - "concat-video": {Method: "POST", Path: "/api/v1/tools/concat-video"}, - "flip-video": {Method: "POST", Path: "/api/v1/tools/flip-video"}, - "trim-video": {Method: "POST", Path: "/api/v1/tools/trim-video"}, - "adjust-video-speed": {Method: "POST", Path: "/api/v1/tools/adjust-video-speed"}, - "concat-audio": {Method: "POST", Path: "/api/v1/tools/concat-audio"}, - "trim-audio": {Method: "POST", Path: "/api/v1/tools/trim-audio"}, - "enhance-video": {Method: "POST", Path: "/api/v1/tools/enhance-video"}, - "query-task": {Method: "GET", Path: "/api/v1/tasks/{task_id}"}, + "erase-video-subtitle-pro": {Method: "POST", Path: "/api/v1/tools/erase-video-subtitle-pro"}, + "image-to-video": {Method: "POST", Path: "/api/v1/tools/image-to-video"}, + "extract-audio": {Method: "POST", Path: "/api/v1/tools/extract-audio"}, + 
"add-image-to-video": {Method: "POST", Path: "/api/v1/tools/add-image-to-video"}, + "add-subtitle-to-video": {Method: "POST", Path: "/api/v1/tools/add-subtitle-to-video"}, + "mux-audio-video": {Method: "POST", Path: "/api/v1/tools/mux-audio-video"}, + "concat-video": {Method: "POST", Path: "/api/v1/tools/concat-video"}, + "flip-video": {Method: "POST", Path: "/api/v1/tools/flip-video"}, + "trim-video": {Method: "POST", Path: "/api/v1/tools/trim-video"}, + "adjust-video-speed": {Method: "POST", Path: "/api/v1/tools/adjust-video-speed"}, + "concat-audio": {Method: "POST", Path: "/api/v1/tools/concat-audio"}, + "trim-audio": {Method: "POST", Path: "/api/v1/tools/trim-audio"}, + "enhance-video": {Method: "POST", Path: "/api/v1/tools/enhance-video"}, + "erase-video-subtitle": {Method: "POST", Path: "/api/v1/tools/erase-video-subtitle"}, + "video-ocr": {Method: "POST", Path: "/api/v1/tools/video-ocr"}, + "asr-subtitles": {Method: "POST", Path: "/api/v1/tools/asr-subtitles"}, + "separate-voice": {Method: "POST", Path: "/api/v1/tools/separate-voice"}, + "enhance-video-generative": {Method: "POST", Path: "/api/v1/tools/enhance-video-generative"}, + "generate-highlights-minigame": {Method: "POST", Path: "/api/v1/tools/generate-highlights-minigame"}, + "generate-highlights-microdrama": {Method: "POST", Path: "/api/v1/tools/generate-highlights-microdrama"}, + "segment-scenes": {Method: "POST", Path: "/api/v1/tools/segment-scenes"}, + "analyze-video-storyline": {Method: "POST", Path: "/api/v1/tools/analyze-video-storyline"}, + "analyze-video-highlights": {Method: "POST", Path: "/api/v1/tools/analyze-video-highlights"}, + "matte-portrait-video": {Method: "POST", Path: "/api/v1/tools/matte-portrait-video"}, + "matte-greenscreen-video": {Method: "POST", Path: "/api/v1/tools/matte-greenscreen-video"}, + "probe-video-metadata": {Method: "POST", Path: "/api/v1/tools/probe-video-metadata"}, + "fade-video-audio": {Method: "POST", Path: "/api/v1/tools/fade-video-audio"}, + 
"apply-video-filter": {Method: "POST", Path: "/api/v1/tools/apply-video-filter"}, + "adjust-video-volume": {Method: "POST", Path: "/api/v1/tools/adjust-video-volume"}, + "fade-audio": {Method: "POST", Path: "/api/v1/tools/fade-audio"}, + "mix-audio": {Method: "POST", Path: "/api/v1/tools/mix-audio"}, + "adjust-audio-speed": {Method: "POST", Path: "/api/v1/tools/adjust-audio-speed"}, + "probe-audio-metadata": {Method: "POST", Path: "/api/v1/tools/probe-audio-metadata"}, + "image-ocr": {Method: "POST", Path: "/api/v1/tools-sync/image-ocr"}, + "erase-image": {Method: "POST", Path: "/api/v1/tools-sync/erase-image"}, + "remove-image-background": {Method: "POST", Path: "/api/v1/tools-sync/remove-image-background"}, + "enhance-image": {Method: "POST", Path: "/api/v1/tools-sync/enhance-image"}, + "evaluate-image-quality": {Method: "POST", Path: "/api/v1/tools-sync/evaluate-image-quality"}, + "query-task": {Method: "GET", Path: "/api/v1/tasks/{task_id}"}, } diff --git a/internal/cloud/executor.go b/internal/cloud/executor.go index 179cf23..147518d 100644 --- a/internal/cloud/executor.go +++ b/internal/cloud/executor.go @@ -41,6 +41,10 @@ func Execute(cmd *cobra.Command, command string, params map[string]any, apiKey s } client := NewClient(apiKey, endpoint, surface, runtime) + requestParams, err = materializeCloudMediaInputs(client, normalizedCommand, requestParams) + if err != nil { + return writeJSON(cmd.OutOrStdout(), errorResponse(err, extractTaskID(requestParams), "")) + } response, err := client.Call(normalizedCommand, requestParams) if err != nil { return writeJSON(cmd.OutOrStdout(), errorResponse(err, extractTaskID(requestParams), "")) @@ -169,9 +173,45 @@ func formatCommandResponse(command string, response map[string]any) map[string]a if command == queryTaskCommand { return queryTaskResponse(response) } + if isSyncCommand(command) { + return syncToolResponse(response) + } return asyncTaskResponse(response) } +func isSyncCommand(command string) bool { + api, ok := 
// syncToolResponse flattens the response of a synchronous tool call into a
// single-level map.
//
// The envelope fields task_id, request_id and status are copied as trimmed
// strings when they carry a value; absent or null fields are omitted. Every
// key of the nested "result" object is then copied to the top level, so a key
// inside "result" overrides an envelope field of the same name. An empty or
// nil input yields an empty, non-nil map.
func syncToolResponse(result map[string]any) map[string]any {
	output := map[string]any{}
	if len(result) == 0 {
		return output
	}

	for _, key := range []string{"task_id", "request_id", "status"} {
		value := result[key]
		// fmt.Sprint(nil) renders as "<nil>", so a missing or JSON-null field
		// must be filtered out before formatting rather than compared to "".
		if value == nil {
			continue
		}
		if text := strings.TrimSpace(fmt.Sprint(value)); text != "" {
			output[key] = text
		}
	}

	taskResult, ok := result["result"].(map[string]any)
	if !ok {
		return output
	}
	for key, value := range taskResult {
		output[key] = value
	}
	return output
}
nil, err + } + next, ok := materialized.(map[string]any) + if !ok { + return params, nil + } + return next, nil +} + +func materializeCloudValue(client *Client, home string, command string, key string, value any, mediaContext bool) (any, error) { + switch typed := value.(type) { + case map[string]any: + next := make(map[string]any, len(typed)) + for childKey, childValue := range typed { + childMediaContext := mediaContext || isMediaInputField(childKey) + materialized, err := materializeCloudValue(client, home, command, childKey, childValue, childMediaContext) + if err != nil { + return nil, err + } + next[childKey] = materialized + } + return next, nil + case []any: + next := make([]any, len(typed)) + childMediaContext := mediaContext || isMediaInputField(key) + for i, childValue := range typed { + materialized, err := materializeCloudValue(client, home, command, key, childValue, childMediaContext) + if err != nil { + return nil, err + } + next[i] = materialized + } + return next, nil + case []string: + if !mediaContext && !isMediaInputField(key) { + return typed, nil + } + next := make([]string, len(typed)) + for i, childValue := range typed { + materialized, err := materializeCloudMediaString(client, home, command, childValue) + if err != nil { + return nil, err + } + next[i] = materialized + } + return next, nil + case string: + if !mediaContext && !isMediaInputField(key) { + return typed, nil + } + return materializeCloudMediaString(client, home, command, typed) + default: + return value, nil + } +} + +func materializeCloudMediaString(client *Client, home string, command string, value string) (string, error) { + value = strings.TrimSpace(value) + if value == "" || isRemoteOrMediaKitURL(value) { + return value, nil + } + + identity, ok, err := resolveLocalMediaIdentity(value) + if err != nil { + return "", err + } + if !ok { + return value, nil + } + + now := time.Now().UTC() + if fileID, err := lookupUploadCache(home, identity, now); err != nil { + return "", 
err + } else if fileID != "" { + return fileID, nil + } + + fileID, err := client.uploadLocalMediaFile(command, identity.AbsPath) + if err != nil { + return "", err + } + return storeUploadCache(home, identity, fileID, now) +} + +func resolveLocalMediaIdentity(value string) (fileIdentity, bool, error) { + path := expandUserPath(value) + info, err := os.Stat(path) + if err != nil { + if os.IsNotExist(err) && looksLikeLocalPath(value) { + return fileIdentity{}, false, fmt.Errorf("本地媒体文件不存在: %s", value) + } + return fileIdentity{}, false, nil + } + if info.IsDir() { + return fileIdentity{}, false, fmt.Errorf("本地媒体输入不能是目录: %s", value) + } + absPath, err := filepath.Abs(path) + if err != nil { + return fileIdentity{}, false, err + } + return fileIdentity{ + AbsPath: absPath, + Size: info.Size(), + MTimeUnixNano: info.ModTime().UnixNano(), + }, true, nil +} + +func isMediaInputField(name string) bool { + normalized := strings.ToLower(strings.ReplaceAll(strings.TrimSpace(name), "-", "_")) + if mediaInputNames[normalized] { + return true + } + if strings.HasSuffix(normalized, "_url") || strings.HasSuffix(normalized, "_urls") { + return strings.Contains(normalized, "video") || + strings.Contains(normalized, "audio") || + strings.Contains(normalized, "image") || + strings.Contains(normalized, "subtitle") + } + return false +} + +func isRemoteOrMediaKitURL(value string) bool { + lower := strings.ToLower(value) + return strings.HasPrefix(lower, "http://") || + strings.HasPrefix(lower, "https://") || + strings.HasPrefix(lower, "mediakit://") +} + +func looksLikeLocalPath(value string) bool { + if filepath.IsAbs(value) { + return true + } + if strings.HasPrefix(value, "./") || strings.HasPrefix(value, "../") || + strings.HasPrefix(value, "~/") || strings.HasPrefix(value, ".\\") || + strings.HasPrefix(value, "..\\") || strings.HasPrefix(value, "~\\") { + return true + } + if strings.Contains(value, "/") || strings.Contains(value, "\\") { + return true + } + switch 
strings.ToLower(filepath.Ext(value)) { + case ".mp4", ".mov", ".m4v", ".avi", ".mkv", ".webm", ".mp3", ".m4a", ".wav", ".aac", ".flac", ".jpg", ".jpeg", ".png", ".webp", ".gif", ".srt", ".ass", ".vtt": + return true + default: + return false + } +} + +func expandUserPath(value string) string { + if value == "~" { + if home, err := os.UserHomeDir(); err == nil { + return home + } + } + if strings.HasPrefix(value, "~/") || strings.HasPrefix(value, "~\\") { + if home, err := os.UserHomeDir(); err == nil { + return filepath.Join(home, value[2:]) + } + } + return value +} diff --git a/internal/cloud/media_upload.go b/internal/cloud/media_upload.go new file mode 100644 index 0000000..a79f2e9 --- /dev/null +++ b/internal/cloud/media_upload.go @@ -0,0 +1,117 @@ +package cloud + +import ( + "fmt" + "io" + "net/http" + "os" + "strings" +) + +type mediaUploadTarget struct { + FileID string + Method string + URL string + Headers map[string]string +} + +func (c *Client) uploadLocalMediaFile(command string, path string) (string, error) { + target, err := c.requestMediaUploadTarget(command) + if err != nil { + return "", err + } + if target.FileID == "" || target.URL == "" { + return "", fmt.Errorf("申请上传地址返回缺少 file_id 或 upload_url") + } + + file, err := os.Open(path) + if err != nil { + return "", err + } + defer file.Close() + + method := strings.ToUpper(strings.TrimSpace(target.Method)) + if method == "" { + method = http.MethodPut + } + req, err := http.NewRequest(method, target.URL, file) + if err != nil { + return "", err + } + for key, value := range target.Headers { + if strings.TrimSpace(key) != "" { + req.Header.Set(key, value) + } + } + + resp, err := c.HTTPClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + if resp.StatusCode >= http.StatusBadRequest { + return "", fmt.Errorf("上传媒体文件失败: HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(body))) + } + return target.FileID, nil +} + +func (c *Client) 
// parseUploadHeaders converts the upload_headers value of an upload-target
// response into a header map.
//
// value is expected to be a list whose items are either objects or strings:
//   - an object supplies the header name under "key", "name" or "header" and
//     the header value under "value" or "val";
//   - a string has the form "Name: value" and is split at the first colon.
//
// Items without a usable header name are skipped. Any other shape of value
// yields an empty, non-nil map.
func parseUploadHeaders(value any) map[string]string {
	headers := map[string]string{}
	items, ok := value.([]any)
	if !ok {
		return headers
	}

	// text formats a decoded JSON scalar. nil is mapped to "" explicitly
	// because fmt.Sprint(nil) would otherwise produce the string "<nil>".
	text := func(v any) string {
		if v == nil {
			return ""
		}
		return strings.TrimSpace(fmt.Sprint(v))
	}

	for _, item := range items {
		switch typed := item.(type) {
		case map[string]any:
			name := text(firstPresent(typed, "key", "name", "header"))
			if name == "" {
				continue
			}
			headers[name] = text(firstPresent(typed, "value", "val"))
		case string:
			rawName, rawValue, found := strings.Cut(typed, ":")
			name := strings.TrimSpace(rawName)
			if !found || name == "" {
				continue
			}
			headers[name] = strings.TrimSpace(rawValue)
		}
	}
	return headers
}

// firstPresent returns the value stored under the first of keys that exists in
// values, or "" when none of them is present. A key that is present with a nil
// value is returned as nil.
func firstPresent(values map[string]any, keys ...string) any {
	for _, key := range keys {
		if value, ok := values[key]; ok {
			return value
		}
	}
	return ""
}
+ +const ( + uploadCacheVersion = 1 + uploadCacheTTL = 30 * 24 * time.Hour + uploadLockTimeout = 5 * time.Second + uploadLockStaleAge = 10 * time.Minute +) + +type uploadCache struct { + Version int `json:"version"` + Entries map[string]uploadCacheEntry `json:"entries"` +} + +type uploadCacheEntry struct { + FileID string `json:"file_id"` + UploadedAt string `json:"uploaded_at"` + ExpiresAt string `json:"expires_at"` + Size int64 `json:"size"` + MTimeUnixNano int64 `json:"mtime_unix_nano"` +} + +type fileIdentity struct { + AbsPath string + Size int64 + MTimeUnixNano int64 +} + +type cacheLock struct { + path string + file *os.File +} + +func lookupUploadCache(home string, identity fileIdentity, now time.Time) (string, error) { + lock, err := acquireUploadCacheLock(home) + if err != nil { + return "", err + } + defer lock.Release() + + cache, err := readUploadCache(home) + if err != nil { + return "", err + } + entry, ok := cache.Entries[identity.AbsPath] + if !ok || !entry.matches(identity, now) { + return "", nil + } + return entry.FileID, nil +} + +func storeUploadCache(home string, identity fileIdentity, fileID string, now time.Time) (string, error) { + lock, err := acquireUploadCacheLock(home) + if err != nil { + return "", err + } + defer lock.Release() + + cache, err := readUploadCache(home) + if err != nil { + return "", err + } + if entry, ok := cache.Entries[identity.AbsPath]; ok && entry.matches(identity, now) { + return entry.FileID, nil + } + + cache.Entries[identity.AbsPath] = uploadCacheEntry{ + FileID: fileID, + UploadedAt: now.Format(time.RFC3339), + ExpiresAt: now.Add(uploadCacheTTL).Format(time.RFC3339), + Size: identity.Size, + MTimeUnixNano: identity.MTimeUnixNano, + } + pruneExpiredUploadCache(cache, now) + return fileID, writeUploadCache(home, cache) +} + +func (entry uploadCacheEntry) matches(identity fileIdentity, now time.Time) bool { + if entry.FileID == "" { + return false + } + if entry.Size != identity.Size || entry.MTimeUnixNano != 
identity.MTimeUnixNano { + return false + } + expiresAt, err := time.Parse(time.RFC3339, entry.ExpiresAt) + if err != nil { + return false + } + return now.Before(expiresAt) +} + +func readUploadCache(home string) (uploadCache, error) { + cache := uploadCache{ + Version: uploadCacheVersion, + Entries: map[string]uploadCacheEntry{}, + } + data, err := os.ReadFile(cliconfig.UploadCacheFile(home)) + if errors.Is(err, os.ErrNotExist) { + return cache, nil + } + if err != nil { + return cache, err + } + if len(data) == 0 { + return cache, nil + } + if err := json.Unmarshal(data, &cache); err != nil { + return uploadCache{Version: uploadCacheVersion, Entries: map[string]uploadCacheEntry{}}, nil + } + if cache.Version == 0 { + cache.Version = uploadCacheVersion + } + if cache.Entries == nil { + cache.Entries = map[string]uploadCacheEntry{} + } + return cache, nil +} + +func writeUploadCache(home string, cache uploadCache) error { + if cache.Version == 0 { + cache.Version = uploadCacheVersion + } + if cache.Entries == nil { + cache.Entries = map[string]uploadCacheEntry{} + } + path := cliconfig.UploadCacheFile(home) + if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { + return err + } + data, err := json.MarshalIndent(cache, "", " ") + if err != nil { + return err + } + tmp, err := os.CreateTemp(filepath.Dir(path), ".upload-cache-*") + if err != nil { + return err + } + tmpName := tmp.Name() + defer os.Remove(tmpName) + + if _, err := tmp.Write(data); err != nil { + tmp.Close() + return err + } + if err := tmp.Sync(); err != nil { + tmp.Close() + return err + } + if err := tmp.Close(); err != nil { + return err + } + return os.Rename(tmpName, path) +} + +func pruneExpiredUploadCache(cache uploadCache, now time.Time) { + for path, entry := range cache.Entries { + expiresAt, err := time.Parse(time.RFC3339, entry.ExpiresAt) + if err != nil || !now.Before(expiresAt) { + delete(cache.Entries, path) + } + } +} + +func acquireUploadCacheLock(home string) (*cacheLock, 
error) { + if err := cliconfig.EnsureConfigDir(home); err != nil { + return nil, err + } + lockPath := cliconfig.UploadCacheFile(home) + ".lock" + deadline := time.Now().Add(uploadLockTimeout) + for { + file, err := os.OpenFile(lockPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600) + if err == nil { + _, _ = file.WriteString(time.Now().UTC().Format(time.RFC3339Nano)) + return &cacheLock{path: lockPath, file: file}, nil + } + if !errors.Is(err, os.ErrExist) { + return nil, err + } + if isStaleUploadLock(lockPath) { + _ = os.Remove(lockPath) + continue + } + if time.Now().After(deadline) { + return nil, errors.New("等待上传缓存锁超时") + } + time.Sleep(50 * time.Millisecond) + } +} + +func isStaleUploadLock(path string) bool { + info, err := os.Stat(path) + if err != nil { + return false + } + return time.Since(info.ModTime()) > uploadLockStaleAge +} + +func (lock *cacheLock) Release() { + if lock == nil { + return + } + if lock.file != nil { + _ = lock.file.Close() + } + if lock.path != "" { + _ = os.Remove(lock.path) + } +} diff --git a/internal/commands/registry.go b/internal/commands/registry.go index 22d03e4..a11440e 100644 --- a/internal/commands/registry.go +++ b/internal/commands/registry.go @@ -10,6 +10,7 @@ import ( "strings" "github.com/spf13/cobra" + "github.com/spf13/pflag" cliconfig "mediakit-cli/internal/config" "mediakit-cli/internal/local" @@ -54,6 +55,8 @@ type CapabilityMeta struct { var generatedDomains = []DomainMeta{ {Name: "video", Description: "视频处理,涵盖视频画质增强、视频理解、字幕擦除等能力"}, {Name: "editing", Description: "音视频剪辑,涵盖音视频拼接、裁剪、合成等能力"}, + {Name: "audio", Description: "音频处理,涵盖音频处理和增强、内容理解等能力"}, + {Name: "image", Description: "图像处理,涵盖图像压缩、图像增强、AI处理等能力"}, {Name: "shared", Description: "通用能力与任务查询"}, } @@ -80,7 +83,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "mode", FlagName: "mode", - Description: "字幕擦除模式,取值如下:Subtitle:擦除OCR检测为字幕的文本。在此模式下,系统将启用 OCR 识别,并依据检测结果进行擦除操作,仅擦除下面50%画面的字幕。 Text:擦除OCR检测为字幕及其他的文本(如人物介绍等),不包含场景文字(如宫殿门牌匾等)。", + Description: 
"字幕擦除模式,取值如下:Subtitle:擦除OCR检测为字幕的文本。在此模式下,系统将启用 OCR 识别,并依据检测结果进行擦除操作,仅擦除下面50%画面的字幕。 Text:擦除OCR检测为字幕及其他的文本(如人物介绍等),不包含场景文字(如宫殿门牌匾等)。", Type: "string", Required: false, Enum: []string{"Subtitle", "Text"}, @@ -91,7 +94,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "output_encode_mode", FlagName: "output-encode-mode", - Description: "输出视频编码模式,支持以下两种取值:Quality(默认值):画质优先模式。此模式下,系统会采用较高的目标码率进行编码,以确保高画质。这通常会导致输出文件的码率显著高于源文件,文件体积也相应增大。 Size:大小优先模式。在保证一定画质的前提下,使输出码率尽量向源视频码率对齐。", + Description: "输出视频编码模式,支持以下两种取值:Quality(默认值):画质优先模式。此模式下,系统会采用较高的目标码率进行编码,以确保高画质。这通常会导致输出文件的码率显著高于源文件,文件体积也相应增大。 Size:大小优先模式。在保证一定画质的前提下,使输出码率尽量向源视频码率对齐。", Type: "string", Required: false, Enum: []string{"Quality", "Size"}, @@ -151,7 +154,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "scene", FlagName: "scene", - Description: "场景化模板类型。用于选择一个针对特定业务场景的预设画质增强模板。支持的取值如下:common(默认值): 通用模板;ugc: UGC 短视频;short_series: 短剧;aigc: AIGC 内容;old_film: 老片修复", + Description: "场景化模板类型。用于选择一个针对特定业务场景的预设画质增强模板。支持的取值如下:common(默认值): 通用模板;ugc: UGC 短视频;short_series: 短剧;aigc: AIGC 内容;old_film: 老片修复", Type: "string", Required: false, Enum: []string{"common", "ugc", "short_series", "aigc", "old_film"}, @@ -162,7 +165,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "tool_version", FlagName: "tool-version", - Description: "工具版本,标准版:standard,专业版:professional,默认为标准版", + Description: "工具版本,标准版:standard,专业版:professional,默认为标准版", Type: "string", Required: false, Enum: []string{"standard", "professional"}, @@ -189,6 +192,17 @@ var generatedCapabilities = []CapabilityMeta{ HasDefault: false, JSONEncoded: false, }, + { + Name: "bitrate_level", + FlagName: "bitrate-level", + Description: "码率档位。输出视频的目标平均码率。该参数将决定视频的视觉质量和最终的文件体积。参数取值:高码率、中码率(推荐码率)、低码率。非必填,默认为中码率。", + Type: "string", + Required: false, + Enum: []string{"low", "medium", "high"}, + HasDefault: true, + DefaultValue: "medium", + JSONEncoded: false, + }, { Name: "fps", FlagName: "fps", @@ -294,7 +308,7 @@ var generatedCapabilities = 
[]CapabilityMeta{ { Name: "format", FlagName: "format", - Description: "输出音频的格式,支持 mp3、m4a 格式。 默认m4a", + Description: "输出音频的格式,支持 mp3、m4a 格式。 默认m4a", Type: "string", Required: false, Enum: []string{"mp3", "m4a"}, @@ -356,7 +370,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "sub_image_height", FlagName: "sub-image-height", - Description: "图片的高度,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%',相对于视频高度)。", + Description: "图片的高度,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%',相对于视频高度)。", Type: "string", Required: false, HasDefault: true, @@ -366,7 +380,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "sub_image_width", FlagName: "sub-image-width", - Description: "图片的宽度,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%',相对于视频高度)。", + Description: "图片的宽度,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%',相对于视频高度)。", Type: "string", Required: false, HasDefault: true, @@ -376,7 +390,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "sub_image_pos_x", FlagName: "sub-image-pos-x", - Description: "图片在水平方向(X 轴)的位置,以视频左上角为原点,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%')。例如值为 '0' 时,表示处于最左侧。", + Description: "图片在水平方向(X 轴)的位置,以视频左上角为原点,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%')。例如值为 '0' 时,表示处于最左侧。", Type: "string", Required: false, HasDefault: true, @@ -386,7 +400,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "sub_image_pos_y", FlagName: "sub-image-pos-y", - Description: "图片在垂直方向(Y 轴)的位置,以视频左上角为原点,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%')。例如值为 '0' 时,表示处于最上侧。", + Description: "图片在垂直方向(Y 轴)的位置,以视频左上角为原点,字符串类型,支持具体像素值(如 '100')或百分比(如 '20%')。例如值为 '0' 时,表示处于最上侧。", Type: "string", Required: false, HasDefault: true, @@ -475,7 +489,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "subtitle_pos_preset", FlagName: "subtitle-pos-preset", - Description: "预设字幕位置。底部居中(默认常用) bottom_center;顶部居中 top_center;画面正中央 center;偏下三分之一处 lower_third", + Description: "预设字幕位置。底部居中(默认常用) bottom_center;顶部居中 top_center;画面正中央 center;偏下三分之一处 lower_third", Type: "string", Required: false, Enum: []string{"bottom_center", "top_center", "center", 
"lower_third"}, @@ -486,7 +500,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "subtitle_font_size", FlagName: "subtitle-font-size", - Description: "字幕的字体大小,单位:像素。", + Description: "字幕的字体大小,单位:像素。", Type: "integer", Required: false, HasDefault: true, @@ -496,7 +510,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "subtitle_font_color", FlagName: "subtitle-font-color", - Description: "字幕的字体颜色,RGBA 格式。默认#FFFFFFFF", + Description: "字幕的字体颜色,RGBA 格式。默认#FFFFFFFF", Type: "string", Required: false, HasDefault: true, @@ -506,7 +520,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "subtitle_font_type", FlagName: "subtitle-font-type", - Description: "字幕的字体 ID。 思源黑体:sy_black (经典无衬线黑体,端正百搭,正文首选) 庞门正道标题体:pm_zhengdao (粗壮有力,硬汉气场,大标题/封面神器) 阿里巴巴普惠体:ali_puhui (现代感极强,结构饱满,屏幕阅读体验极佳) 站酷快乐体:zhanku_kuaile (圆润活泼,带手写感,适合轻松搞笑的 Vlog 氛围)", + Description: "字幕的字体 ID。 思源黑体:sy_black (经典无衬线黑体,端正百搭,正文首选) 庞门正道标题体:pm_zhengdao (粗壮有力,硬汉气场,大标题/封面神器) 阿里巴巴普惠体:ali_puhui (现代感极强,结构饱满,屏幕阅读体验极佳) 站酷快乐体:zhanku_kuaile (圆润活泼,带手写感,适合轻松搞笑的 Vlog 氛围)", Type: "string", Required: false, Enum: []string{"sy_black", "pm_zhengdao", "ali_puhui", "zhanku_kuaile"}, @@ -568,7 +582,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "is_audio_reserve", FlagName: "is-audio-reserve", - Description: "Boolean 类型,是否保留原视频流中的音频。默认值 true:保留。false:不保留。", + Description: "Boolean 类型,是否保留原视频流中的音频。默认值 true:保留。false:不保留。", Type: "boolean", Required: false, HasDefault: true, @@ -578,7 +592,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "is_video_audio_sync", FlagName: "is-video-audio-sync", - Description: "Boolean 类型,是否对齐音频和视频时长。 true:通过 output_sync 配置,对齐音频和视频时长。 false(默认值):保持原样输出,不做音视频对齐。最终合成的视频时长,以较长的流为准。", + Description: "Boolean 类型,是否对齐音频和视频时长。 true:通过 output_sync 配置,对齐音频和视频时长。 false(默认值):保持原样输出,不做音视频对齐。最终合成的视频时长,以较长的流为准。", Type: "boolean", Required: false, HasDefault: true, @@ -588,7 +602,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "sync_mode", FlagName: "sync-mode", - 
Description: "String 类型,设置 is_video_audio_sync 为 true 时生效;当音频和视频时长不相等时,可指定对齐基准,可选项:video、audio。 video:【默认值】以视频的时长为准。 audio:以音频的时长为准。", + Description: "String 类型,设置 is_video_audio_sync 为 true 时生效;当音频和视频时长不相等时,可指定对齐基准,可选项:video、audio。 video:【默认值】以视频的时长为准。 audio:以音频的时长为准。", Type: "string", Required: false, Enum: []string{"video", "audio"}, @@ -599,7 +613,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "sync_method", FlagName: "sync-method", - Description: "String 类型,设置 is_video_audio_sync 为 true 时生效;指定对齐方式,支持通过裁剪或加速的方式,对齐音频和视频的时长。可选项:speed、trim。 speed:通过加快音频或视频的速度,对齐音频和视频的时长。 trim:【默认值】通过裁剪音频或视频,对齐音频和视频的时长。从头开始计算并裁剪。", + Description: "String 类型,设置 is_video_audio_sync 为 true 时生效;指定对齐方式,支持通过裁剪或加速的方式,对齐音频和视频的时长。可选项:speed、trim。 speed:通过加快音频或视频的速度,对齐音频和视频的时长。 trim:【默认值】通过裁剪音频或视频,对齐音频和视频的时长。从头开始计算并裁剪。", Type: "string", Required: false, Enum: []string{"speed", "trim"}, @@ -706,7 +720,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "is_flip_vertical", FlagName: "is-flip-vertical", - Description: "是否进行垂直翻转。Boolean 类型,默认值为 false, 表示不翻转。", + Description: "是否进行垂直翻转。Boolean 类型,默认值为 false, 表示不翻转。", Type: "boolean", Required: false, HasDefault: true, @@ -716,7 +730,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "is_flip_horizontal", FlagName: "is-flip-horizontal", - Description: "是否进行水平翻转。Boolean 类型,默认值为 false, 表示不翻转。", + Description: "是否进行水平翻转。Boolean 类型,默认值为 false, 表示不翻转。", Type: "boolean", Required: false, HasDefault: true, @@ -768,7 +782,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "start_time", FlagName: "start-time", - Description: "裁剪开始时间,默认为 0, 表示从头开始裁剪。支持设置为 2 位小数,单位:秒。", + Description: "裁剪开始时间,默认为 0, 表示从头开始裁剪。支持设置为 2 位小数,单位:秒。", Type: "number", Required: false, HasDefault: true, @@ -829,7 +843,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "speed", FlagName: "speed", - Description: "调整速度的倍数,Float类型,取值范围为0.1~4。", + Description: "调整速度的倍数,Float类型,取值范围为0.1~4。", Type: "number", Required: false, HasDefault: true, @@ 
-924,7 +938,7 @@ var generatedCapabilities = []CapabilityMeta{ { Name: "start_time", FlagName: "start-time", - Description: "裁剪开始时间,默认为 0, 表示从头开始裁剪。支持设置为 2 位小数,单位:秒。", + Description: "裁剪开始时间,默认为 0, 表示从头开始裁剪。支持设置为 2 位小数,单位:秒。", Type: "number", Required: false, HasDefault: true, @@ -982,166 +996,1686 @@ var generatedCapabilities = []CapabilityMeta{ }, }, { - Name: "query-task", - DisplayName: "查询任务", - Domain: "shared", - Description: "异步任务结果查询通过task_id查询任务信息", + Name: "erase-video-subtitle", + DisplayName: "字幕擦除(标准版)", + Domain: "video", + Description: "智能检测并擦除视频画面中已有的硬字幕,保留原始背景。\n支持格式:主流视频格式如mp4、flv、ts、avi、mov、wmv、mkv。", LocalSupported: false, - Async: false, - AsyncQueryCommand: "", - OutputType: "query-task", + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "video", Params: []ParamMeta{ { - Name: "task_id", - FlagName: "task-id", - Description: "Path parameter `task_id`.", + Name: "video_url", + FlagName: "video-url", + Description: "输入视频。String 类型,支持http://xxx或https://xxx格式 Url", Type: "string", Required: true, HasDefault: false, JSONEncoded: false, }, { - Name: "poll_interval_seconds", - FlagName: "poll-interval-seconds", - Description: "轮询间隔秒数;仅 query-task 使用。", - Type: "number", + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "video-ocr", + DisplayName: "视频识别字幕(OCR)", + Domain: "video", + Description: "识别视频画面中的字幕/文字内容,输出带时间戳的字幕片段。\n支持格式:主流视频格式如 mp4、flv、ts、avi、mov、wmv、mkv。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频 Url(需公网可访问)", + Type: "string", + Required: true, + HasDefault: 
false, + JSONEncoded: false, + }, + { + Name: "mode", + FlagName: "mode", + Description: "工作模式(Subtitle: 识别字幕文本;Detailed: 识别更详细文本信息)", + Type: "string", Required: false, + Enum: []string{"Subtitle", "Detailed"}, HasDefault: true, - DefaultValue: "10", + DefaultValue: "Subtitle", JSONEncoded: false, }, { - Name: "max_poll_attempts", - FlagName: "max-poll-attempts", - Description: "最大轮询次数;0 表示不自动轮询。", - Type: "integer", + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", Required: false, HasDefault: false, JSONEncoded: false, }, { - Name: "poll_complete", - FlagName: "poll-complete", - Description: "是否持续轮询直到任务完成。", - Type: "boolean", + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", Required: false, HasDefault: false, JSONEncoded: false, }, }, }, -} - -func printDomains(cmd *cobra.Command) error { - _, err := fmt.Fprintln(cmd.OutOrStdout(), renderDomainsIndex()) - return err -} - -func printHelpFull(cmd *cobra.Command) error { - _, err := fmt.Fprintln(cmd.OutOrStdout(), renderHelpFullIndex()) - return err -} - -func renderDomainsIndex() string { - domains := domainList() - if len(domains) == 0 { - return "Available domains:\n" - } - - lines := []string{"Available domains:"} - for _, domain := range domains { - lines = append(lines, fmt.Sprintf("- %s %s", domain.Name, domain.Description)) - } - return strings.Join(lines, "\n") -} - -func renderHelpFullIndex() string { - domains := domainList() - if len(domains) == 0 { - return "MediaKit CLI Full Help\n\n" - } - - grouped := capabilitiesByDomain() - lines := []string{"MediaKit CLI Full Help", ""} - for _, domain := range domains { - lines = append(lines, fmt.Sprintf("[%s]", domain.Name)) - lines = append(lines, domain.Description) - capabilities := grouped[domain.Name] - if len(capabilities) == 0 { - lines = append(lines, "- ") - lines = append(lines, "") - continue - } - for _, capability := range capabilities 
{ - lines = append(lines, fmt.Sprintf("- %s %s", capability.Name, capability.Description)) - lines = append(lines, fmt.Sprintf(" 查看详情: %s --help", capabilityInvocation(capability))) - } - lines = append(lines, "") - } - - return strings.TrimRight(strings.Join(lines, "\n"), "\n") -} - -func newGeneratedDomainCommands() []*cobra.Command { - domains := domainList() - grouped := capabilitiesByDomain() - cmds := make([]*cobra.Command, 0, len(domains)) - - for _, meta := range domains { - domainMeta := meta - domainCmd := &cobra.Command{ - Use: domainMeta.Name, - Short: domainMeta.Description, - Long: renderDomainHelp(domainMeta, grouped[domainMeta.Name]), - Args: cobra.NoArgs, - DisableAutoGenTag: true, - RunE: func(cmd *cobra.Command, args []string) error { - return cmd.Help() + { + Name: "asr-subtitles", + DisplayName: "语音转字幕(ASR)", + Domain: "video", + Description: "对输入视频或音频进行语音识别,输出带时间戳的字幕片段。\n支持格式:主流音视频格式(如mp4、mov、mp3、m4a、wav等)。\n输入:video_url和audio_url二选一。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频 Url(需公网可访问),与audio_url二选一,都存在时优先取video_url", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "audio_url", + FlagName: "audio-url", + Description: "输入音频 Url(需公网可访问),与video_url二选一,不能都为空", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "content_type", + FlagName: "content-type", + Description: "识别类型,默认值为空,算法会自动探测类型,speech: 对话,singing: 歌唱", + Type: "string", + Required: false, + Enum: []string{"speech", "singing"}, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "language", + FlagName: "language", + Description: "识别提示语言 ID (默认值为空,算法会自动探测语种)\n分类:简体中文,ID:cmn-Hans-CN\n分类:英语,ID:eng-US\n", + Type: "string", + Required: false, + Enum: []string{"cmn-Hans-CN", "eng-US"}, + HasDefault: false, + JSONEncoded: false, + }, + { + 
Name: "enable_speaker_info", + FlagName: "enable-speaker-info", + Description: "是否开启说话人识别", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "enable_confidence", + FlagName: "enable-confidence", + Description: "是否返回置信度", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, }, - } - for _, capability := range grouped[domainMeta.Name] { - domainCmd.AddCommand(newCapabilityCommand(capability)) - } - cmds = append(cmds, domainCmd) - } - - return cmds -} - -func newCapabilityCommand(meta CapabilityMeta) *cobra.Command { - capabilityMeta := meta - cmd := &cobra.Command{ - Use: capabilityMeta.Name, - Short: capabilityMeta.Description, - Long: renderCapabilityHelp(capabilityMeta), - Args: cobra.NoArgs, - DisableAutoGenTag: true, - RunE: func(cmd *cobra.Command, args []string) error { - // --schema: 输出工具 schema 后退出 - if schemaFlag, _ := cmd.Flags().GetBool("schema"); schemaFlag { - resolvedMode := resolveSchemaMode(cmd) - return writeJSON(cmd.OutOrStdout(), buildCapabilitySchema(capabilityMeta, resolvedMode)) - } - params, err := collectCapabilityParams(cmd, capabilityMeta) - if err != nil { - return err - } - if err := modes.Dispatch(cmd, capabilityMeta.runtimeMeta(), params); err != nil { - return writeCapabilityError(cmd.OutOrStdout(), err) - } - return nil }, - } - bindCapabilityFlags(cmd, capabilityMeta) - cmd.Flags().String("output-path", "", "本地文件输出目录(覆盖 config/env 设置)") - cmd.Flags().Bool("schema", false, "输出该工具的 JSON Schema 描述(供 Agent 使用)") - - return cmd -} - -func renderDomainHelp(domain 
DomainMeta, capabilities []CapabilityMeta) string { - lines := []string{fmt.Sprintf("%s — %s", domain.Name, domain.Description), "", "Available commands:"} - if len(capabilities) == 0 { + }, + { + Name: "separate-voice", + DisplayName: "人声背景音分离", + Domain: "audio", + Description: "将音频中的人声与背景音精准分离,输出为两个独立的音轨文件。\n支持格式:主流音视频格式(如mp4、mov、mp3、m4a、wav等)。\n输入:video_url和audio_url二选一。\n输出格式:AAC。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "audio", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频 Url(需公网可访问),与audio_url二选一,都存在时优先取video_url", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "audio_url", + FlagName: "audio-url", + Description: "输入音频 Url(需公网可访问),与video_url二选一,不能都为空", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "enhance-video-generative", + DisplayName: "生成式画质增强", + Domain: "video", + Description: "生成式视频增强修复(generative_video_restoration)是基于扩散大模型(Diffusion-based Large Model)的生成式视频修复技术。不仅可以还原被破坏的像素,更借助大规模预训练积累的丰富视觉先验,主动补全细节、理解语义,生成真实、自然、高保真的视频内容。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "video", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频。String 类型,支持http://xxx或https://xxx格式 URL", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "resolution", + FlagName: "resolution", + Description: "目标分辨率。支持的取值如下所示。", + Type: "string", + Required: false, + Enum: []string{"720p", "1080p"}, + 
HasDefault: true, + DefaultValue: "720p", + JSONEncoded: false, + }, + { + Name: "bitrate_level", + FlagName: "bitrate-level", + Description: "码率档位。输出视频的目标平均码率。该参数将决定视频的视觉质量和最终的文件体积。参数取值:高码率、中码率(推荐码率)、低码率。非必填,默认为中码率。", + Type: "string", + Required: false, + Enum: []string{"low", "medium", "high"}, + HasDefault: true, + DefaultValue: "medium", + JSONEncoded: false, + }, + { + Name: "fps", + FlagName: "fps", + Description: "目标帧率,单位为 fps。若未指定 fps 参数,输出视频将保持与原始片源一致的帧率。取值范围为 [15, 120]。建议不超过原片的 4 倍。", + Type: "number", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "generate-highlights-minigame", + DisplayName: "高光智剪-小游戏", + Domain: "video", + Description: "识别小游戏录屏视频中的核心玩法与高光事件(如连击、通关、极限操作等),\n快速生成用于买量的视频素材。支持提供游戏名称、玩法描述、高光定义以辅助模型更精准识别。\n使用限制:本期仅支持单视频输入。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_urls", + FlagName: "video-urls", + Description: "待处理的小游戏视频 URL 列表,本期仅支持单视频输入\n子项说明:视频 URL,支持 http:// 或 https:// 格式", + Type: "array", + ItemType: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "mode", + FlagName: "mode", + Description: "高光提取模式,本期支持 HighlightExtract", + Type: "string", + Required: false, + Enum: []string{"HighlightExtract"}, + HasDefault: true, + DefaultValue: "HighlightExtract", + JSONEncoded: false, + }, + { + Name: "enable_generate_video", + FlagName: "enable-generate-video", + Description: "是否生成混剪成片视频。true(默认)= 同时输出混剪视频(Edit.Mode=HighlightClips)与高光片段信息;false = 仅输出高光片段信息(clips),底层请求不携带 Edit 字段,也不会生成任何混剪视频。", + Type: "boolean", + 
Required: false, + HasDefault: true, + DefaultValue: "true", + JSONEncoded: false, + }, + { + Name: "minigame_info", + FlagName: "minigame-info", + Description: "小游戏描述信息,建议填写以辅助模型更精准识别高光内容", + Type: "object", + Required: false, + HasDefault: false, + JSONEncoded: true, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "generate-highlights-microdrama", + DisplayName: "高光智剪-短剧", + Domain: "video", + Description: "深度理解短剧角色、剧情与故事线,自动提取高光片段并混剪成投流视频。\n支持故事线混剪模式(StorylineCuts),可选\"短剧三要素\"视觉模板,输出高光集锦、单集预告等。\n支持输出详细分镜信息(storyboard)。\n使用限制:单次最多 100 个视频,累计时长不超过 300 分钟。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_urls", + FlagName: "video-urls", + Description: "待处理的短剧原片视频 URL 列表,支持 1-100 个视频\n子项说明:视频 URL,支持 http:// 或 https:// 格式", + Type: "array", + ItemType: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "mode", + FlagName: "mode", + Description: "短剧高光智剪模式,本期固定为 StorylineCuts(故事线混剪模式)", + Type: "string", + Required: false, + Enum: []string{"StorylineCuts"}, + HasDefault: true, + DefaultValue: "StorylineCuts", + JSONEncoded: false, + }, + { + Name: "enable_generate_video", + FlagName: "enable-generate-video", + Description: "是否生成混剪成片视频。true(默认)= 同时输出混剪视频与分镜信息;false = 仅输出高光分镜信息(clips/storyboard),不生成混剪视频,此时底层请求不会携带 Edit 字段,且传入的 edit_param 将被忽略。", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "true", + JSONEncoded: false, + }, + { + Name: "enable_return_poster", + FlagName: "enable-return-poster", + Description: "是否在结果中返回混剪视频封面图 URL。false(默认)= 不返回封面图;true = 若底层存在封面则返回 poster_url。", + Type: 
"boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "edit_param", + FlagName: "edit-param", + Description: "成片剪辑参数配置", + Type: "object", + Required: false, + HasDefault: false, + JSONEncoded: true, + }, + { + Name: "highlight_cuts_param", + FlagName: "highlight-cuts-param", + Description: "高光混剪参数配置", + Type: "object", + Required: false, + HasDefault: false, + JSONEncoded: true, + }, + { + Name: "opening_hook_param", + FlagName: "opening-hook-param", + Description: "精彩前置功能参数配置(可选)", + Type: "object", + Required: false, + HasDefault: false, + JSONEncoded: true, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "segment-scenes", + DisplayName: "场景切分", + Domain: "video", + Description: "依据视频转场与画面变化自动切分场景,输出切片时间轴和(可选)切片文件。\n支持格式:MP4、FLV、ASF、RM、RMVB、MPEG、MOV、AVI、MPEGTS、M4S、WMV、3GP、TS、MPG、WEBM、MKV、WM、MPE、VOB、DAT、MP4V、M4V、F4V、MXF、QT 等主流视频格式。\n使用限制:单个视频时长不超过 2 小时。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "待处理视频 Url,必须是公网可直接访问的 HTTP/HTTPS 链接", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "enable_clip_fade", + FlagName: "enable-clip-fade", + Description: "是否将检测到的淡入/淡出片段作为独立切片输出", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "segment_threshold", + FlagName: "segment-threshold", + Description: "场景切分敏感度阈值,范围 [0, 100),100 不可取。数值越低切得越细,参考经验值10", + Type: "number", + Required: false, + HasDefault: false, + JSONEncoded: false, + 
}, + { + Name: "min_duration", + FlagName: "min-duration", + Description: "单个切片最小时长(秒),参考经验值3,应小于等于max_duration", + Type: "number", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "max_duration", + FlagName: "max-duration", + Description: "单个切片最大时长(秒),参考经验值30,应大于等于min_duration", + Type: "number", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "analyze-video-storyline", + DisplayName: "剧情故事线分析", + Domain: "video", + Description: "智能解析影视剧内容,生成结构化剧情线,供智能剪辑、内容检索与互动播放等场景使用。\n基于大模型视频理解能力,对输入的单个或多个长视频(如电影、电视剧)进行分析,提取并组织成一份完整的故事线。\n该故事线由一系列按时间顺序排列的剧情片段(Clips)和基于片段聚合的高光故事线(Highlights)组成。\n使用限制:单次最多 30 个视频,单个视频时长不超过 2.5 小时。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_urls", + FlagName: "video-urls", + Description: "待处理的视频 URL 列表,支持 HTTP/HTTPS 公网可访问链接,最多 30 个视频\n子项说明:视频 URL,支持 http:// 或 https:// 格式", + Type: "array", + ItemType: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "enable_snapshot", + FlagName: "enable-snapshot", + Description: "是否为每个剧情片段生成关键帧快照。默认为 false。开启后,结果中将包含 clip_snapshot_url 字段", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: 
false, + JSONEncoded: false, + }, + }, + }, + { + Name: "analyze-video-highlights", + DisplayName: "高光片段提取", + Domain: "video", + Description: "智能捕捉视频\"情绪波峰\"与\"关键动作\",输出精准时间戳、高光打分、OCR 文本和画面描述等元数据,供下游进行更灵活的二次开发。\n支持短剧(Miniseries)和小游戏(Game)两种分析模型。\n使用限制:单次最多 100 个视频,累计时长不超过 300 分钟。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_urls", + FlagName: "video-urls", + Description: "待处理的视频 URL 列表,支持 1-100 个视频\n子项说明:视频 URL,支持 http:// 或 https:// 格式", + Type: "array", + ItemType: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "model", + FlagName: "model", + Description: "分析场景模型,Miniseries(短剧)或 Game(小游戏)", + Type: "string", + Required: true, + Enum: []string{"Miniseries", "Game"}, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "mode", + FlagName: "mode", + Description: "高光提取模式。固定组合为:model=Miniseries 时 mode 只能传 StorylineCuts;model=Game 时 mode 只能传 HighlightExtract", + Type: "string", + Required: true, + Enum: []string{"StorylineCuts", "HighlightExtract"}, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "minigame_info", + FlagName: "minigame-info", + Description: "小游戏描述信息,当 model=Game 时可选填,可辅助模型更精准识别高光内容", + Type: "object", + Required: false, + HasDefault: false, + JSONEncoded: true, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "matte-portrait-video", + DisplayName: "视频人像抠图", + Domain: "video", + Description: "自动识别人物主体,同时移除背景,生成背景透明的视频,适用于背景替换等场景。\n输出格式为 WEBM(默认)或 MOV,分辨率与原片对齐。\n支持的格式:主流视频格式如 mp4、flv、ts、avi、mov、mkv、wmv。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: 
"query-task", + OutputType: "video", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频 Url。支持 http://xxx 或 https://xxx 格式。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "format", + FlagName: "format", + Description: "输出视频格式:MOV / WEBM(默认)", + Type: "string", + Required: false, + Enum: []string{"MOV", "WEBM"}, + HasDefault: true, + DefaultValue: "WEBM", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "matte-greenscreen-video", + DisplayName: "视频绿幕抠图", + Domain: "video", + Description: "对以绿幕或纯色为背景的视频进行抠图,自动识别主体(人物、物品、动物等),同时移除背景,生成背景透明的视频。\n输出视频格式为 WEBM(默认)或 MOV,分辨率与原片对齐。\n支持的格式:主流视频格式如 mp4、flv、ts、avi、mov、mkv、wmv。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffmpeg", "prores_ks"}, + LocalLimitations: []string{"本地模式使用标准绿幕色键抠图,并使用 ProRes 4444 MOV 透明输出;仅支持 --format MOV,WEBM 透明输出需使用 cloud 模式。callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "video", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频 Url。支持 http://xxx 或 https://xxx 格式。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "format", + FlagName: "format", + Description: "输出视频格式:MOV / WEBM(默认)", + Type: "string", + Required: false, + Enum: []string{"MOV", "WEBM"}, + HasDefault: true, + DefaultValue: "WEBM", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + 
Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "probe-video-metadata", + DisplayName: "视频元信息获取", + Domain: "video", + Description: "对输入视频 URL 进行探测,输出标准化媒资元信息,覆盖容器层(format_meta)、视频流层(video_stream_meta)与音频流层(audio_stream_meta)。\n字段分类参考 ffprobe,并对 VOD 原始返回做精简与统一,便于上层做分辨率/帧率/码率/编码等策略判断。\n使用限制:仅支持公网 HTTP/HTTPS URL;输入视频分辨率最高支持 4K。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffprobe"}, + LocalLimitations: []string{"支持本地 FFprobe 探测视频容器、视频流和音频流元信息;callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "待探测的视频公网 HTTP/HTTPS URL。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "fade-video-audio", + DisplayName: "视频声音淡入淡出", + Domain: "editing", + Description: "对输入视频的声轨实现淡入淡出效果。\n输出 mp4,分辨率与原片一致。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffmpeg"}, + LocalLimitations: []string{"支持本地 FFmpeg 对视频音轨做淡入淡出;callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "video", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频。支持http://xxx或https://xxx格式 URL,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "fade_in_duration", + FlagName: "fade-in-duration", + 
Description: "声音淡入时长。单位:秒,可传小数(最多3位小数)。0 表示不淡入。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "1", + JSONEncoded: false, + }, + { + Name: "fade_out_duration", + FlagName: "fade-out-duration", + Description: "声音淡出时长。单位:秒,可传小数(最多3位小数)。0 表示不淡出。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "1", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "apply-video-filter", + DisplayName: "视频添加滤镜", + Domain: "editing", + Description: "为视频添加指定滤镜效果,输出mp4,分辨率与原片一致。", + LocalSupported: false, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "video", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频。支持http://xxx或https://xxx格式 URL,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "filter_style", + FlagName: "filter-style", + Description: "滤镜风格。根据用户想要的视频画面效果选择:\n- spring:春日滤镜\n- sunset:晚霞滤镜\n- vivid:鲜亮滤镜\n- fair_skin:白皙滤镜\n- food:食物滤镜\n", + Type: "string", + Required: false, + Enum: []string{"spring", "sunset", "vivid", "fair_skin", "food"}, + HasDefault: true, + DefaultValue: "spring", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "adjust-video-volume", + DisplayName: "调整视频音量", + Domain: 
"editing", + Description: "调整视频音量大小,支持静音;输出 mp4,分辨率与原片一致。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffmpeg"}, + LocalLimitations: []string{"支持本地 FFmpeg 调整视频音轨音量;callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "video", + Params: []ParamMeta{ + { + Name: "video_url", + FlagName: "video-url", + Description: "输入视频。支持http://xxx或https://xxx格式 URL,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "volume", + FlagName: "volume", + Description: "音量倍数。Float 类型,取值范围 0~4。0=静音,1=原音量,4=放大 4 倍。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "1", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "fade-audio", + DisplayName: "音频声音淡入淡出", + Domain: "editing", + Description: "对输入音频实现淡入淡出效果,输出 mp3。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffmpeg", "libmp3lame"}, + LocalLimitations: []string{"支持本地 FFmpeg 对音频做淡入淡出并输出 mp3;callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "audio", + Params: []ParamMeta{ + { + Name: "audio_url", + FlagName: "audio-url", + Description: "输入音频。支持http://xxx或https://xxx格式 URL,支持 mp3、m4a、wav、flac 等格式", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "fade_in_duration", + FlagName: "fade-in-duration", + Description: "声音淡入时长。单位:秒,可传小数(最多3位小数)。0 表示不淡入。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "1", + JSONEncoded: false, + }, + { + Name: "fade_out_duration", + 
FlagName: "fade-out-duration", + Description: "声音淡出时长。单位:秒,可传小数(最多3位小数)。0 表示不淡出。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "1", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "mix-audio", + DisplayName: "音频混合", + Domain: "editing", + Description: "将多个音频文件(如背景音乐、音效、人声)进行混音,生成一个新的音频文件。\n处理耗时:处理耗时与视频时长正相关。视频时长越长,处理耗时越长。平均 RTF(处理耗时/原片时长)为 1。\n输出音频的时长以最长的音频为准。\n输出视频格式:mp3", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffmpeg", "libmp3lame"}, + LocalLimitations: []string{"支持本地 FFmpeg 混合 1 到 100 个音频并输出 mp3;callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "audio", + Params: []ParamMeta{ + { + Name: "audio_urls", + FlagName: "audio-urls", + Description: "待混合的音频列表,Array类型。最少传入1个,最多传入100个。\n子项说明:待混合的输入音频。支持http://xxx或https://xxx格式 URL,支持 mp3、wav、flac 等格式", + Type: "array", + ItemType: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "adjust-audio-speed", + DisplayName: "音频调速", + Domain: "editing", + Description: "调整音频的播放倍速,实现快放或慢放效果。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffmpeg"}, + LocalLimitations: []string{"支持本地 FFmpeg 调整音频播放倍速并输出 m4a;callback/client_token 本地忽略。"}, + Async: true, + 
AsyncQueryCommand: "query-task", + OutputType: "audio", + Params: []ParamMeta{ + { + Name: "audio_url", + FlagName: "audio-url", + Description: "输入音频。支持http://xxx或https://xxx格式 Url,支持 mp3、m4a、wav 等格式", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "speed", + FlagName: "speed", + Description: "调整速度的倍数,Float类型,取值范围为0.1~4。0.1=放慢至原速的 0.1 倍,1=原速,4=加速至原速的 4 倍。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "1", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "probe-audio-metadata", + DisplayName: "音频元信息获取", + Domain: "audio", + Description: "获取指定音频的详细元信息,输出容器层信息(format_meta)与音频流元信息(audio_stream_meta)。\n字段分类参考 ffprobe,并对 VOD 原始返回做精简与统一。\n使用限制:支持公网 HTTP/HTTPS URL。", + LocalSupported: true, + LocalSource: "generated", + LocalDeps: []string{"ffprobe"}, + LocalLimitations: []string{"支持本地 FFprobe 探测音频元信息;callback/client_token 本地忽略。"}, + Async: true, + AsyncQueryCommand: "query-task", + OutputType: "file", + Params: []ParamMeta{ + { + Name: "audio_url", + FlagName: "audio-url", + Description: "待探测的音频公网 HTTP/HTTPS URL。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "image-ocr", + DisplayName: "图像文字识别OCR", + Domain: "image", + Description: 
"识别图片中的通用印刷体文字,返回可编辑文本、文字框坐标和置信度。\n本期支持简体中文和英文通用场景识别。", + LocalSupported: false, + Async: false, + OutputType: "file", + Params: []ParamMeta{ + { + Name: "image_url", + FlagName: "image-url", + Description: "输入图片 URL,需为公网可访问的 png/jpg/jpeg/webp/heic/avif 图片,单图不超过 10MB。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "erase-image", + DisplayName: "图像擦除修复", + Domain: "image", + Description: "自动检测并擦除图片中的常见图标、文字或指定区域内容,并对擦除区域进行背景智能填充。", + LocalSupported: false, + Async: false, + OutputType: "file", + Params: []ParamMeta{ + { + Name: "image_url", + FlagName: "image-url", + Description: "输入图片 URL,需为公网可访问的 png/jpg/jpeg/webp/tiff/bmp/heic 图片,单图不超过 10MB。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "tool_version", + FlagName: "tool-version", + Description: "图像擦除修复选用的模型版本。- standard:标准版。基于明确的规则(如文本匹配、矩形框坐标)擦除指定内容。适用于简单、明确的擦除任务。默认 standard。", + Type: "string", + Required: false, + Enum: []string{"standard"}, + HasDefault: true, + DefaultValue: "standard", + JSONEncoded: false, + }, + { + Name: "standard_scene", + FlagName: "standard-scene", + Description: "标准版擦除场景,仅 standard 版本生效。full_screen_text_erase:全屏文字擦除,可通过standard_erase_text字段指定要擦除的文字,不指定则默认擦除所有文字内容。full_screen_icon_erase:全屏图标擦除。", + Type: "string", + Required: false, + Enum: []string{"full_screen_text_erase", "full_screen_icon_erase"}, + HasDefault: true, + DefaultValue: "full_screen_text_erase", + JSONEncoded: false, + }, + { + Name: "standard_erase_text", + FlagName: "standard-erase-text", + Description: "标准版文字擦除,指定要擦除的文字,不指定则默认擦除所有文字内容。", + Type: "string", + 
Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "output_format", + FlagName: "output-format", + Description: "输出图片格式;默认 webp。", + Type: "string", + Required: false, + Enum: []string{"png", "jpeg", "webp"}, + HasDefault: true, + DefaultValue: "webp", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "remove-image-background", + DisplayName: "图像背景移除", + Domain: "image", + Description: "自动识别并保留图像主体,移除背景并生成透明背景图片。\n支持通用、人像、商品场景,可在人像/商品场景中生成主体描边或裁剪透明背景。", + LocalSupported: false, + Async: false, + OutputType: "file", + Params: []ParamMeta{ + { + Name: "image_url", + FlagName: "image-url", + Description: "输入图片 URL,需为公网可访问的 png/jpg/jpeg/webp/tiff/bmp/ico 图片,单图不超过 10MB。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "scene", + FlagName: "scene", + Description: "背景移除场景:general 为通用场景,适用于期望抠出图像主体但不确定该主体所属分类的场景。human 为人像抠图场景,适用于仅需抠出图像中的人像主体的场景,product 为商品抠图场景,适用于仅需抠出图像中的商品主体的场景。", + Type: "string", + Required: true, + Enum: []string{"general", "human", "product"}, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "need_contour", + FlagName: "need-contour", + Description: "是否为主体生成描边;默认 false,仅 human/product 场景生效,general 场景忽略。", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "contour_color", + FlagName: "contour-color", + Description: "主体描边颜色,十六进制 RGB;默认 #FFFFFF,仅 need_contour=true 且 human/product 场景生效。", + Type: "string", + Required: false, + HasDefault: true, + DefaultValue: "#FFFFFF", + JSONEncoded: false, + }, + { + Name: "contour_size", + FlagName: "contour-size", + 
Description: "主体描边宽度,单位 px;默认 10,仅 need_contour=true 且 human/product 场景生效。", + Type: "integer", + Required: false, + HasDefault: true, + DefaultValue: "10", + JSONEncoded: false, + }, + { + Name: "need_crop_background", + FlagName: "need-crop-background", + Description: "是否裁剪透明背景到刚好包住主体;默认 false,仅 human/product 场景生效,general 场景忽略。", + Type: "boolean", + Required: false, + HasDefault: true, + DefaultValue: "false", + JSONEncoded: false, + }, + { + Name: "output_format", + FlagName: "output-format", + Description: "输出图片格式;默认 png。", + Type: "string", + Required: false, + Enum: []string{"png", "jpeg", "webp"}, + HasDefault: true, + DefaultValue: "png", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "enhance-image", + DisplayName: "图像画质增强", + Domain: "image", + Description: "基于图像内容理解智能决策,全方位提升图片分辨率、清晰度与色彩表现。", + LocalSupported: false, + Async: false, + OutputType: "file", + Params: []ParamMeta{ + { + Name: "image_url", + FlagName: "image-url", + Description: "输入图片。String 类型,支持http://xxx或https://xxx格式 URL", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "tool_version", + FlagName: "tool-version", + Description: "画质增强选用的模型版本,标准版:standard;专业版:professional。默认为标准版", + Type: "string", + Required: false, + Enum: []string{"standard", "professional"}, + HasDefault: true, + DefaultValue: "standard", + JSONEncoded: false, + }, + { + Name: "multiple", + FlagName: "multiple", + Description: "图像处理后较原图的分辨率倍数,支持 2 位小数。取值最大不超过 30,取值范围[1,30]。注意:图像处理后的宽度和高度不能超过target_width、target_height的上限值。standard模式下,取值最大不超过 8。", + Type: "number", + Required: false, + HasDefault: false, + JSONEncoded: false, 
+ }, + { + Name: "target_width", + FlagName: "target-width", + Description: "图像处理后的宽度,单位为 px,取值不能超过 10240。注意:standard模式下,取值最大不超过 6144,且图像处理后较原图的分辨率倍数不能超过 8。", + Type: "integer", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "target_height", + FlagName: "target-height", + Description: "图像处理后的高度,单位为 px,取值不能超过 10240。注意:standard模式下,取值最大不超过 6144,且图像处理后较原图的分辨率倍数不能超过 8。", + Type: "integer", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "evaluate-image-quality", + DisplayName: "图像画质评估", + Domain: "image", + Description: "对输入图片进行主客观画质和美学评分,适用于质量监控、低质图筛查、内容审核、推荐排序和训练数据清洗等场景。\n支持标准版多维评分与专业版大模型评分。", + LocalSupported: false, + Async: false, + OutputType: "file", + Params: []ParamMeta{ + { + Name: "image_url", + FlagName: "image-url", + Description: "输入图片 URL,需为公网可访问的 png/jpeg/webp/heic 图片,单图不超过 10MB。", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "tool_version", + FlagName: "tool-version", + Description: "画质评估模型版本,standard 为标准版,professional 为专业版;默认 standard。", + Type: "string", + Required: false, + Enum: []string{"standard", "professional"}, + HasDefault: true, + DefaultValue: "standard", + JSONEncoded: false, + }, + { + Name: "standard_evaluate_items", + FlagName: "standard-evaluate-items", + Description: "标准版选用的评估工具\n子项说明:评估工具。", + Type: "array", + ItemType: "string", + Required: false, + HasDefault: true, + DefaultValue: "[\"vqscore\",\"noise\",\"aesthetic\",\"blur\"]", + JSONEncoded: false, + }, + { + Name: "callback_args", + FlagName: "callback-args", + Description: "可选,回调参数", + Type: "string", + 
Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "client_token", + FlagName: "client-token", + Description: "可选,用于幂等,默认幂等,用户可根据需求进行调整", + Type: "string", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, + { + Name: "query-task", + DisplayName: "查询任务", + Domain: "shared", + Description: "异步任务结果查询通过task_id查询任务信息", + LocalSupported: false, + Async: false, + AsyncQueryCommand: "", + OutputType: "query-task", + Params: []ParamMeta{ + { + Name: "task_id", + FlagName: "task-id", + Description: "Path parameter `task_id`.", + Type: "string", + Required: true, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "poll_interval_seconds", + FlagName: "poll-interval-seconds", + Description: "轮询间隔秒数;仅 query-task 使用。", + Type: "number", + Required: false, + HasDefault: true, + DefaultValue: "10", + JSONEncoded: false, + }, + { + Name: "max_poll_attempts", + FlagName: "max-poll-attempts", + Description: "最大轮询次数;0 表示不自动轮询。", + Type: "integer", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + { + Name: "poll_complete", + FlagName: "poll-complete", + Description: "是否持续轮询直到任务完成。", + Type: "boolean", + Required: false, + HasDefault: false, + JSONEncoded: false, + }, + }, + }, +} + +func printDomains(cmd *cobra.Command) error { + _, err := fmt.Fprintln(cmd.OutOrStdout(), renderDomainsIndex()) + return err +} + +func printHelpFull(cmd *cobra.Command) error { + _, err := fmt.Fprintln(cmd.OutOrStdout(), renderHelpFullIndex()) + return err +} + +func renderDomainsIndex() string { + domains := domainList() + if len(domains) == 0 { + return "Available domains:\n" + } + + lines := []string{"Available domains:"} + for _, domain := range domains { + lines = append(lines, fmt.Sprintf("- %s %s", domain.Name, domain.Description)) + } + return strings.Join(lines, "\n") +} + +func renderHelpFullIndex() string { + domains := domainList() + if len(domains) == 0 { + return "MediaKit CLI Full Help\n\n" + } + + grouped := 
capabilitiesByDomain() + lines := []string{"MediaKit CLI Full Help", ""} + for _, domain := range domains { + lines = append(lines, fmt.Sprintf("[%s]", domain.Name)) + lines = append(lines, domain.Description) + capabilities := grouped[domain.Name] + if len(capabilities) == 0 { + lines = append(lines, "- ") + lines = append(lines, "") + continue + } + for _, capability := range capabilities { + lines = append(lines, fmt.Sprintf("- %s %s", capability.Name, capability.Description)) + lines = append(lines, fmt.Sprintf(" 查看详情: %s --help", capabilityInvocation(capability))) + } + lines = append(lines, "") + } + + return strings.TrimRight(strings.Join(lines, "\n"), "\n") +} + +func newGeneratedDomainCommands() []*cobra.Command { + domains := domainList() + grouped := capabilitiesByDomain() + cmds := make([]*cobra.Command, 0, len(domains)) + + for _, meta := range domains { + domainMeta := meta + domainCmd := &cobra.Command{ + Use: domainMeta.Name, + Short: domainMeta.Description, + Long: renderDomainHelp(domainMeta, grouped[domainMeta.Name]), + Args: cobra.NoArgs, + DisableAutoGenTag: true, + RunE: func(cmd *cobra.Command, args []string) error { + return cmd.Help() + }, + } + for _, capability := range grouped[domainMeta.Name] { + domainCmd.AddCommand(newCapabilityCommand(capability)) + } + cmds = append(cmds, domainCmd) + } + + return cmds +} + +func newCapabilityCommand(meta CapabilityMeta) *cobra.Command { + capabilityMeta := meta + cmd := &cobra.Command{ + Use: capabilityMeta.Name, + Short: capabilityMeta.Description, + Long: renderCapabilityHelp(capabilityMeta), + Args: cobra.NoArgs, + DisableAutoGenTag: true, + RunE: func(cmd *cobra.Command, args []string) error { + // --schema: 输出工具 schema 后退出 + if schemaFlag, _ := cmd.Flags().GetBool("schema"); schemaFlag { + resolvedMode := resolveSchemaMode(cmd) + return writeJSON(cmd.OutOrStdout(), buildCapabilitySchema(capabilityMeta, resolvedMode)) + } + params, err := collectCapabilityParams(cmd, capabilityMeta) + if err != 
nil { + return err + } + if err := modes.Dispatch(cmd, capabilityMeta.runtimeMeta(), params); err != nil { + return writeCapabilityError(cmd.OutOrStdout(), err) + } + return nil + }, + } + bindCapabilityFlags(cmd, capabilityMeta) + cmd.Flags().String("output-path", "", "本地文件输出目录(覆盖 config/env 设置)") + cmd.Flags().Bool("schema", false, "输出该工具的 JSON Schema 描述(供 Agent 使用)") + configureCapabilityHelp(cmd, capabilityMeta) + + return cmd +} + +func renderDomainHelp(domain DomainMeta, capabilities []CapabilityMeta) string { + lines := []string{fmt.Sprintf("%s — %s", domain.Name, domain.Description), "", "Available commands:"} + if len(capabilities) == 0 { lines = append(lines, "- ") } else { for _, capability := range capabilities { @@ -1209,6 +2743,53 @@ func renderCapabilityHelp(meta CapabilityMeta) string { return strings.Join(lines, "\n") } +func configureCapabilityHelp(cmd *cobra.Command, meta CapabilityMeta) { + businessFlags := capabilityBusinessFlagNames(meta) + cmd.SetHelpFunc(func(helpCmd *cobra.Command, args []string) { + helpCmd.InitDefaultHelpFlag() + fmt.Fprint(helpCmd.OutOrStdout(), renderCapabilityCommandHelp(helpCmd, businessFlags)) + }) +} + +func capabilityBusinessFlagNames(meta CapabilityMeta) map[string]struct{} { + names := make(map[string]struct{}, len(meta.Params)) + for _, param := range meta.Params { + names[param.FlagName] = struct{}{} + } + return names +} + +func renderCapabilityCommandHelp(cmd *cobra.Command, businessFlags map[string]struct{}) string { + lines := []string{ + strings.TrimRight(cmd.Long, "\n"), + "", + "Usage:", + " " + cmd.UseLine(), + } + if localFlagUsages := filteredFlagUsages(cmd.LocalFlags(), businessFlags); localFlagUsages != "" { + lines = append(lines, "", "Flags:", localFlagUsages) + } + if inheritedFlagUsages := strings.TrimRight(cmd.InheritedFlags().FlagUsages(), "\n"); inheritedFlagUsages != "" { + lines = append(lines, "", "Global Flags:", inheritedFlagUsages) + } + return strings.TrimRight(strings.Join(lines, 
"\n"), "\n") + "\n" +} + +func filteredFlagUsages(source *pflag.FlagSet, excluded map[string]struct{}) string { + flags := pflag.NewFlagSet(source.Name(), pflag.ContinueOnError) + flags.SortFlags = source.SortFlags + source.VisitAll(func(flag *pflag.Flag) { + if _, ok := excluded[flag.Name]; ok { + return + } + flags.AddFlag(flag) + }) + if !flags.HasAvailableFlags() { + return "" + } + return strings.TrimRight(flags.FlagUsages(), "\n") +} + func (m CapabilityMeta) runtimeMeta() modes.CapabilityRuntimeMeta { return modes.CapabilityRuntimeMeta{ Name: m.Name, diff --git a/internal/config/config.go b/internal/config/config.go index c76504d..805ef15 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -19,6 +19,7 @@ const ( ConfigDirName = ".mediakit" ConfigFileName = "config.json" EnvCacheName = "env_cache.json" + UploadCacheName = "upload_cache.json" DefaultOutputDirName = "temp" DefaultEndpoint = "https://amk.cn-beijing.volces.com" EnvAPIKey = "MEDIAKIT_API_KEY" @@ -87,6 +88,10 @@ func EnvCacheFile(home string) string { return filepath.Join(ConfigDir(home), EnvCacheName) } +func UploadCacheFile(home string) string { + return filepath.Join(ConfigDir(home), UploadCacheName) +} + func DefaultOutputPath(home string) string { return filepath.Join(ConfigDir(home), DefaultOutputDirName) } diff --git a/internal/config/envcheck.go b/internal/config/envcheck.go index 19d472c..9bbcc42 100644 --- a/internal/config/envcheck.go +++ b/internal/config/envcheck.go @@ -99,6 +99,8 @@ func probeAllowedFFmpegDependency(ffmpegAvailable bool, dep string) ToolStatus { return probeFFmpegOutput(dep, []string{"-hide_banner", "-demuxers"}, " concat") case "libmp3lame": return probeFFmpegOutput(dep, []string{"-hide_banner", "-encoders"}, "libmp3lame") + case "prores_ks": + return probeFFmpegOutput(dep, []string{"-hide_banner", "-encoders"}, "prores_ks") case "libass": return probeFFmpegOutput(dep, []string{"-hide_banner", "-filters"}, " ass") case "libpng": diff --git 
a/internal/local/admission/policy.go b/internal/local/admission/policy.go index d77d1c6..ef3938c 100644 --- a/internal/local/admission/policy.go +++ b/internal/local/admission/policy.go @@ -77,6 +77,7 @@ func AllowedDependencyNames() []string { "h264_videotoolbox", "demuxer", "libmp3lame", + "prores_ks", "libass", "libfreetype", "libfontconfig", diff --git a/internal/local/core/security.go b/internal/local/core/security.go index 713c867..bcb02c9 100644 --- a/internal/local/core/security.go +++ b/internal/local/core/security.go @@ -42,6 +42,7 @@ func DefaultFFmpegPolicy() FFmpegPolicy { "-ar": {}, "-ac": {}, "-pix_fmt": {}, + "-profile:v": {}, "-movflags": {}, "-preset": {}, "-crf": {}, diff --git a/internal/local/generated/local_plans.go b/internal/local/generated/local_plans.go index 653de36..58469b0 100644 --- a/internal/local/generated/local_plans.go +++ b/internal/local/generated/local_plans.go @@ -1,8 +1,11 @@ package generated import ( + "crypto/md5" + "encoding/hex" "encoding/json" "fmt" + "io" "os" "path/filepath" "strconv" @@ -22,6 +25,46 @@ type LocalInputRef struct { LocalPath string `json:"local_path"` } +type ffprobeAudioMetadata struct { + Format struct { + FormatName string `json:"format_name"` + BitRate string `json:"bit_rate"` + Duration string `json:"duration"` + Size string `json:"size"` + } `json:"format"` + Streams []struct { + CodecName string `json:"codec_name"` + Duration string `json:"duration"` + SampleRate string `json:"sample_rate"` + BitRate string `json:"bit_rate"` + Channels int `json:"channels"` + } `json:"streams"` +} + +type ffprobeMediaMetadata struct { + Format struct { + FormatName string `json:"format_name"` + BitRate string `json:"bit_rate"` + Duration string `json:"duration"` + Size string `json:"size"` + } `json:"format"` + Streams []struct { + CodecType string `json:"codec_type"` + CodecName string `json:"codec_name"` + Width int `json:"width"` + Height int `json:"height"` + Duration string `json:"duration"` + BitRate 
string `json:"bit_rate"` + AvgFrameRate string `json:"avg_frame_rate"` + RFrameRate string `json:"r_frame_rate"` + ColorTransfer string `json:"color_transfer"` + ColorPrimaries string `json:"color_primaries"` + ColorSpace string `json:"color_space"` + SampleRate string `json:"sample_rate"` + Channels int `json:"channels"` + } `json:"streams"` +} + func materializeLocalInput(ctx *core.ExecContext, value string) (LocalInputRef, error) { localPath, err := core.MaterializeInput(ctx, value) if err != nil { @@ -273,6 +316,362 @@ func buildMuxAudioVideoPlan(ctx *core.ExecContext) (*FFmpegPlan, error) { return &FFmpegPlan{Args: args, Result: videoResponse("mux-audio-video", out, []LocalInputRef{video, audio}, warnings, map[string]any{})}, nil } +func buildFadeVideoAudioPlan(ctx *core.ExecContext) (*FFmpegPlan, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + videoURL, err := requiredStringParam(ctx.Params, "video_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, videoURL) + if err != nil { + return nil, err + } + if !hasAudioStream(input.LocalPath) { + return nil, fmt.Errorf("fade-video-audio 需要输入视频包含音频流") + } + fadeIn, hasFadeIn, err := optionalFloatParam(ctx.Params, "fade_in_duration") + if err != nil { + return nil, err + } + if !hasFadeIn { + fadeIn = 1 + } + fadeOut, hasFadeOut, err := optionalFloatParam(ctx.Params, "fade_out_duration") + if err != nil { + return nil, err + } + if !hasFadeOut { + fadeOut = 1 + } + if fadeIn < 0 { + return nil, fmt.Errorf("fade_in_duration 必须大于等于 0") + } + if fadeOut < 0 { + return nil, fmt.Errorf("fade_out_duration 必须大于等于 0") + } + out, err := outputPath(ctx, "fade-video-audio", ".mp4") + if err != nil { + return nil, err + } + if fadeIn == 0 && fadeOut == 0 { + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-map", "0:v:0", "-map", "0:a:0", "-c", "copy", out} + return 
&FFmpegPlan{Args: args, Result: videoResponse("fade-video-audio", out, []LocalInputRef{input}, warnings, map[string]any{"fade_in_duration": fadeIn, "fade_out_duration": fadeOut})}, nil + } + duration, err := mediaDuration(input.LocalPath) + if err != nil { + return nil, err + } + filter := audioFadeFilter(fadeIn, fadeOut, duration) + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-filter_complex", "[0:a]" + filter + "[outa]", "-map", "0:v:0", "-map", "[outa]", "-c:v", "copy", "-c:a", "aac", "-b:a", "128k", out} + return &FFmpegPlan{Args: args, Result: videoResponse("fade-video-audio", out, []LocalInputRef{input}, warnings, map[string]any{"fade_in_duration": fadeIn, "fade_out_duration": fadeOut})}, nil +} + +func buildAdjustVideoVolumePlan(ctx *core.ExecContext) (*FFmpegPlan, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + videoURL, err := requiredStringParam(ctx.Params, "video_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, videoURL) + if err != nil { + return nil, err + } + if !hasAudioStream(input.LocalPath) { + return nil, fmt.Errorf("adjust-video-volume 需要输入视频包含音频流") + } + volume, hasVolume, err := optionalFloatParam(ctx.Params, "volume") + if err != nil { + return nil, err + } + if !hasVolume { + volume = 1 + } + if volume < 0 || volume > 4 { + return nil, fmt.Errorf("volume 取值范围为 0 到 4") + } + out, err := outputPath(ctx, "adjust-video-volume", ".mp4") + if err != nil { + return nil, err + } + if volume == 1 { + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-map", "0:v:0", "-map", "0:a:0", "-c", "copy", out} + return &FFmpegPlan{Args: args, Result: videoResponse("adjust-video-volume", out, []LocalInputRef{input}, warnings, map[string]any{"volume": volume})}, nil + } + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-filter_complex", "[0:a]volume=" + 
formatFloat(volume) + "[outa]", "-map", "0:v:0", "-map", "[outa]", "-c:v", "copy", "-c:a", "aac", "-b:a", "128k", out} + return &FFmpegPlan{Args: args, Result: videoResponse("adjust-video-volume", out, []LocalInputRef{input}, warnings, map[string]any{"volume": volume})}, nil +} + +func buildFadeAudioPlan(ctx *core.ExecContext) (*FFmpegPlan, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + audioURL, err := requiredStringParam(ctx.Params, "audio_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, audioURL) + if err != nil { + return nil, err + } + fadeIn, hasFadeIn, err := optionalFloatParam(ctx.Params, "fade_in_duration") + if err != nil { + return nil, err + } + if !hasFadeIn { + fadeIn = 1 + } + fadeOut, hasFadeOut, err := optionalFloatParam(ctx.Params, "fade_out_duration") + if err != nil { + return nil, err + } + if !hasFadeOut { + fadeOut = 1 + } + if fadeIn < 0 { + return nil, fmt.Errorf("fade_in_duration 必须大于等于 0") + } + if fadeOut < 0 { + return nil, fmt.Errorf("fade_out_duration 必须大于等于 0") + } + out, err := outputPath(ctx, "fade-audio", ".mp3") + if err != nil { + return nil, err + } + if fadeIn == 0 && fadeOut == 0 { + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-vn", "-c:a", "libmp3lame", "-b:a", "128k", out} + return &FFmpegPlan{Args: args, Result: audioResponse("fade-audio", out, []LocalInputRef{input}, warnings, map[string]any{"fade_in_duration": fadeIn, "fade_out_duration": fadeOut})}, nil + } + duration, err := mediaDuration(input.LocalPath) + if err != nil { + return nil, err + } + filter := audioFadeFilter(fadeIn, fadeOut, duration) + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-filter_complex", "[0:a]" + filter + "[outa]", "-map", "[outa]", "-vn", "-c:a", "libmp3lame", "-b:a", "128k", out} + return &FFmpegPlan{Args: args, Result: audioResponse("fade-audio", out, 
[]LocalInputRef{input}, warnings, map[string]any{"fade_in_duration": fadeIn, "fade_out_duration": fadeOut})}, nil +} + +func buildAdjustAudioSpeedPlan(ctx *core.ExecContext) (*FFmpegPlan, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + audioURL, err := requiredStringParam(ctx.Params, "audio_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, audioURL) + if err != nil { + return nil, err + } + speed, hasSpeed, err := optionalFloatParam(ctx.Params, "speed") + if err != nil { + return nil, err + } + if !hasSpeed { + speed = 1 + } + if speed < 0.1 || speed > 4 { + return nil, fmt.Errorf("speed 取值范围为 0.1 到 4") + } + out, err := outputPath(ctx, "adjust-audio-speed", ".m4a") + if err != nil { + return nil, err + } + if speed == 1 { + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-vn", "-c:a", "aac", "-b:a", "128k", out} + return &FFmpegPlan{Args: args, Result: audioResponse("adjust-audio-speed", out, []LocalInputRef{input}, warnings, map[string]any{"speed": speed})}, nil + } + args := []string{"-y", "-hide_banner", "-i", input.LocalPath, "-filter_complex", "[0:a]" + atempoChain(speed) + "[outa]", "-map", "[outa]", "-vn", "-c:a", "aac", "-b:a", "128k", out} + return &FFmpegPlan{Args: args, Result: audioResponse("adjust-audio-speed", out, []LocalInputRef{input}, warnings, map[string]any{"speed": speed})}, nil +} + +func buildMixAudioPlan(ctx *core.ExecContext) (*FFmpegPlan, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + urls, err := requiredStringListParam(ctx.Params, "audio_urls") + if err != nil { + return nil, err + } + if len(urls) > 100 { + return nil, fmt.Errorf("audio_urls 最多支持 100 个元素") + } + inputs := make([]LocalInputRef, 0, len(urls)) + for _, url := range urls { + input, err := 
materializeLocalInput(ctx, url) + if err != nil { + return nil, err + } + inputs = append(inputs, input) + } + out, err := outputPath(ctx, "mix-audio", ".mp3") + if err != nil { + return nil, err + } + args := []string{"-y", "-hide_banner"} + filterInputs := make([]string, 0, len(inputs)) + for index, input := range inputs { + args = append(args, "-i", input.LocalPath) + filterInputs = append(filterInputs, fmt.Sprintf("[%d:a]", index)) + } + filter := strings.Join(filterInputs, "") + fmt.Sprintf("amix=inputs=%d:duration=longest:dropout_transition=0[outa]", len(inputs)) + args = append(args, "-filter_complex", filter, "-map", "[outa]", "-vn", "-c:a", "libmp3lame", "-b:a", "128k", out) + return &FFmpegPlan{Args: args, Result: audioResponse("mix-audio", out, inputs, warnings, map[string]any{"input_count": len(inputs)})}, nil +} + +func buildProbeAudioMetadataResult(ctx *core.ExecContext) (map[string]any, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + audioURL, err := requiredStringParam(ctx.Params, "audio_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, audioURL) + if err != nil { + return nil, err + } + metadata, err := probeAudioMetadata(input.LocalPath) + if err != nil { + return nil, err + } + md5Value, _ := fileMD5(input.LocalPath) + formatMeta := map[string]any{ + "md5": nullableString(md5Value), + "container": nullableString(metadata.Format.FormatName), + "bitrate": nullableFloatString(metadata.Format.BitRate), + "duration": nullableFloatString(metadata.Format.Duration), + "size": nullableFloatString(metadata.Format.Size), + } + var audioStreamMeta any + if len(metadata.Streams) > 0 { + stream := metadata.Streams[0] + audioStreamMeta = map[string]any{ + "codec": nullableString(stream.CodecName), + "duration": nullableFloatString(stream.Duration), + "sample_rate": nullableFloatString(stream.SampleRate), + 
"bitrate": nullableFloatString(stream.BitRate), + "channels": nullableInt(stream.Channels), + } + } + return map[string]any{ + "format_meta": formatMeta, + "audio_stream_meta": audioStreamMeta, + }, nil +} + +func buildProbeVideoMetadataResult(ctx *core.ExecContext) (map[string]any, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + videoURL, err := requiredStringParam(ctx.Params, "video_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, videoURL) + if err != nil { + return nil, err + } + metadata, err := probeMediaMetadata(input.LocalPath) + if err != nil { + return nil, err + } + md5Value, _ := fileMD5(input.LocalPath) + formatMeta := map[string]any{ + "md5": nullableString(md5Value), + "container": nullableString(metadata.Format.FormatName), + "bitrate": nullableFloatString(metadata.Format.BitRate), + "duration": nullableFloatString(metadata.Format.Duration), + "size": nullableFloatString(metadata.Format.Size), + } + var videoStreamMeta any + var audioStreamMeta any + for _, stream := range metadata.Streams { + switch stream.CodecType { + case "video": + if videoStreamMeta == nil { + videoStreamMeta = map[string]any{ + "codec": nullableString(stream.CodecName), + "width": nullableInt(stream.Width), + "height": nullableInt(stream.Height), + "duration": nullableFloatString(stream.Duration), + "bitrate": nullableFloatString(stream.BitRate), + "fps": nullableFrameRate(stream.AvgFrameRate, stream.RFrameRate), + "dynamic_range": dynamicRange(stream.ColorTransfer, stream.ColorPrimaries, stream.ColorSpace), + } + } + case "audio": + if audioStreamMeta == nil { + audioStreamMeta = map[string]any{ + "codec": nullableString(stream.CodecName), + "duration": nullableFloatString(stream.Duration), + "sample_rate": nullableFloatString(stream.SampleRate), + "bitrate": nullableFloatString(stream.BitRate), + "channels": 
nullableInt(stream.Channels), + } + } + } + } + return map[string]any{ + "format_meta": formatMeta, + "video_stream_meta": videoStreamMeta, + "audio_stream_meta": audioStreamMeta, + }, nil +} + +func buildMatteGreenscreenVideoResult(ctx *core.ExecContext) (map[string]any, error) { + warnings := []LocalWarning{} + warnIfLocalNoop(ctx.Params, "callback_args", &warnings) + warnIfLocalNoop(ctx.Params, "client_token", &warnings) + + videoURL, err := requiredStringParam(ctx.Params, "video_url") + if err != nil { + return nil, err + } + input, err := materializeLocalInput(ctx, videoURL) + if err != nil { + return nil, err + } + format := strings.ToUpper(valueOrDefault(ctx.Params, "format", "WEBM")) + if format != "MOV" { + return nil, fmt.Errorf("matte-greenscreen-video 本地模式仅支持 --format MOV;WEBM 透明输出请使用 cloud 模式") + } + out, err := outputPath(ctx, "matte-greenscreen-video", ".mov") + if err != nil { + return nil, err + } + args := []string{ + "-y", + "-hide_banner", + "-i", input.LocalPath, + "-vf", "chromakey=0x00ff00:0.18:0.08,format=yuva444p10le", + "-map", "0:v:0", + "-an", + "-c:v", "prores_ks", + "-profile:v", "4", + "-pix_fmt", "yuva444p10le", + out, + } + if _, err := core.RunFFmpeg(args...); err != nil { + return nil, err + } + result := map[string]any{"video_url": out} + if duration, err := mediaDuration(out); err == nil { + result["duration"] = duration + } + return result, nil +} + func buildExtractAudioPlan(ctx *core.ExecContext) (*FFmpegPlan, error) { warnings := []LocalWarning{} warnIfLocalNoop(ctx.Params, "callback_args", &warnings) @@ -399,6 +798,144 @@ func hasAudioStream(filePath string) bool { return strings.TrimSpace(string(raw)) == "audio" } +func mediaDuration(filePath string) (float64, error) { + info, err := probeMediaInfo(filePath) + if err != nil { + return 0, err + } + duration, err := strconv.ParseFloat(strings.TrimSpace(info.Format.Duration), 64) + if err != nil || duration <= 0 { + return 0, fmt.Errorf("无法读取输入媒体时长") + } + return duration, 
nil +} + +func audioFadeFilter(fadeIn float64, fadeOut float64, duration float64) string { + filters := []string{} + if fadeIn > 0 { + filters = append(filters, "afade=t=in:st=0:d="+formatFloat(fadeIn)) + } + if fadeOut > 0 { + start := duration - fadeOut + if start < 0 { + start = 0 + } + filters = append(filters, "afade=t=out:st="+formatFloat(start)+":d="+formatFloat(fadeOut)) + } + return strings.Join(filters, ",") +} + +func probeAudioMetadata(filePath string) (ffprobeAudioMetadata, error) { + var metadata ffprobeAudioMetadata + raw, err := core.RunFFprobe( + "-v", "error", + "-select_streams", "a:0", + "-show_entries", "format=format_name,bit_rate,duration,size:stream=codec_name,duration,sample_rate,bit_rate,channels", + "-of", "json", + filePath, + ) + if err != nil { + return metadata, err + } + if err := json.Unmarshal(raw, &metadata); err != nil { + return metadata, err + } + return metadata, nil +} + +func probeMediaMetadata(filePath string) (ffprobeMediaMetadata, error) { + var metadata ffprobeMediaMetadata + raw, err := core.RunFFprobe( + "-v", "error", + "-show_entries", "format=format_name,bit_rate,duration,size:stream=codec_type,codec_name,width,height,duration,bit_rate,avg_frame_rate,r_frame_rate,color_transfer,color_primaries,color_space,sample_rate,channels", + "-of", "json", + filePath, + ) + if err != nil { + return metadata, err + } + if err := json.Unmarshal(raw, &metadata); err != nil { + return metadata, err + } + return metadata, nil +} + +func fileMD5(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + hash := md5.New() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + return hex.EncodeToString(hash.Sum(nil)), nil +} + +func nullableString(value string) any { + value = strings.TrimSpace(value) + if value == "" { + return nil + } + return value +} + +func nullableFloatString(value string) any { + value = strings.TrimSpace(value) + if value == 
"" || strings.EqualFold(value, "N/A") { + return nil + } + parsed, err := strconv.ParseFloat(value, 64) + if err != nil { + return nil + } + return parsed +} + +func nullableInt(value int) any { + if value <= 0 { + return nil + } + return value +} + +func nullableFrameRate(primary string, fallback string) any { + if value := parseFrameRate(primary); value != nil { + return value + } + return parseFrameRate(fallback) +} + +func parseFrameRate(value string) any { + value = strings.TrimSpace(value) + if value == "" || value == "0/0" || strings.EqualFold(value, "N/A") { + return nil + } + parts := strings.Split(value, "/") + if len(parts) == 2 { + numerator, numErr := strconv.ParseFloat(parts[0], 64) + denominator, denErr := strconv.ParseFloat(parts[1], 64) + if numErr != nil || denErr != nil || denominator == 0 { + return nil + } + return numerator / denominator + } + return nullableFloatString(value) +} + +func dynamicRange(colorTransfer string, colorPrimaries string, colorSpace string) any { + value := strings.ToLower(strings.Join([]string{colorTransfer, colorPrimaries, colorSpace}, " ")) + value = strings.TrimSpace(value) + if value == "" { + return nil + } + if strings.Contains(value, "smpte2084") || strings.Contains(value, "arib-std-b67") || strings.Contains(value, "bt2020") { + return "HDR" + } + return "SDR" +} + func atempoChain(speed float64) string { parts := []string{} for speed > 2.0 { diff --git a/internal/local/generated/registry.go b/internal/local/generated/registry.go index e4d3344..9768622 100644 --- a/internal/local/generated/registry.go +++ b/internal/local/generated/registry.go @@ -34,6 +34,54 @@ func Registrations() []core.Registration { Dependencies: []string{"ffmpeg"}, Handler: NewFFmpegHandler(buildMuxAudioVideoPlan), }, + { + Command: "fade-video-audio", + Source: "generated", + Dependencies: []string{"ffmpeg"}, + Handler: NewFFmpegHandler(buildFadeVideoAudioPlan), + }, + { + Command: "adjust-video-volume", + Source: "generated", + 
Dependencies: []string{"ffmpeg"}, + Handler: NewFFmpegHandler(buildAdjustVideoVolumePlan), + }, + { + Command: "fade-audio", + Source: "generated", + Dependencies: []string{"ffmpeg", "libmp3lame"}, + Handler: NewFFmpegHandler(buildFadeAudioPlan), + }, + { + Command: "adjust-audio-speed", + Source: "generated", + Dependencies: []string{"ffmpeg"}, + Handler: NewFFmpegHandler(buildAdjustAudioSpeedPlan), + }, + { + Command: "mix-audio", + Source: "generated", + Dependencies: []string{"ffmpeg", "libmp3lame"}, + Handler: NewFFmpegHandler(buildMixAudioPlan), + }, + { + Command: "probe-audio-metadata", + Source: "generated", + Dependencies: []string{"ffprobe"}, + Handler: core.HandlerFunc(buildProbeAudioMetadataResult), + }, + { + Command: "probe-video-metadata", + Source: "generated", + Dependencies: []string{"ffprobe"}, + Handler: core.HandlerFunc(buildProbeVideoMetadataResult), + }, + { + Command: "matte-greenscreen-video", + Source: "generated", + Dependencies: []string{"ffmpeg", "prores_ks"}, + Handler: core.HandlerFunc(buildMatteGreenscreenVideoResult), + }, { Command: "concat-video", Source: "generated", diff --git a/internal/modes/resolver.go b/internal/modes/resolver.go index 43280ce..69240f2 100644 --- a/internal/modes/resolver.go +++ b/internal/modes/resolver.go @@ -214,6 +214,8 @@ func localDependencyInstallHint(dep string) string { return "请安装或切换到包含 concat demuxer 的 FFmpeg" case "libmp3lame": return "请安装或切换到包含 libmp3lame 编码器的 FFmpeg" + case "prores_ks": + return "请安装或切换到包含 prores_ks 编码器的 FFmpeg" case "libass": return "请安装或切换到包含 subtitles/ass 字幕滤镜的 FFmpeg" case "libfreetype", "libfontconfig", "libfribidi", "libharfbuzz": diff --git a/mediakit-cli b/mediakit-cli new file mode 100755 index 0000000..9a75793 Binary files /dev/null and b/mediakit-cli differ diff --git a/package.json b/package.json index ea497a0..80a125b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@volcengine/mediakit-cli", - "version": "0.1.4", + "version": "0.1.5", 
"description": "MediaKit CLI with multi-platform binary distribution via npm", "license": "MIT", "bin": { diff --git a/skills/byted-mediakit-audio/LICENSE b/skills/byted-mediakit-audio/LICENSE new file mode 100644 index 0000000..c1edc01 --- /dev/null +++ b/skills/byted-mediakit-audio/LICENSE @@ -0,0 +1,24 @@ +# The MIT License (MIT) + +Copyright © 2025 Beijing Volcano Engine Technology Ltd. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE \ No newline at end of file diff --git a/skills/byted-mediakit-audio/SKILL.md b/skills/byted-mediakit-audio/SKILL.md new file mode 100644 index 0000000..bf93ed1 --- /dev/null +++ b/skills/byted-mediakit-audio/SKILL.md @@ -0,0 +1,27 @@ +--- +name: byted-mediakit-audio +version: "1.0.0" +license: "MIT" +description: "音频处理,涵盖音频处理和增强、内容理解等能力。包含能力:separate-voice, probe-audio-metadata。当用户需要使用 audio 域的 MediaKit CLI 能力时触发。" +permissions: + - shell +metadata: + requires: + bins: ["mediakit-cli"] + cliHelp: "mediakit-cli audio --help" + product: mediakit-cli/skills + domain: audio + capability_count: 2 +--- +# Audio Skills + +## 前置说明 + +开始前必须先读取 `./reference/shared.md` 的内容,其中包含前置检查、异步任务机制、结果查询等说明。 + +## 工具列表 + +| 工具 | 说明 | 参数声明 | 参考文档 | +|------|------|----------|----------| +| separate-voice | 将音频中的人声与背景音精准分离,输出为两个独立的音轨文件 | `video_url?:string, audio_url?:string, callback_args?:string, client_token?:string` | [reference/separate-voice.md](reference/separate-voice.md) | +| probe-audio-metadata | 获取指定音频的详细元信息,输出容器层信息与音频流元信息 | `audio_url:string, callback_args?:string, client_token?:string` | [reference/probe-audio-metadata.md](reference/probe-audio-metadata.md) | diff --git a/skills/byted-mediakit-audio/reference/probe-audio-metadata.md b/skills/byted-mediakit-audio/reference/probe-audio-metadata.md new file mode 100644 index 0000000..100dbd4 --- /dev/null +++ b/skills/byted-mediakit-audio/reference/probe-audio-metadata.md @@ -0,0 +1,46 @@ +# 音频元信息获取 + +## 能力描述 +获取指定音频的详细元信息,输出容器层信息(format_meta)与音频流元信息(audio_stream_meta)。 +字段分类参考 ffprobe,并对 VOD 原始返回做精简与统一。 +使用限制:支持公网 HTTP/HTTPS URL + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `audio` | +| Tool | `probe-audio-metadata` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 
模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| audio_url | `--audio-url` | string | 是 | - | 输入音频。待探测的音频 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli audio probe-audio-metadata \ + --audio-url https://example.com/audio_url \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli audio probe-audio-metadata` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-audio/reference/separate-voice.md b/skills/byted-mediakit-audio/reference/separate-voice.md new file mode 100644 index 0000000..e9bf1e9 --- /dev/null +++ b/skills/byted-mediakit-audio/reference/separate-voice.md @@ -0,0 +1,48 @@ +# 人声背景音分离 + +## 能力描述 +将音频中的人声与背景音精准分离,输出为两个独立的音轨文件。 +支持格式:主流音视频格式(如mp4、mov、mp3、m4a、wav等)。 +输入:video_url 和 audio_url 二选一。 +输出格式:AAC。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `audio` | +| Tool | `separate-voice` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 否 | - | 输入视频 Url(需公网可访问),与audio_url二选一,都存在时优先取video_url | +| audio_url | `--audio-url` | string | 否 | - | 输入音频 Url(需公网可访问),与video_url二选一,不能都为空 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 
可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli audio separate-voice \ + --video-url https://example.com/video_url \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli audio separate-voice` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-audio/reference/shared.md b/skills/byted-mediakit-audio/reference/shared.md new file mode 100644 index 0000000..4315f8e --- /dev/null +++ b/skills/byted-mediakit-audio/reference/shared.md @@ -0,0 +1,202 @@ +# MediaKit 共享规则 + +本技能指导你如何通过 mediakit-cli 操作媒体资源,以及调用过程中的通用规则和注意事项。 + +## 前置检查 + +### 依赖安装 + +首次使用前,确认 CLI 已安装: + +```bash +# 安装 +npm install -g @volcengine/mediakit-cli + +# 验证 +mediakit-cli --version +``` + +### 鉴权信息检查 + +优先级:环境变量 > 配置文件(文件路径 `~/.mediakit/config.json`) + +#### 字段说明 + +- 环境变量/配置文件:`MEDIAKIT_API_KEY`、`MEDIAKIT_ENDPOINT`、`MEDIAKIT_SURFACE`、`MEDIAKIT_RUNTIME` + +| 变量 | 必填 | 说明 | +|------|------|------| +| `MEDIAKIT_API_KEY` | 云端模式必填 | API 认证 Token | +| `MEDIAKIT_ENDPOINT` | 否 | API 访问点 | +| `MEDIAKIT_SURFACE` | 否 | 请求来源 Header `x-surface`;默认 `cli`,Skill 建议 `skill`,Plugin 建议 `plugin`,最终上报 `cli/skill` 或 `cli/plugin` | +| `MEDIAKIT_RUNTIME` | 否 | 请求来源 Header `x-runtime`;按宿主设置为 `claude`、`arkclaw` 等,未配置时回退环境探测或 `unknown` | + +任一必填项缺失时,终止执行并输出所有缺失项的列表及修复建议。 + +云端调用会自动携带 `x-surface` / `x-runtime`。Header 优先级为:环境变量 > `~/.mediakit/config.json` > 默认值/环境探测。当本 Skill/Plugin 通过 `mediakit-cli` 调用云端能力时,运行环境应注入 `MEDIAKIT_SURFACE=skill|plugin` 与 `MEDIAKIT_RUNTIME=<宿主>`;CLI 会保留原始产物前缀并上报 `x-surface=cli/skill|cli/plugin`。若未显式配置,CLI 默认按 `x-surface=cli`,`x-runtime` 依次回退 `IDENTITY_NAME` / `OPENCLAW_SERVICE_MARKER` 环境探测,最后为 `unknown`。 + +### 来源上报约束 + +- Skill 调用 `mediakit-cli` 时,必须显式设置 `MEDIAKIT_SURFACE=skill`,不能依赖用户已有环境变量。 +- Plugin 调用 `mediakit-cli` 时,必须显式设置 
`MEDIAKIT_SURFACE=plugin`,不能复用 Skill 的取值。 +- 宿主环境标识建议同时显式设置 `MEDIAKIT_RUNTIME=<宿主>`;若未设置,CLI 会回退为环境探测值或 `unknown`。 + +```bash +MEDIAKIT_SURFACE=skill MEDIAKIT_RUNTIME=<runtime> mediakit-cli editing add-image-to-video + +MEDIAKIT_SURFACE=plugin MEDIAKIT_RUNTIME=<runtime> mediakit-cli editing add-image-to-video +``` + +## CLI 使用方式 + +### 初始化配置 + +首次使用建议先运行初始化向导: + +```bash +mediakit-cli init +``` + +Agent 非交互初始化可显式写入请求来源与运行时配置: + +```bash +mediakit-cli init --mode cloud-first --api-key <api_key> --runtime <runtime> --surface cli --yes +mediakit-cli init --mode local-first --api-key <api_key> --endpoint <endpoint> --output-path ~/mediakit-output --runtime <runtime> --surface cli --credential-store config --yes +``` + +初始化后常用命令如下: + +```bash +# 查看当前配置 +mediakit-cli config show + +# 切换默认模式到本地优先 +mediakit-cli config set mode local-first + +# 切换默认模式到云端优先 +mediakit-cli config set mode cloud-first + +# 刷新环境检查并查看依赖状态 +mediakit-cli doctor +``` + +### 命令结构 + +MediaKit CLI 统一使用 `domain + tool` 的调用方式: + +```bash +mediakit-cli {domain} {tool} [flags] +``` + +常见帮助命令: + +```bash +# 查看所有 domain +mediakit-cli --domains + +# 查看某个分组下的工具列表 +mediakit-cli {domain} --help + +# 查看具体工具的参数 +mediakit-cli {domain} {tool} --help + +# 动态发现工具能力与返回结构 +mediakit-cli {domain} {tool} --schema +mediakit-cli --local {domain} {tool} --schema +``` + +当前产物覆盖的 domain 包括:`editing`, `video`, `audio`, `image`。 + +### Schema 发现 + +每个 capability 命令都支持 `--schema`,用于 Agent 动态读取工具能力,不要求传必填业务参数。 + +返回结构包含: + +- `name`:工具名,使用 snake_case,如 `add_image_to_video` +- `description`:工具描述,自动包含 `Mode` 与 `Async` 信息 +- `input_schema`:输入参数 JSON Schema +- `output_schema`:当前执行模式下的返回结构 + +输出区分规则: + +- 默认按全局 `mode` 配置解析返回面 +- `--local ... 
--schema` 输出本地模式返回面,本地模式直接返回最终结果字段 +- 云端异步工具输出 `task_id` / `request_id`,并在 `final_result` 中描述 `query-task` 完成态结果 +- `query-task` 是 cloud only,schema 描述任务状态与完成态结果 + +示例: + +```bash +mediakit-cli editing trim-video --schema +mediakit-cli --local editing trim-video --schema +``` + +### 单次调用模式覆盖 + +除 `config set mode` 设置默认模式外,还支持仅对当前命令生效的临时覆盖: + +```bash +mediakit-cli --local editing add-image-to-video + +mediakit-cli --cloud editing add-image-to-video +``` + +补充规则: + +- `--local` / `--cloud` 只影响当前命令,不修改全局 `config.mode` +- `--local` 与 `--cloud` 互斥,不能同时传入 + +## 异步任务 + +提交异步媒体处理任务成功后会返回 `task_id` 字段。通过 `shared query-task` 命令查询结果。 + +```bash +mediakit-cli shared query-task --task-id <task_id> +``` + +## local / cloud 约束 + +- `query-task` 是 **cloud only** 工具 +- local 模式下不支持 query-task +- 当前本轮能力以云端执行为主;如需显式声明,请优先使用 `--cloud` + +### Cloud 模式媒体输入补充 + +- 当命令以 `--cloud` 或 `cloud-first` 策略执行时,媒体输入参数(如 `video_url`、`audio_url`、`image_url`、`subtitle_url`、`sub_image_url` 及对应数组/对象子字段)可传入 `http://` / `https://` URL、`mediakit://...` file_id 或本地文件路径 +- `http://` / `https://` URL 与 `mediakit://...` file_id 会原样提交;本地文件路径会由 CLI 先上传为 `mediakit://...` file_id,再提交给云端工具 +- 各工具 reference 中的参数说明来自 APIHub/OpenAPI 原始字段描述;若其中写有公网 URL 或 HTTP/HTTPS URL,表示云端 API 最终接收的资源形态,不限制 CLI cloud 模式的本地路径预处理能力 + +### Local 模式补充 + +- 本地输出目录优先级:`--output-path` > `MEDIAKIT_OUTPUT_PATH` > config `output_path` > `~/.mediakit/temp` +- 当 `--output-path` 指向具体媒体文件名时,直接作为最终输出文件;否则按输入文件名生成 `{原文件名}_{工具名}.{ext}`,重复时追加 6 位随机数 +- 无法从输入 URL 或路径提取文件名时,退回 `{工具名}-{UnixNano}.{ext}` +- local 模式依赖 `ffmpeg` / `ffprobe`,缺失时错误中会给出 `install_guide` +- local 模式媒体处理输出必须贴合接口 response schema,禁止输出内部执行元数据 + +### 错误响应 + +- CLI cloud 模式直接透传 API 返回的原始 error 对象,不提取 `message` +- CLI local 模式返回结构化错误:`{"error":{"type":"...","code":"...","message":"..."}}` +- MCP error_response 直接透传原始 error 内容,dict 原样作为 `error` 字段值 + +## 幂等参数维护 + +| 参数 | 作用 | 维护建议 | +|------|------|----------| +| `client_token` | 主动控制幂等 | 请求重试时复用同一值;强制重新执行时传新的唯一值 | +| `callback_args` | 透传回调参数 | 
建议与 `client_token` 一起维护,便于回调对账与重试追踪 | + +补充规则: + +- `client_token` 长度不超过 64 个字符 +- `callback_args` 可用于回调透传与对账追踪 + +## 轮询策略 + +| 参数 | 描述 | 默认值 | +|------|------|--------| +| `poll-interval-seconds` | 轮询间隔 | 10s | +| `max-poll-attempts` | 轮询次数,0 代表不查询 | 0 | +| `poll-complete` | 阻塞至终态 | - | diff --git a/skills/byted-mediakit-editing/reference/adjust-audio-speed.md b/skills/byted-mediakit-editing/reference/adjust-audio-speed.md new file mode 100644 index 0000000..2a8476b --- /dev/null +++ b/skills/byted-mediakit-editing/reference/adjust-audio-speed.md @@ -0,0 +1,46 @@ +# 音频调速 + +## 能力描述 +调整音频的播放倍速,实现快放或慢放效果。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `editing` | +| Tool | `adjust-audio-speed` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| audio_url | `--audio-url` | string | 是 | - | 输入音频,支持 mp3、m4a、wav 等格式 | +| speed | `--speed` | number | 否 | 1 | 调整速度的倍数,Float类型,取值范围为0.1~4。0.1=放慢至原速的 0.1 倍,1=原速,4=加速至原速的 4 倍 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli editing adjust-audio-speed \ + --audio-url https://example.com/audio_url \ + --speed 1 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli editing adjust-audio-speed` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-editing/reference/adjust-video-volume.md b/skills/byted-mediakit-editing/reference/adjust-video-volume.md new file mode 100644 index 0000000..ec05219 --- 
/dev/null +++ b/skills/byted-mediakit-editing/reference/adjust-video-volume.md @@ -0,0 +1,46 @@ +# 调整视频音量 + +## 能力描述 +调整视频音量大小,支持静音;输出 mp4,分辨率与原片一致。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `editing` | +| Tool | `adjust-video-volume` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K | +| volume | `--volume` | number | 否 | 1 | 音量倍数。Float 类型,取值范围 0~4。0=静音,1=原音量,4=放大 4 倍 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli editing adjust-video-volume \ + --video-url https://example.com/video_url \ + --volume 1 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli editing adjust-video-volume` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-editing/reference/apply-video-filter.md b/skills/byted-mediakit-editing/reference/apply-video-filter.md new file mode 100644 index 0000000..01ad530 --- /dev/null +++ b/skills/byted-mediakit-editing/reference/apply-video-filter.md @@ -0,0 +1,46 @@ +# 视频添加滤镜 + +## 能力描述 +为视频添加指定滤镜效果,输出mp4,分辨率与原片一致。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `editing` | +| Tool | `apply-video-filter` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 
| +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K | +| filter_style | `--filter-style` | string | 否 | spring | 滤镜风格。可选值:spring(春日滤镜)、sunset(晚霞滤镜)、vivid(鲜亮滤镜)、fair_skin(白皙滤镜)、food(食物滤镜) | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli editing apply-video-filter \ + --video-url https://example.com/video_url \ + --filter-style spring \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli editing apply-video-filter` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-editing/reference/fade-audio.md b/skills/byted-mediakit-editing/reference/fade-audio.md new file mode 100644 index 0000000..e20a4c5 --- /dev/null +++ b/skills/byted-mediakit-editing/reference/fade-audio.md @@ -0,0 +1,48 @@ +# 音频声音淡入淡出 + +## 能力描述 +对输入音频实现淡入淡出效果,输出 mp3。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `editing` | +| Tool | `fade-audio` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| audio_url | `--audio-url` | string | 是 | - | 输入音频,支持 mp3、m4a、wav、flac 等格式 | +| fade_in_duration | `--fade-in-duration` | number | 否 | 1 | 声音淡入时长。单位:秒,可传小数(最多3位小数)。0 表示不淡入 | +| fade_out_duration | `--fade-out-duration` | number | 否 | 1 | 声音淡出时长。单位:秒,可传小数(最多3位小数)。0 表示不淡出 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | 
- | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli editing fade-audio \ + --audio-url https://example.com/audio_url \ + --fade-in-duration 1 \ + --fade-out-duration 1 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli editing fade-audio` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-editing/reference/fade-video-audio.md b/skills/byted-mediakit-editing/reference/fade-video-audio.md new file mode 100644 index 0000000..b5690cb --- /dev/null +++ b/skills/byted-mediakit-editing/reference/fade-video-audio.md @@ -0,0 +1,49 @@ +# 视频声音淡入淡出 + +## 能力描述 +对输入视频的声轨实现淡入淡出效果。 +输出 mp4,分辨率与原片一致。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `editing` | +| Tool | `fade-video-audio` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K | +| fade_in_duration | `--fade-in-duration` | number | 否 | 1 | 声音淡入时长。单位:秒,可传小数(最多3位小数)。0 表示不淡入 | +| fade_out_duration | `--fade-out-duration` | number | 否 | 1 | 声音淡出时长。单位:秒,可传小数(最多3位小数)。0 表示不淡出 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli editing fade-video-audio \ + --video-url https://example.com/video_url \ + --fade-in-duration 1 \ + --fade-out-duration 1 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + 
"request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli editing fade-video-audio` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-editing/reference/mix-audio.md b/skills/byted-mediakit-editing/reference/mix-audio.md new file mode 100644 index 0000000..8093d90 --- /dev/null +++ b/skills/byted-mediakit-editing/reference/mix-audio.md @@ -0,0 +1,47 @@ +# 音频混合 + +## 能力描述 +将多个音频文件(如背景音乐、音效、人声)进行混音,生成一个新的音频文件。 +处理耗时与原片时长正相关,平均 RTF(处理耗时/原片时长)为 1。 +输出音频的时长以最长的音频为准。 +输出格式:mp3。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `editing` | +| Tool | `mix-audio` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| audio_urls | `--audio-urls` | array | 是 | - | 输入音频列表。 CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli editing mix-audio \ + --audio-urls '["https://example.com/a.mp3","https://example.com/b.mp3"]' \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli editing mix-audio` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-editing/reference/shared.md b/skills/byted-mediakit-editing/reference/shared.md index f6b98d8..2429788 100644 --- a/skills/byted-mediakit-editing/reference/shared.md +++ b/skills/byted-mediakit-editing/reference/shared.md @@ -161,6 +161,12 @@ mediakit-cli shared 
query-task --task-id - local 模式下不支持 query-task - 当前本轮能力以云端执行为主;如需显式声明,请优先使用 `--cloud` +### Cloud 模式媒体输入补充 + +- 当命令以 `--cloud` 或 `cloud-first` 策略执行时,媒体输入参数(如 `video_url`、`audio_url`、`image_url`、`subtitle_url`、`sub_image_url` 及对应数组/对象子字段)可传入 `http://` / `https://` URL、`mediakit://...` file_id 或本地文件路径 +- `http://` / `https://` URL 与 `mediakit://...` file_id 会原样提交;本地文件路径会由 CLI 先上传为 `mediakit://...` file_id,再提交给云端工具 +- 各工具 reference 中的参数说明来自 APIHub/OpenAPI 原始字段描述;若其中写有公网 URL 或 HTTP/HTTPS URL,表示云端 API 最终接收的资源形态,不限制 CLI cloud 模式的本地路径预处理能力 + ### Local 模式补充 - 本地输出目录优先级:`--output-path` > `MEDIAKIT_OUTPUT_PATH` > config `output_path` > `~/.mediakit/temp` diff --git a/skills/byted-mediakit-image/LICENSE b/skills/byted-mediakit-image/LICENSE new file mode 100644 index 0000000..c1edc01 --- /dev/null +++ b/skills/byted-mediakit-image/LICENSE @@ -0,0 +1,24 @@ +# The MIT License (MIT) + +Copyright © 2025 Beijing Volcano Engine Technology Ltd. + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE \ No newline at end of file diff --git a/skills/byted-mediakit-image/SKILL.md b/skills/byted-mediakit-image/SKILL.md new file mode 100644 index 0000000..bef6ab5 --- /dev/null +++ b/skills/byted-mediakit-image/SKILL.md @@ -0,0 +1,32 @@ +--- +name: byted-mediakit-image +version: "1.0.0" +license: "MIT" +description: "图像处理,涵盖图像压缩、图像增强、AI处理等能力。包含能力:image-ocr, erase-image, remove-image-background, enhance-image, evaluate-image-quality。当用户需要使用 image 域的 MediaKit CLI 能力时触发。" +permissions: + - shell +metadata: + requires: + bins: ["mediakit-cli"] + cliHelp: "mediakit-cli image --help" + product: mediakit-cli/skills + domain: image + capability_count: 5 +--- +# Image Skills + +## 前置说明 + +开始前必须先读取 `./reference/shared.md` 的内容,其中包含前置检查、结果处理等说明。 + +> 本域工具均为同步执行,调用成功后直接返回最终结果,无需 `query-task` 轮询。 + +## 工具列表 + +| 工具 | 说明 | 参数声明 | 参考文档 | +|------|------|----------|----------| +| image-ocr | 识别图片中的通用印刷体文字,返回可编辑文本、文字框坐标和置信度 | `image_url:string, callback_args?:string, client_token?:string` | [reference/image-ocr.md](reference/image-ocr.md) | +| erase-image | 自动检测并擦除图片中的常见图标、文字或指定区域内容,并对擦除区域进行背景智能填充 | `image_url:string, tool_version?:string, standard_scene?:string, standard_erase_text?:string, output_format?:string, callback_args?:string, client_token?:string` | [reference/erase-image.md](reference/erase-image.md) | +| remove-image-background | 自动识别并保留图像主体,移除背景并生成透明背景图片 | `image_url:string, scene:string, need_contour?:boolean, contour_color?:string, contour_size?:integer, need_crop_background?:boolean, output_format?:string, callback_args?:string, client_token?:string` | [reference/remove-image-background.md](reference/remove-image-background.md) | +| enhance-image | 基于图像内容理解智能决策,全方位提升图片分辨率、清晰度与色彩表现 | `image_url:string, 
tool_version?:string, multiple?:number, target_width?:integer, target_height?:integer, callback_args?:string, client_token?:string` | [reference/enhance-image.md](reference/enhance-image.md) | +| evaluate-image-quality | 对输入图片进行主客观画质和美学评分 | `image_url:string, tool_version?:string, standard_evaluate_items?:array, callback_args?:string, client_token?:string` | [reference/evaluate-image-quality.md](reference/evaluate-image-quality.md) | diff --git a/skills/byted-mediakit-image/reference/enhance-image.md b/skills/byted-mediakit-image/reference/enhance-image.md new file mode 100644 index 0000000..e099f4b --- /dev/null +++ b/skills/byted-mediakit-image/reference/enhance-image.md @@ -0,0 +1,47 @@ +# 图像画质增强 + +## 能力描述 +基于图像内容理解智能决策,全方位提升图片分辨率、清晰度与色彩表现。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `image` | +| Tool | `enhance-image` | +| 是否异步 | `否` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;同步执行,调用成功后直接返回最终结果 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| image_url | `--image-url` | string | 是 | - | 输入图片。String 类型,支持http://xxx或https://xxx格式 URL | +| tool_version | `--tool-version` | string | 否 | standard | 画质增强选用的模型版本,标准版:standard;专业版:professional | +| multiple | `--multiple` | number | 否 | - | 图像处理后较原图的分辨率倍数,支持 2 位小数。取值范围 [1,30],最大不超过 30。standard 模式下最大不超过 8。处理后宽高不能超过 target_width、target_height 上限 | +| target_width | `--target-width` | integer | 否 | - | 图像处理后的宽度,单位 px,取值不能超过 10240。standard 模式下最大不超过 6144,且分辨率倍数不能超过 8 | +| target_height | `--target-height` | integer | 否 | - | 图像处理后的高度,单位 px,取值不能超过 10240。standard 模式下最大不超过 6144,且分辨率倍数不能超过 8 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli image enhance-image \ + --image-url https://example.com/image_url \ + --tool-version standard \ + 
--multiple 2 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "image_url": "https://example.com/enhanced.png", + "image_size": 204800, + "image_format": "png", + "image_width": 2160, + "image_height": 3840 +} +``` diff --git a/skills/byted-mediakit-image/reference/erase-image.md b/skills/byted-mediakit-image/reference/erase-image.md new file mode 100644 index 0000000..e010a38 --- /dev/null +++ b/skills/byted-mediakit-image/reference/erase-image.md @@ -0,0 +1,48 @@ +# 图像擦除修复 + +## 能力描述 +自动检测并擦除图片中的常见图标、文字或指定区域内容,并对擦除区域进行背景智能填充。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `image` | +| Tool | `erase-image` | +| 是否异步 | `否` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;同步执行,调用成功后直接返回最终结果 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| image_url | `--image-url` | string | 是 | - | 输入图片 URL,需为公网可访问的 png/jpg/jpeg/webp/tiff/bmp/heic 图片,单图不超过 10MB。 | +| tool_version | `--tool-version` | string | 否 | standard | 图像擦除修复选用的模型版本。standard:标准版,基于明确的规则(如文本匹配、矩形框坐标)擦除指定内容。适用于简单、明确的擦除任务 | +| standard_scene | `--standard-scene` | string | 否 | full_screen_text_erase | 标准版擦除场景,仅 standard 版本生效。full_screen_text_erase:全屏文字擦除,可通过 standard_erase_text 指定要擦除的文字,不指定则默认擦除所有文字内容。full_screen_icon_erase:全屏图标擦除 | +| standard_erase_text | `--standard-erase-text` | string | 否 | - | 标准版文字擦除,指定要擦除的文字,不指定则默认擦除所有文字内容 | +| output_format | `--output-format` | string | 否 | webp | 输出图片格式,可选值:png、jpeg、webp | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli image erase-image \ + --image-url https://example.com/image_url \ + --tool-version standard \ + --standard-scene full_screen_text_erase \ + --output-format webp \ + --callback-args sample-callback-args \ + --client-token 
demo-client-token +``` + +## 输出格式 +```json +{ + "image_url": "https://example.com/erased.webp", + "image_size": 102400, + "image_format": "webp", + "image_width": 1080, + "image_height": 1920 +} +``` diff --git a/skills/byted-mediakit-image/reference/evaluate-image-quality.md b/skills/byted-mediakit-image/reference/evaluate-image-quality.md new file mode 100644 index 0000000..6125b6a --- /dev/null +++ b/skills/byted-mediakit-image/reference/evaluate-image-quality.md @@ -0,0 +1,45 @@ +# 图像画质评估 + +## 能力描述 +对输入图片进行主客观画质和美学评分,适用于质量监控、低质图筛查、内容审核、推荐排序和训练数据清洗等场景。 +支持标准版多维评分与专业版大模型评分。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `image` | +| Tool | `evaluate-image-quality` | +| 是否异步 | `否` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;同步执行,调用成功后直接返回最终结果 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| image_url | `--image-url` | string | 是 | - | 输入图片 URL,需为公网可访问的 png/jpeg/webp/heic 图片,单图不超过 10MB。 | +| tool_version | `--tool-version` | string | 否 | standard | 画质评估模型版本,standard 标准版,professional 专业版 | +| standard_evaluate_items | `--standard-evaluate-items` | array | 否 | ["vqscore","noise","aesthetic","blur"] | 标准版选用的评估工具。子项说明:评估工具。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值,例如 `--standard-evaluate-items '["vqscore","noise"]'` | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli image evaluate-image-quality \ + --image-url https://example.com/image_url \ + --tool-version standard \ + --standard-evaluate-items '["vqscore","noise","aesthetic","blur"]' \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "vqscore": 78.5, + "aesthetic": 82.1, + "noise": 12.3, + "blur": 8.6 +} +``` diff --git a/skills/byted-mediakit-image/reference/image-ocr.md 
b/skills/byted-mediakit-image/reference/image-ocr.md new file mode 100644 index 0000000..9b4822a --- /dev/null +++ b/skills/byted-mediakit-image/reference/image-ocr.md @@ -0,0 +1,47 @@ +# 图像文字识别OCR + +## 能力描述 +识别图片中的通用印刷体文字,返回可编辑文本、文字框坐标和置信度。 +本期支持简体中文和英文通用场景识别。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `image` | +| Tool | `image-ocr` | +| 是否异步 | `否` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;同步执行,调用成功后直接返回最终结果 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| image_url | `--image-url` | string | 是 | - | 输入图片 URL,需为公网可访问的 png/jpg/jpeg/webp/heic/avif 图片,单图不超过 10MB。 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli image image-ocr \ + --image-url https://example.com/image_url \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "ocr_result": [ + { + "content": "示例文字", + "confidence": 0.98, + "top_left_x": 10, + "top_left_y": 20, + "bottom_right_x": 120, + "bottom_right_y": 60 + } + ] +} +``` diff --git a/skills/byted-mediakit-image/reference/remove-image-background.md b/skills/byted-mediakit-image/reference/remove-image-background.md new file mode 100644 index 0000000..afcde8b --- /dev/null +++ b/skills/byted-mediakit-image/reference/remove-image-background.md @@ -0,0 +1,54 @@ +# 图像背景移除 + +## 能力描述 +自动识别并保留图像主体,移除背景并生成透明背景图片。 +支持通用、人像、商品场景,可在人像/商品场景中生成主体描边或裁剪透明背景。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `image` | +| Tool | `remove-image-background` | +| 是否异步 | `否` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;同步执行,调用成功后直接返回最终结果 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| 
image_url | `--image-url` | string | 是 | - | 输入图片 URL,需为公网可访问的 png/jpg/jpeg/webp/tiff/bmp/ico 图片,单图不超过 10MB。 | +| scene | `--scene` | string | 是 | - | 背景移除场景:general 通用场景,适用于期望抠出图像主体但不确定主体分类的场景;human 人像抠图场景,仅需抠出人像主体;product 商品抠图场景,仅需抠出商品主体 | +| need_contour | `--need-contour` | boolean | 否 | false | 是否为主体生成描边;仅 human/product 场景生效,general 场景忽略 | +| contour_color | `--contour-color` | string | 否 | #FFFFFF | 主体描边颜色,十六进制 RGB;仅 need_contour=true 且 human/product 场景生效 | +| contour_size | `--contour-size` | integer | 否 | 10 | 主体描边宽度,单位 px;仅 need_contour=true 且 human/product 场景生效 | +| need_crop_background | `--need-crop-background` | boolean | 否 | false | 是否裁剪透明背景到刚好包住主体;仅 human/product 场景生效,general 场景忽略 | +| output_format | `--output-format` | string | 否 | png | 输出图片格式,可选值:png、jpeg、webp | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli image remove-image-background \ + --image-url https://example.com/image_url \ + --scene human \ + --need-contour false \ + --contour-color "#FFFFFF" \ + --contour-size 10 \ + --need-crop-background false \ + --output-format png \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "image_url": "https://example.com/nobg.png", + "image_size": 102400, + "image_format": "png", + "image_width": 1080, + "image_height": 1920 +} +``` diff --git a/skills/byted-mediakit-image/reference/shared.md b/skills/byted-mediakit-image/reference/shared.md new file mode 100644 index 0000000..4315f8e --- /dev/null +++ b/skills/byted-mediakit-image/reference/shared.md @@ -0,0 +1,202 @@ +# MediaKit 共享规则 + +本技能指导你如何通过 mediakit-cli 操作媒体资源,以及调用过程中的通用规则和注意事项。 + +## 前置检查 + +### 依赖安装 + +首次使用前,确认 CLI 已安装: + +```bash +# 安装 +npm install -g @volcengine/mediakit-cli + +# 验证 +mediakit-cli --version +``` + +### 鉴权信息检查 + +优先级:环境变量 > 配置文件(文件路径 `~/.mediakit/config.json`) + +#### 字段说明 + +- 
环境变量/配置文件:`MEDIAKIT_API_KEY`、`MEDIAKIT_ENDPOINT`、`MEDIAKIT_SURFACE`、`MEDIAKIT_RUNTIME` + +| 变量 | 必填 | 说明 | +|------|------|------| +| `MEDIAKIT_API_KEY` | 云端模式必填 | API 认证 Token | +| `MEDIAKIT_ENDPOINT` | 否 | API 访问点 | +| `MEDIAKIT_SURFACE` | 否 | 请求来源 Header `x-surface`;默认 `cli`,Skill 建议 `skill`,Plugin 建议 `plugin`,最终上报 `cli/skill` 或 `cli/plugin` | +| `MEDIAKIT_RUNTIME` | 否 | 请求来源 Header `x-runtime`;按宿主设置为 `claude`、`arkclaw` 等,未配置时回退环境探测或 `unknown` | + +任一必填项缺失时,终止执行并输出所有缺失项的列表及修复建议。 + +云端调用会自动携带 `x-surface` / `x-runtime`。Header 优先级为:环境变量 > `~/.mediakit/config.json` > 默认值/环境探测。当本 Skill/Plugin 通过 `mediakit-cli` 调用云端能力时,运行环境应注入 `MEDIAKIT_SURFACE=skill|plugin` 与 `MEDIAKIT_RUNTIME=<宿主>`;CLI 会保留原始产物前缀并上报 `x-surface=cli/skill|cli/plugin`。若未显式配置,CLI 默认按 `x-surface=cli`,`x-runtime` 依次回退 `IDENTITY_NAME` / `OPENCLAW_SERVICE_MARKER` 环境探测,最后为 `unknown`。 + +### 来源上报约束 + +- Skill 调用 `mediakit-cli` 时,必须显式设置 `MEDIAKIT_SURFACE=skill`,不能依赖用户已有环境变量。 +- Plugin 调用 `mediakit-cli` 时,必须显式设置 `MEDIAKIT_SURFACE=plugin`,不能复用 Skill 的取值。 +- 宿主环境标识建议同时显式设置 `MEDIAKIT_RUNTIME=<宿主>`;若未设置,CLI 会回退为环境探测值或 `unknown`。 + +```bash +MEDIAKIT_SURFACE=skill MEDIAKIT_RUNTIME=<宿主> mediakit-cli editing add-image-to-video + +MEDIAKIT_SURFACE=plugin MEDIAKIT_RUNTIME=<宿主> mediakit-cli editing add-image-to-video +``` + +## CLI 使用方式 + +### 初始化配置 + +首次使用建议先运行初始化向导: + +```bash +mediakit-cli init +``` + +Agent 非交互初始化可显式写入请求来源与运行时配置: + +```bash +mediakit-cli init --mode cloud-first --api-key <api_key> --runtime <宿主> --surface cli --yes +mediakit-cli init --mode local-first --api-key <api_key> --endpoint <endpoint> --output-path ~/mediakit-output --runtime <宿主> --surface cli --credential-store config --yes +``` + +初始化后常用命令如下: + +```bash +# 查看当前配置 +mediakit-cli config show + +# 切换默认模式到本地优先 +mediakit-cli config set mode local-first + +# 切换默认模式到云端优先 +mediakit-cli config set mode cloud-first + +# 刷新环境检查并查看依赖状态 +mediakit-cli doctor +``` + +### 命令结构 + +MediaKit CLI 统一使用 `domain + tool` 的调用方式: + +```bash +mediakit-cli {domain} {tool} [flags] +``` + +常见帮助命令: + +```bash +# 查看所有 
domain +mediakit-cli --domains + +# 查看某个分组下的工具列表 +mediakit-cli {domain} --help + +# 查看具体工具的参数 +mediakit-cli {domain} {tool} --help + +# 动态发现工具能力与返回结构 +mediakit-cli {domain} {tool} --schema +mediakit-cli --local {domain} {tool} --schema +``` + +当前产物覆盖的 domain 包括:`editing`, `video`, `audio`, `image`。 + +### Schema 发现 + +每个 capability 命令都支持 `--schema`,用于 Agent 动态读取工具能力,不要求传必填业务参数。 + +返回结构包含: + +- `name`:工具名,使用 snake_case,如 `add_image_to_video` +- `description`:工具描述,自动包含 `Mode` 与 `Async` 信息 +- `input_schema`:输入参数 JSON Schema +- `output_schema`:当前执行模式下的返回结构 + +输出区分规则: + +- 默认按全局 `mode` 配置解析返回面 +- `--local ... --schema` 输出本地模式返回面,本地模式直接返回最终结果字段 +- 云端异步工具输出 `task_id` / `request_id`,并在 `final_result` 中描述 `query-task` 完成态结果 +- `query-task` 是 cloud only,schema 描述任务状态与完成态结果 + +示例: + +```bash +mediakit-cli editing trim-video --schema +mediakit-cli --local editing trim-video --schema +``` + +### 单次调用模式覆盖 + +除 `config set mode` 设置默认模式外,还支持仅对当前命令生效的临时覆盖: + +```bash +mediakit-cli --local editing add-image-to-video + +mediakit-cli --cloud editing add-image-to-video +``` + +补充规则: + +- `--local` / `--cloud` 只影响当前命令,不修改全局 `config.mode` +- `--local` 与 `--cloud` 互斥,不能同时传入 + +## 异步任务 + +提交异步媒体处理任务成功后会返回 `task_id` 字段。通过 `shared query-task` 命令查询结果。 + +```bash +mediakit-cli shared query-task --task-id <task_id> +``` + +## local / cloud 约束 + +- `query-task` 是 **cloud only** 工具 +- local 模式下不支持 query-task +- 当前本轮能力以云端执行为主;如需显式声明,请优先使用 `--cloud` + +### Cloud 模式媒体输入补充 + +- 当命令以 `--cloud` 或 `cloud-first` 策略执行时,媒体输入参数(如 `video_url`、`audio_url`、`image_url`、`subtitle_url`、`sub_image_url` 及对应数组/对象子字段)可传入 `http://` / `https://` URL、`mediakit://...` file_id 或本地文件路径 +- `http://` / `https://` URL 与 `mediakit://...` file_id 会原样提交;本地文件路径会由 CLI 先上传为 `mediakit://...` file_id,再提交给云端工具 +- 各工具 reference 中的参数说明来自 APIHub/OpenAPI 原始字段描述;若其中写有公网 URL 或 HTTP/HTTPS URL,表示云端 API 最终接收的资源形态,不限制 CLI cloud 模式的本地路径预处理能力 + +### Local 模式补充 + +- 本地输出目录优先级:`--output-path` > `MEDIAKIT_OUTPUT_PATH` > config `output_path` > 
`~/.mediakit/temp` +- 当 `--output-path` 指向具体媒体文件名时,直接作为最终输出文件;否则按输入文件名生成 `{原文件名}_{工具名}.{ext}`,重复时追加 6 位随机数 +- 无法从输入 URL 或路径提取文件名时,退回 `{工具名}-{UnixNano}.{ext}` +- local 模式依赖 `ffmpeg` / `ffprobe`,缺失时错误中会给出 `install_guide` +- local 模式媒体处理输出必须贴合接口 response schema,禁止输出内部执行元数据 + +### 错误响应 + +- CLI cloud 模式直接透传 API 返回的原始 error 对象,不提取 `message` +- CLI local 模式返回结构化错误:`{"error":{"type":"...","code":"...","message":"..."}}` +- MCP error_response 直接透传原始 error 内容,dict 原样作为 `error` 字段值 + +## 幂等参数维护 + +| 参数 | 作用 | 维护建议 | +|------|------|----------| +| `client_token` | 主动控制幂等 | 请求重试时复用同一值;强制重新执行时传新的唯一值 | +| `callback_args` | 透传回调参数 | 建议与 `client_token` 一起维护,便于回调对账与重试追踪 | + +补充规则: + +- `client_token` 长度不超过 64 个字符 +- `callback_args` 可用于回调透传与对账追踪 + +## 轮询策略 + +| 参数 | 描述 | 默认值 | +|------|------|--------| +| `poll-interval-seconds` | 轮询间隔 | 10s | +| `max-poll-attempts` | 轮询次数,0 代表不查询 | 0 | +| `poll-complete` | 阻塞至终态 | - | diff --git a/skills/byted-mediakit-shared/SKILL.md b/skills/byted-mediakit-shared/SKILL.md index 53c44eb..36e9695 100644 --- a/skills/byted-mediakit-shared/SKILL.md +++ b/skills/byted-mediakit-shared/SKILL.md @@ -177,6 +177,12 @@ mediakit-cli shared query-task --task-id - local 模式下不支持 query-task - 当前本轮能力以云端执行为主;如需显式声明,请优先使用 `--cloud` +### Cloud 模式媒体输入补充 + +- 当命令以 `--cloud` 或 `cloud-first` 策略执行时,媒体输入参数(如 `video_url`、`audio_url`、`image_url`、`subtitle_url`、`sub_image_url` 及对应数组/对象子字段)可传入 `http://` / `https://` URL、`mediakit://...` file_id 或本地文件路径 +- `http://` / `https://` URL 与 `mediakit://...` file_id 会原样提交;本地文件路径会由 CLI 先上传为 `mediakit://...` file_id,再提交给云端工具 +- 各工具 reference 中的参数说明来自 APIHub/OpenAPI 原始字段描述;若其中写有公网 URL 或 HTTP/HTTPS URL,表示云端 API 最终接收的资源形态,不限制 CLI cloud 模式的本地路径预处理能力 + ### Local 模式补充 - 本地输出目录优先级:`--output-path` > `MEDIAKIT_OUTPUT_PATH` > config `output_path` > `~/.mediakit/temp` diff --git a/skills/byted-mediakit-video/reference/analyze-video-highlights.md b/skills/byted-mediakit-video/reference/analyze-video-highlights.md new file mode 100644 index 
0000000..d13a54d --- /dev/null +++ b/skills/byted-mediakit-video/reference/analyze-video-highlights.md @@ -0,0 +1,51 @@ +# 高光片段提取 + +## 能力描述 +智能捕捉视频"情绪波峰"与"关键动作",输出精准时间戳、高光打分、OCR 文本和画面描述等元数据,供下游进行更灵活的二次开发。 +支持短剧(Miniseries)和小游戏(Game)两种分析模型。 +使用限制:单次最多 100 个视频,累计时长不超过 300 分钟。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `analyze-video-highlights` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_urls | `--video-urls` | array | 是 | - | 输入视频列表。待处理的视频 URL 列表,支持 1-100 个视频。子项说明:视频 URL。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| model | `--model` | string | 是 | - | 分析场景模型,Miniseries(短剧)或 Game(小游戏) | +| mode | `--mode` | string | 是 | - | 高光提取模式。固定组合为:model=Miniseries 时 mode 只能传 StorylineCuts;model=Game 时 mode 只能传 HighlightExtract | +| minigame_info | `--minigame-info` | object | 否 | - | 小游戏描述信息,当 model=Game 时可选填,可辅助模型更精准识别高光内容。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video analyze-video-highlights \ + --video-urls '["https://example.com/video_url"]' \ + --model Miniseries \ + --mode StorylineCuts \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli video analyze-video-highlights` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-video/reference/analyze-video-storyline.md b/skills/byted-mediakit-video/reference/analyze-video-storyline.md new file mode 100644 index 
0000000..20531ed --- /dev/null +++ b/skills/byted-mediakit-video/reference/analyze-video-storyline.md @@ -0,0 +1,49 @@ +# 剧情故事线分析 + +## 能力描述 +智能解析影视剧内容,生成结构化剧情线,供智能剪辑、内容检索与互动播放等场景使用。 +基于大模型视频理解能力,对输入的单个或多个长视频(如电影、电视剧)进行分析,提取并组织成一份完整的故事线。 +该故事线由一系列按时间顺序排列的剧情片段(Clips)和基于片段聚合的高光故事线(Highlights)组成。 +使用限制:单次最多 30 个视频,单个视频时长不超过 2.5 小时。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `analyze-video-storyline` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_urls | `--video-urls` | array | 是 | - | 输入视频列表。待处理的视频 URL 列表,最多支持 30 个视频。子项说明:视频 URL。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| enable_snapshot | `--enable-snapshot` | boolean | 否 | false | 是否为每个剧情片段生成关键帧快照。默认为 false。开启后,结果中将包含 clip_snapshot_url 字段 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video analyze-video-storyline \ + --video-urls '["https://example.com/video_url"]' \ + --enable-snapshot \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli video analyze-video-storyline` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-video/reference/asr-subtitles.md b/skills/byted-mediakit-video/reference/asr-subtitles.md new file mode 100644 index 0000000..7e06b50 --- /dev/null +++ b/skills/byted-mediakit-video/reference/asr-subtitles.md @@ -0,0 +1,55 @@ +# 语音转字幕(ASR) + +## 能力描述 +对输入视频或音频进行语音识别,输出带时间戳的字幕片段。 +支持格式:主流音视频格式(如mp4、mov、mp3、m4a、wav等)。 
+输入:video_url和audio_url二选一。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `asr-subtitles` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 否 | - | 输入视频 Url(需公网可访问),与audio_url二选一,都存在时优先取video_url | +| audio_url | `--audio-url` | string | 否 | - | 输入音频 Url(需公网可访问),与video_url二选一,不能都为空 | +| content_type | `--content-type` | string | 否 | - | 识别类型,默认值为空,算法会自动探测类型,speech: 对话,singing: 歌唱 | +| language | `--language` | string | 否 | - | 识别提示语言 ID(默认值为空,算法会自动探测语种)。简体中文:cmn-Hans-CN;英语:eng-US | +| enable_speaker_info | `--enable-speaker-info` | boolean | 否 | false | 是否开启说话人识别 | +| enable_confidence | `--enable-confidence` | boolean | 否 | false | 是否返回置信度 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video asr-subtitles \ + --video-url https://example.com/video_url \ + --content-type speech \ + --language cmn-Hans-CN \ + --enable-speaker-info \ + --enable-confidence \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video asr-subtitles` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/enhance-video-generative.md b/skills/byted-mediakit-video/reference/enhance-video-generative.md new file mode 100644 index 0000000..093d465 --- /dev/null +++ b/skills/byted-mediakit-video/reference/enhance-video-generative.md @@ -0,0 +1,50 @@ +# 生成式画质增强 + +## 能力描述 
+生成式视频增强修复(generative_video_restoration)是基于扩散大模型(Diffusion-based Large Model)的生成式视频修复技术。不仅可以还原被破坏的像素,更借助大规模预训练积累的丰富视觉先验,主动补全细节、理解语义,生成真实、自然、高保真的视频内容。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `enhance-video-generative` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频。String 类型,支持http://xxx或https://xxx格式 URL | +| resolution | `--resolution` | string | 否 | 720p | 目标分辨率。支持的取值:720p / 1080p | +| bitrate_level | `--bitrate-level` | string | 否 | medium | 码率档位。输出视频的目标平均码率。取值:low(低码率)/ medium(中码率,推荐)/ high(高码率)。默认为 medium | +| fps | `--fps` | number | 否 | - | 目标帧率,单位为 fps。若未指定,输出视频将保持与原始片源一致的帧率。取值范围为 [15, 120],建议不超过原片的 4 倍 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video enhance-video-generative \ + --video-url https://example.com/video_url \ + --resolution 720p \ + --bitrate-level medium \ + --fps 30 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video enhance-video-generative` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/erase-video-subtitle.md b/skills/byted-mediakit-video/reference/erase-video-subtitle.md new file mode 100644 index 0000000..b785efb --- /dev/null +++ b/skills/byted-mediakit-video/reference/erase-video-subtitle.md @@ -0,0 +1,45 @@ +# 字幕擦除(标准版) + +## 能力描述 +智能检测并擦除视频画面中已有的硬字幕,保留原始背景。 +支持格式:主流视频格式如mp4、flv、ts、avi、mov、wmv、mkv。 + +## 
执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `erase-video-subtitle` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频。String 类型,支持http://xxx或https://xxx格式 URL | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video erase-video-subtitle \ + --video-url https://example.com/video_url \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video erase-video-subtitle` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/generate-highlights-microdrama.md b/skills/byted-mediakit-video/reference/generate-highlights-microdrama.md new file mode 100644 index 0000000..156261e --- /dev/null +++ b/skills/byted-mediakit-video/reference/generate-highlights-microdrama.md @@ -0,0 +1,55 @@ +# 高光智剪-短剧 + +## 能力描述 +深度理解短剧角色、剧情与故事线,自动提取高光片段并混剪成投流视频。 +支持故事线混剪模式(StorylineCuts),可选"短剧三要素"视觉模板,输出高光集锦、单集预告等。 +支持输出详细分镜信息(storyboard)。 +使用限制:单次最多 100 个视频,累计时长不超过 300 分钟。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `generate-highlights-microdrama` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_urls | `--video-urls` | array | 是 | - | 
输入视频列表。待处理的短剧原片视频 URL 列表,支持 1-100 个视频。子项说明:视频 URL。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| mode | `--mode` | string | 否 | StorylineCuts | 短剧高光智剪模式,本期固定为 StorylineCuts(故事线混剪模式) | +| enable_generate_video | `--enable-generate-video` | boolean | 否 | true | 是否生成混剪成片视频。true(默认)= 同时输出混剪视频与分镜信息;false = 仅输出高光分镜信息,不生成混剪视频,此时底层请求不会携带 Edit 字段,且传入的 edit_param 将被忽略 | +| enable_return_poster | `--enable-return-poster` | boolean | 否 | false | 是否在结果中返回混剪视频封面图 URL。false(默认)= 不返回封面图;true = 若底层存在封面则返回 poster_url | +| edit_param | `--edit-param` | object | 否 | - | 成片剪辑参数配置。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| highlight_cuts_param | `--highlight-cuts-param` | object | 否 | - | 高光混剪参数配置。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| opening_hook_param | `--opening-hook-param` | object | 否 | - | 精彩前置功能参数配置(可选)。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video generate-highlights-microdrama \ + --video-urls '["https://example.com/video_url"]' \ + --mode StorylineCuts \ + --enable-generate-video \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id <task_id>` 查询。 + +- 当前命令:`mediakit-cli video generate-highlights-microdrama` +- 推荐查询:`mediakit-cli shared query-task --task-id <task_id>` diff --git a/skills/byted-mediakit-video/reference/generate-highlights-minigame.md b/skills/byted-mediakit-video/reference/generate-highlights-minigame.md new file mode 100644 index 0000000..94bb4fc --- /dev/null +++ b/skills/byted-mediakit-video/reference/generate-highlights-minigame.md @@ -0,0 +1,52 @@ +# 高光智剪-小游戏 + +## 能力描述 +识别小游戏录屏视频中的核心玩法与高光事件(如连击、通关、极限操作等), +快速生成用于买量的视频素材。支持提供游戏名称、玩法描述、高光定义以辅助模型更精准识别。 +使用限制:本期仅支持单视频输入。 + +## 执行方式 + +| 项目 | 说明 | 
+|------|------| +| Domain | `video` | +| Tool | `generate-highlights-minigame` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_urls | `--video-urls` | array | 是 | - | 输入视频列表。 CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| mode | `--mode` | string | 否 | HighlightExtract | 高光提取模式,本期支持 HighlightExtract | +| enable_generate_video | `--enable-generate-video` | boolean | 否 | true | 是否生成混剪成片视频。true(默认)= 同时输出混剪视频与高光片段信息;false = 仅输出高光片段信息(clips),底层请求不携带 Edit 字段,也不会生成任何混剪视频 | +| minigame_info | `--minigame-info` | object | 否 | - | 小游戏描述信息,建议填写以辅助模型更精准识别高光内容。CLI 传参时请使用 JSON 字符串,并用单引号包裹整个值 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video generate-highlights-minigame \ + --video-urls '["https://example.com/video_url"]' \ + --mode HighlightExtract \ + --enable-generate-video \ + --minigame-info '{"game_name": "demo"}' \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video generate-highlights-minigame` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/matte-greenscreen-video.md b/skills/byted-mediakit-video/reference/matte-greenscreen-video.md new file mode 100644 index 0000000..0485077 --- /dev/null +++ b/skills/byted-mediakit-video/reference/matte-greenscreen-video.md @@ -0,0 +1,48 @@ +# 视频绿幕抠图 + +## 能力描述 +对以绿幕或纯色为背景的视频进行抠图,自动识别主体(人物、物品、动物等),同时移除背景,生成背景透明的视频。 +输出视频格式为 WEBM(默认)或 MOV,分辨率与原片对齐。 +支持的格式:主流视频格式如 mp4、flv、ts、avi、mov、mkv、wmv。 + +## 
执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `matte-greenscreen-video` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令。本地模式使用 ProRes 4444 MOV 透明输出,仅支持 `--format MOV`,WEBM 请使用 cloud | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频 Url(需公网可访问) | +| format | `--format` | string | 否 | WEBM | 输出视频格式:MOV / WEBM(默认) | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video matte-greenscreen-video \ + --video-url https://example.com/video_url \ + --format WEBM \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video matte-greenscreen-video` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/matte-portrait-video.md b/skills/byted-mediakit-video/reference/matte-portrait-video.md new file mode 100644 index 0000000..9954181 --- /dev/null +++ b/skills/byted-mediakit-video/reference/matte-portrait-video.md @@ -0,0 +1,48 @@ +# 视频人像抠图 + +## 能力描述 +自动识别人物主体,同时移除背景,生成背景透明的视频,适用于背景替换等场景。 +输出格式为 WEBM(默认)或 MOV,分辨率与原片对齐。 +支持的格式:主流视频格式如 mp4、flv、ts、avi、mov、mkv、wmv。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `matte-portrait-video` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | 
+|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频 Url(需公网可访问) | +| format | `--format` | string | 否 | WEBM | 输出视频格式:MOV / WEBM(默认) | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video matte-portrait-video \ + --video-url https://example.com/video_url \ + --format WEBM \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video matte-portrait-video` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/probe-video-metadata.md b/skills/byted-mediakit-video/reference/probe-video-metadata.md new file mode 100644 index 0000000..5a8c688 --- /dev/null +++ b/skills/byted-mediakit-video/reference/probe-video-metadata.md @@ -0,0 +1,46 @@ +# 视频元信息获取 + +## 能力描述 +对输入视频 Url(需公网可访问) 进行探测,输出标准化媒资元信息,覆盖容器层(format_meta)、视频流层(video_stream_meta)与音频流层(audio_stream_meta)。 +字段分类参考 ffprobe,并对 VOD 原始返回做精简与统一,便于上层做分辨率/帧率/码率/编码等策略判断。 +使用限制:仅支持公网 HTTP/HTTPS URL;输入视频分辨率最高支持 4K。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `probe-video-metadata` | +| 是否异步 | `是` | +| 是否支持 local | `是` | +| 模式说明 | 支持 local / cloud;可通过 `--local` 或 `--cloud` 覆盖当前命令 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频。待探测的视频 | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video probe-video-metadata \ + 
--video-url https://example.com/video_url \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video probe-video-metadata` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/segment-scenes.md b/skills/byted-mediakit-video/reference/segment-scenes.md new file mode 100644 index 0000000..130f324 --- /dev/null +++ b/skills/byted-mediakit-video/reference/segment-scenes.md @@ -0,0 +1,54 @@ +# 场景切分 + +## 能力描述 +依据视频转场与画面变化自动切分场景,输出切片时间轴和(可选)切片文件。 +支持格式:MP4、FLV、ASF、RM、RMVB、MPEG、MOV、AVI、MPEGTS、M4S、WMV、3GP、TS、MPG、WEBM、MKV、WM、MPE、VOB、DAT、MP4V、M4V、F4V、MXF、QT 等主流视频格式。 +使用限制:单个视频时长不超过 2 小时。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `segment-scenes` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 待处理视频 Url,必须是公网可直接访问的 HTTP/HTTPS 链接 | +| enable_clip_fade | `--enable-clip-fade` | boolean | 否 | false | 是否将检测到的淡入/淡出片段作为独立切片输出 | +| segment_threshold | `--segment-threshold` | number | 否 | - | 场景切分敏感度阈值,范围 [0, 100),100 不可取。数值越低切得越细,参考经验值10 | +| min_duration | `--min-duration` | number | 否 | - | 单个切片最小时长(秒),参考经验值3,应小于等于max_duration | +| max_duration | `--max-duration` | number | 否 | - | 单个切片最大时长(秒),参考经验值30,应大于等于min_duration | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video segment-scenes \ + --video-url https://example.com/video_url \ + --enable-clip-fade \ + 
--segment-threshold 10 \ + --min-duration 3 \ + --max-duration 30 \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video segment-scenes` +- 推荐查询:`mediakit-cli shared query-task --task-id ` diff --git a/skills/byted-mediakit-video/reference/shared.md b/skills/byted-mediakit-video/reference/shared.md index 74a8d93..2f08290 100644 --- a/skills/byted-mediakit-video/reference/shared.md +++ b/skills/byted-mediakit-video/reference/shared.md @@ -161,6 +161,12 @@ mediakit-cli shared query-task --task-id - local 模式下不支持 query-task - 当前本轮能力以云端执行为主;如需显式声明,请优先使用 `--cloud` +### Cloud 模式媒体输入补充 + +- 当命令以 `--cloud` 或 `cloud-first` 策略执行时,媒体输入参数(如 `video_url`、`audio_url`、`image_url`、`subtitle_url`、`sub_image_url` 及对应数组/对象子字段)可传入 `http://` / `https://` URL、`mediakit://...` file_id 或本地文件路径 +- `http://` / `https://` URL 与 `mediakit://...` file_id 会原样提交;本地文件路径会由 CLI 先上传为 `mediakit://...` file_id,再提交给云端工具 +- 各工具 reference 中的参数说明来自 APIHub/OpenAPI 原始字段描述;若其中写有公网 URL 或 HTTP/HTTPS URL,表示云端 API 最终接收的资源形态,不限制 CLI cloud 模式的本地路径预处理能力 + ### Local 模式补充 - 本地输出目录优先级:`--output-path` > `MEDIAKIT_OUTPUT_PATH` > config `output_path` > `~/.mediakit/temp` diff --git a/skills/byted-mediakit-video/reference/video-ocr.md b/skills/byted-mediakit-video/reference/video-ocr.md new file mode 100644 index 0000000..86c60b9 --- /dev/null +++ b/skills/byted-mediakit-video/reference/video-ocr.md @@ -0,0 +1,47 @@ +# 视频识别字幕(OCR) + +## 能力描述 +识别视频画面中的字幕/文字内容,输出带时间戳的字幕片段。 +支持格式:主流视频格式如 mp4、flv、ts、avi、mov、wmv、mkv。 + +## 执行方式 + +| 项目 | 说明 | +|------|------| +| Domain | `video` | +| Tool | `video-ocr` | +| 是否异步 | `是` | +| 是否支持 local | `否` | +| 模式说明 | cloud only;可通过 `--cloud` 强制当前调用 | +| 幂等行为 | 如命令支持 `client_token` 与 `callback_args`,重试时复用同一组值;强制重跑时更换新的 `client_token` | + +## 参数 +| 参数 | CLI flag | 
类型 | 必填 | 默认值 | 说明 | +|------|----------|------|------|--------|------| +| video_url | `--video-url` | string | 是 | - | 输入视频 Url(需公网可访问) | +| mode | `--mode` | string | 否 | Subtitle | 工作模式(Subtitle: 识别字幕文本;Detailed: 识别更详细文本信息) | +| callback_args | `--callback-args` | string | 否 | - | 可选,回调参数 | +| client_token | `--client-token` | string | 否 | - | 可选,用于幂等,默认幂等,用户可根据需求进行调整 | + +## 调用示例 +```bash +mediakit-cli video video-ocr \ + --video-url https://example.com/video_url \ + --mode Subtitle \ + --callback-args sample-callback-args \ + --client-token demo-client-token +``` + +## 输出格式 +```json +{ + "task_id": "task_demo_001", + "request_id": "req_demo_001" +} +``` + +## 任务结果查询 +提交成功后会返回 `task_id`,再执行 `mediakit-cli shared query-task --task-id ` 查询。 + +- 当前命令:`mediakit-cli video video-ocr` +- 推荐查询:`mediakit-cli shared query-task --task-id `