diff --git a/shortcuts/mail/mail_messages.go b/shortcuts/mail/mail_messages.go index 717562248..4f957c847 100644 --- a/shortcuts/mail/mail_messages.go +++ b/shortcuts/mail/mail_messages.go @@ -5,6 +5,9 @@ package mail import ( "context" + "fmt" + "regexp" + "strings" "github.com/larksuite/cli/internal/output" "github.com/larksuite/cli/shortcuts/common" @@ -35,7 +38,11 @@ var MailMessages = common.Shortcut{ {Name: "print-output-schema", Type: "bool", Desc: "Print output field reference (run this first to learn field names before parsing output)"}, }, Validate: func(ctx context.Context, runtime *common.RuntimeContext) error { - return validateBotMailboxNotMe(runtime) + if err := validateBotMailboxNotMe(runtime); err != nil { + return err + } + messageIDs := splitByComma(runtime.Str("message-ids")) + return validateMessageIDs(messageIDs) }, DryRun: func(ctx context.Context, runtime *common.RuntimeContext) *common.DryRunAPI { mailboxID := resolveMailboxID(runtime) @@ -86,3 +93,95 @@ var MailMessages = common.Shortcut{ return nil }, } + +// messageIDPattern matches a single message ID after cleaning: non-empty, +// no spaces, no brackets, no colons. Message IDs from the Lark mail API are +// opaque strings (typically hex or alphanumeric), so any character that +// suggests structural content (brackets, colons) is rejected. +var messageIDPattern = regexp.MustCompile(`^[^\s\[\]:]+$`) + +// commonEnglishWords are words that indicate the input is natural language +// rather than opaque message IDs. The check is case-insensitive. +var commonEnglishWords = []string{ + "the", "and", "for", "are", "but", "not", "you", "all", "can", "had", + "her", "was", "one", "our", "out", "get", "has", "how", "its", "may", + "new", "now", "old", "see", "way", "who", "did", "let", "say", + "she", "too", "use", "from", "with", "this", "that", "have", + "will", "been", "they", "what", "about", "would", "could", "their", + "which", "there", "these", "other", "should", "please", "message", + "email", "subject", "fetch", "read", "list", "send", "reply", "forward", +} + +// validateMessageIDs validates each individual message ID after comma splitting. +// It rejects IDs that are clearly illegal before they reach the batch_get API: +// - empty or whitespace-only +// - wrapped in literal quotes (stripped before further validation) +// - look like a JSON array string +// - contain colon separators +// - contain spaces (likely natural language) +// - match common English words (likely natural language) +// - don't match a reasonable message ID pattern +func validateMessageIDs(ids []string) error { + if len(ids) == 0 { + return nil // empty list is handled by the Execute function + } + var invalid []string + for _, raw := range ids { + if reason := validateSingleMessageID(raw); reason != "" { + invalid = append(invalid, reason) + } + } + if len(invalid) > 0 { + return output.ErrValidation("invalid --message-ids: %s", strings.Join(invalid, "; ")) + } + return nil +} + +// validateSingleMessageID returns an empty string if the ID is valid, or a +// human-readable reason if it is invalid. It applies cleaning (quote +// stripping) before validation. +func validateSingleMessageID(raw string) string { + id := strings.TrimSpace(raw) + + // Strip surrounding literal quotes (both single and double). + if len(id) >= 2 { + if (id[0] == '"' && id[len(id)-1] == '"') || (id[0] == '\'' && id[len(id)-1] == '\'') { + id = strings.TrimSpace(id[1 : len(id)-1]) + } + } + + // Reject empty or whitespace-only after trim. + if id == "" { + return fmt.Sprintf("%q: empty or whitespace-only", raw) + } + + // Reject JSON array strings (e.g. "[\"id1\",\"id2\"]"). + if strings.HasPrefix(id, "[") && strings.HasSuffix(id, "]") { + return fmt.Sprintf("%q: looks like a JSON array, not a single message ID", raw) + } + + // Reject colon-separated IDs (e.g. "id1:id2:id3"). + if strings.Contains(id, ":") { + return fmt.Sprintf("%q: contains colon separators (multiple IDs concatenated)", raw) + } + + // Reject IDs with spaces — likely natural language or malformed input. + if strings.Contains(id, " ") { + return fmt.Sprintf("%q: contains spaces (expected opaque identifier)", raw) + } + + // Reject IDs that look like natural language: common English words. + lower := strings.ToLower(id) + for _, word := range commonEnglishWords { + if lower == word { + return fmt.Sprintf("%q: looks like natural language, not a message ID", raw) + } + } + + // Final pattern check: non-empty, no spaces, no brackets, no colons. + if !messageIDPattern.MatchString(id) { + return fmt.Sprintf("%q: contains invalid characters (spaces, brackets, or colons)", raw) + } + + return "" +} diff --git a/shortcuts/mail/mail_messages_test.go b/shortcuts/mail/mail_messages_test.go new file mode 100644 index 000000000..413dc4d44 --- /dev/null +++ b/shortcuts/mail/mail_messages_test.go @@ -0,0 +1,238 @@ +// Copyright (c) 2026 Lark Technologies Pte. Ltd. +// SPDX-License-Identifier: MIT + +package mail + +import ( + "strings" + "testing" +) + +func TestValidateMessageIDs(t *testing.T) { + tests := []struct { + name string + ids []string + wantErr bool + wantSubstr string + }{ + { + name: "empty list passes", + ids: []string{}, + wantErr: false, + }, + { + name: "valid single ID passes", + ids: []string{"msg_abc123"}, + wantErr: false, + }, + { + name: "valid multiple IDs pass", + ids: []string{"msg_abc123", "msg_def456", "msg_ghi789"}, + wantErr: false, + }, + { + name: "valid hex ID passes", + ids: []string{"a1b2c3d4e5f6"}, + wantErr: false, + }, + { + name: "valid ID with underscores and dashes passes", + ids: []string{"msg_abc-123_def"}, + wantErr: false, + }, + { + name: "empty string rejected", + ids: []string{""}, + wantErr: true, + wantSubstr: "empty or whitespace-only", + }, + { + name: "whitespace-only rejected", + ids: []string{" "}, + wantErr: true, + wantSubstr: "empty or whitespace-only", + }, + { + name: "natural language word rejected", + ids: []string{"message"}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "natural language phrase rejected", + ids: []string{"please read this email"}, + wantErr: true, + wantSubstr: "contains spaces", + }, + { + name: "JSON array string rejected", + ids: []string{`["id1","id2"]`}, + wantErr: true, + wantSubstr: "JSON array", + }, + { + name: "JSON array string with spaces rejected", + ids: []string{`[ "id1", "id2" ]`}, + wantErr: true, + wantSubstr: "JSON array", + }, + { + name: "double-quoted valid ID passes after quote stripping", + ids: []string{`"msg_abc123"`}, + wantErr: false, + }, + { + name: "single-quoted valid ID passes after quote stripping", + ids: []string{`'msg_abc123'`}, + wantErr: false, + }, + { + name: "double-quoted natural language rejected after stripping", + ids: []string{`"message"`}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "single-quoted natural language rejected after stripping", + ids: []string{`'email'`}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "ID that just looks like quotes but isn't still valid", + ids: []string{"msg_abc'123"}, + wantErr: false, + }, + { + name: "colon-separated IDs rejected", + ids: []string{"id1:id2:id3"}, + wantErr: true, + wantSubstr: "colon separators", + }, + { + name: "mixed valid and invalid reports invalid ones", + ids: []string{"msg_valid123", "the", "msg_another456"}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "double-quoted empty rejected", + ids: []string{`""`}, + wantErr: true, + wantSubstr: "empty or whitespace-only", + }, + { + name: "single-quoted empty rejected", + ids: []string{`''`}, + wantErr: true, + wantSubstr: "empty or whitespace-only", + }, + { + name: "natural language word 'email' rejected", + ids: []string{"email"}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "natural language word 'subject' rejected", + ids: []string{"subject"}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "natural language word 'fetch' rejected", + ids: []string{"fetch"}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "numeric ID passes", + ids: []string{"1234567890"}, + wantErr: false, + }, + { + name: "ID with uppercase passes", + ids: []string{"MSG_ABC123DEF"}, + wantErr: false, + }, + { + name: "realistic Lark message ID passes", + ids: []string{"gmxxxxxxxxxxxxxx"}, + wantErr: false, + }, + { + name: "multiple invalid IDs all reported", + ids: []string{"the", "email", "id1:id2"}, + wantErr: true, + wantSubstr: "natural language", + }, + { + name: "double-quoted whitespace-only rejected", + ids: []string{`" "`}, + wantErr: true, + wantSubstr: "empty or whitespace-only", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateMessageIDs(tt.ids) + if (err != nil) != tt.wantErr { + t.Errorf("validateMessageIDs(%v) error = %v, wantErr %v", tt.ids, err, tt.wantErr) + return + } + if err != nil && tt.wantSubstr != "" { + if !strings.Contains(err.Error(), tt.wantSubstr) { + t.Errorf("validateMessageIDs(%v) error = %v, want substr %q", tt.ids, err, tt.wantSubstr) + } + } + }) + } +} + +func TestValidateSingleMessageID(t *testing.T) { + tests := []struct { + name string + raw string + wantOK bool + }{ + {name: "valid hex ID", raw: "a1b2c3d4", wantOK: true}, + {name: "valid prefixed ID", raw: "msg_abc123", wantOK: true}, + {name: "empty string", raw: "", wantOK: false}, + {name: "whitespace only", raw: " ", wantOK: false}, + {name: "tab only", raw: "\t", wantOK: false}, + {name: "natural language phrase", raw: "please read my email", wantOK: false}, + {name: "JSON array", raw: `["id1","id2"]`, wantOK: false}, + {name: "colon separated", raw: "id1:id2:id3", wantOK: false}, + {name: "double quoted valid ID passes after strip", raw: `"msg_abc"`, wantOK: true}, + {name: "single quoted valid ID passes after strip", raw: `'msg_abc'`, wantOK: true}, + {name: "double quoted natural language rejected after strip", raw: `"message"`, wantOK: false}, + {name: "single quoted natural language rejected after strip", raw: `'email'`, wantOK: false}, + {name: "word: message", raw: "message", wantOK: false}, + {name: "word: email", raw: "email", wantOK: false}, + {name: "word: subject", raw: "subject", wantOK: false}, + {name: "word: please", raw: "please", wantOK: false}, + {name: "word: THE", raw: "THE", wantOK: false}, + {name: "numeric ID", raw: "1234567890", wantOK: true}, + {name: "ID with dash", raw: "msg-abc-123", wantOK: true}, + {name: "ID with dot", raw: "msg.abc.123", wantOK: true}, + {name: "ID with underscore", raw: "msg_abc_123", wantOK: true}, + {name: "double quoted empty", raw: `""`, wantOK: false}, + {name: "single quoted empty", raw: `''`, wantOK: false}, + {name: "double quoted whitespace", raw: `" "`, wantOK: false}, + {name: "double quoted colon-separated rejected after strip", raw: `"a:b:c"`, wantOK: false}, + {name: "double quoted JSON array rejected", raw: `"[]"`, wantOK: false}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + reason := validateSingleMessageID(tt.raw) + if tt.wantOK && reason != "" { + t.Errorf("validateSingleMessageID(%q) = %q, want empty (valid)", tt.raw, reason) + } + if !tt.wantOK && reason == "" { + t.Errorf("validateSingleMessageID(%q) = empty, want rejection reason", tt.raw) + } + }) + } +}