Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 100 additions & 1 deletion shortcuts/mail/mail_messages.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import (
"context"
"fmt"
"regexp"
"strings"

"github.com/larksuite/cli/internal/output"
"github.com/larksuite/cli/shortcuts/common"
Expand Down Expand Up @@ -35,7 +38,11 @@
{Name: "print-output-schema", Type: "bool", Desc: "Print output field reference (run this first to learn field names before parsing output)"},
},
Validate: func(ctx context.Context, runtime *common.RuntimeContext) error {
	// Run the mailbox check first; only when it passes are the
	// comma-separated --message-ids values validated. An unconditional
	// return of the mailbox check here would make the ID validation
	// unreachable.
	if err := validateBotMailboxNotMe(runtime); err != nil {
		return err
	}
	return validateMessageIDs(splitByComma(runtime.Str("message-ids")))
},
DryRun: func(ctx context.Context, runtime *common.RuntimeContext) *common.DryRunAPI {
mailboxID := resolveMailboxID(runtime)
Expand Down Expand Up @@ -86,3 +93,95 @@
return nil
},
}

var (
	// messageIDPattern accepts a cleaned message ID: one or more characters,
	// none of which is whitespace, a square bracket, or a colon. Message IDs
	// from the Lark mail API are treated as opaque strings (typically hex or
	// alphanumeric), so characters that hint at structured content are
	// refused.
	messageIDPattern = regexp.MustCompile(`^[^\s\[\]:]+$`)

	// commonEnglishWords lists lowercase words whose presence as an entire ID
	// signals natural-language input instead of an opaque identifier. Callers
	// lowercase the candidate before comparing, which makes the check
	// case-insensitive.
	commonEnglishWords = []string{
		// Short function words.
		"the", "and", "for", "are", "but", "not", "you", "all", "can", "had",
		"her", "was", "one", "our", "out", "get", "has", "how", "its", "may",
		"new", "now", "old", "see", "way", "who", "did", "let", "say",
		"she", "too", "use",
		// Longer function words.
		"from", "with", "this", "that", "have",
		"will", "been", "they", "what", "about", "would", "could", "their",
		"which", "there", "these", "other", "should", "please",
		// Mail-related vocabulary.
		"message",
		"email", "subject", "fetch", "read", "list", "send", "reply", "forward",
	}
)

// validateMessageIDs checks every entry of an already comma-split ID list
// with validateSingleMessageID and gathers the rejection reasons, so obviously
// malformed input is refused before it reaches the batch_get API. Reasons an
// entry may be rejected:
//   - it is empty or whitespace-only (also after surrounding quotes are removed)
//   - it looks like a JSON array string
//   - it contains colon separators
//   - it contains spaces (likely natural language)
//   - it equals a common English word (likely natural language)
//   - it does not match the expected message ID pattern
//
// It returns nil when no entry is rejected, which includes an empty list
// (the empty case is left to the Execute function). Otherwise it returns one
// validation error whose message joins all reasons with "; ".
func validateMessageIDs(ids []string) error {
	problems := make([]string, 0, len(ids))
	for i := range ids {
		msg := validateSingleMessageID(ids[i])
		if msg == "" {
			continue
		}
		problems = append(problems, msg)
	}
	if len(problems) == 0 {
		return nil
	}
	return output.ErrValidation("invalid --message-ids: %s", strings.Join(problems, "; "))
}

// validateSingleMessageID returns an empty string if the ID is valid, or a
// human-readable reason if it is invalid. It applies cleaning (quote
// stripping) before validation.
func validateSingleMessageID(raw string) string {
id := strings.TrimSpace(raw)

// Strip surrounding literal quotes (both single and double).
if len(id) >= 2 {
if (id[0] == '"' && id[len(id)-1] == '"') || (id[0] == '\'' && id[len(id)-1] == '\'') {
id = strings.TrimSpace(id[1 : len(id)-1])
}
}

// Reject empty or whitespace-only after trim.
if id == "" {
return fmt.Sprintf("%q: empty or whitespace-only", raw)
}

// Reject JSON array strings (e.g. "[\"id1\",\"id2\"]").
if strings.HasPrefix(id, "[") && strings.HasSuffix(id, "]") {
return fmt.Sprintf("%q: looks like a JSON array, not a single message ID", raw)
}

// Reject colon-separated IDs (e.g. "id1:id2:id3").
if strings.Contains(id, ":") {
return fmt.Sprintf("%q: contains colon separators (multiple IDs concatenated)", raw)
}

// Reject IDs with spaces — likely natural language or malformed input.
if strings.Contains(id, " ") {
return fmt.Sprintf("%q: contains spaces (expected opaque identifier)", raw)
}

// Reject IDs that look like natural language: common English words.
lower := strings.ToLower(id)
for _, word := range commonEnglishWords {
if lower == word {
return fmt.Sprintf("%q: looks like natural language, not a message ID", raw)
}
}

// Final pattern check: non-empty, no spaces, no brackets, no colons.
if !messageIDPattern.MatchString(id) {
return fmt.Sprintf("%q: contains invalid characters (spaces, brackets, or colons)", raw)

Check warning on line 183 in shortcuts/mail/mail_messages.go

View check run for this annotation

Codecov / codecov/patch

shortcuts/mail/mail_messages.go#L183

Added line #L183 was not covered by tests
}

return ""
}
238 changes: 238 additions & 0 deletions shortcuts/mail/mail_messages_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
// Copyright (c) 2026 Lark Technologies Pte. Ltd.
// SPDX-License-Identifier: MIT

package mail

import (
"strings"
"testing"
)

// TestValidateMessageIDs exercises validateMessageIDs over lists of IDs.
// Each case states whether an error is expected and which substrings the
// error message must contain. Cases that claim several invalid IDs are
// reported list one substring per offending ID, so the test fails if only the
// first problem were surfaced.
func TestValidateMessageIDs(t *testing.T) {
	tests := []struct {
		name        string
		ids         []string
		wantErr     bool
		wantSubstrs []string // every entry must appear in the error message
	}{
		{name: "empty list passes", ids: []string{}},
		{name: "valid single ID passes", ids: []string{"msg_abc123"}},
		{name: "valid multiple IDs pass", ids: []string{"msg_abc123", "msg_def456", "msg_ghi789"}},
		{name: "valid hex ID passes", ids: []string{"a1b2c3d4e5f6"}},
		{name: "valid ID with underscores and dashes passes", ids: []string{"msg_abc-123_def"}},
		{name: "empty string rejected", ids: []string{""}, wantErr: true, wantSubstrs: []string{"empty or whitespace-only"}},
		{name: "whitespace-only rejected", ids: []string{" "}, wantErr: true, wantSubstrs: []string{"empty or whitespace-only"}},
		{name: "natural language word rejected", ids: []string{"message"}, wantErr: true, wantSubstrs: []string{"natural language"}},
		{name: "natural language phrase rejected", ids: []string{"please read this email"}, wantErr: true, wantSubstrs: []string{"contains spaces"}},
		{name: "JSON array string rejected", ids: []string{`["id1","id2"]`}, wantErr: true, wantSubstrs: []string{"JSON array"}},
		{name: "JSON array string with spaces rejected", ids: []string{`[ "id1", "id2" ]`}, wantErr: true, wantSubstrs: []string{"JSON array"}},
		{name: "double-quoted valid ID passes after quote stripping", ids: []string{`"msg_abc123"`}},
		{name: "single-quoted valid ID passes after quote stripping", ids: []string{`'msg_abc123'`}},
		{name: "double-quoted natural language rejected after stripping", ids: []string{`"message"`}, wantErr: true, wantSubstrs: []string{"natural language"}},
		{name: "single-quoted natural language rejected after stripping", ids: []string{`'email'`}, wantErr: true, wantSubstrs: []string{"natural language"}},
		{name: "ID that just looks like quotes but isn't still valid", ids: []string{"msg_abc'123"}},
		{name: "colon-separated IDs rejected", ids: []string{"id1:id2:id3"}, wantErr: true, wantSubstrs: []string{"colon separators"}},
		{
			name:        "mixed valid and invalid reports invalid ones",
			ids:         []string{"msg_valid123", "the", "msg_another456"},
			wantErr:     true,
			wantSubstrs: []string{`"the"`, "natural language"},
		},
		{name: "double-quoted empty rejected", ids: []string{`""`}, wantErr: true, wantSubstrs: []string{"empty or whitespace-only"}},
		{name: "single-quoted empty rejected", ids: []string{`''`}, wantErr: true, wantSubstrs: []string{"empty or whitespace-only"}},
		{name: "natural language word 'email' rejected", ids: []string{"email"}, wantErr: true, wantSubstrs: []string{"natural language"}},
		{name: "natural language word 'subject' rejected", ids: []string{"subject"}, wantErr: true, wantSubstrs: []string{"natural language"}},
		{name: "natural language word 'fetch' rejected", ids: []string{"fetch"}, wantErr: true, wantSubstrs: []string{"natural language"}},
		{name: "numeric ID passes", ids: []string{"1234567890"}},
		{name: "ID with uppercase passes", ids: []string{"MSG_ABC123DEF"}},
		{name: "realistic Lark message ID passes", ids: []string{"gmxxxxxxxxxxxxxx"}},
		{
			name:    "multiple invalid IDs all reported",
			ids:     []string{"the", "email", "id1:id2"},
			wantErr: true,
			// One marker per offending ID plus both reason kinds.
			wantSubstrs: []string{`"the"`, `"email"`, `"id1:id2"`, "natural language", "colon separators"},
		},
		{name: "double-quoted whitespace-only rejected", ids: []string{`" "`}, wantErr: true, wantSubstrs: []string{"empty or whitespace-only"}},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			err := validateMessageIDs(tt.ids)
			if (err != nil) != tt.wantErr {
				t.Fatalf("validateMessageIDs(%v) error = %v, wantErr %v", tt.ids, err, tt.wantErr)
			}
			if err == nil {
				return
			}
			for _, want := range tt.wantSubstrs {
				if !strings.Contains(err.Error(), want) {
					t.Errorf("validateMessageIDs(%v) error = %v, want substr %q", tt.ids, err, want)
				}
			}
		})
	}
}

// TestValidateSingleMessageID checks the accept/reject decision of
// validateSingleMessageID for individual raw inputs. Only the decision is
// asserted (empty reason means accepted), not the wording of the reason.
func TestValidateSingleMessageID(t *testing.T) {
	type testCase struct {
		name   string
		raw    string
		wantOK bool
	}

	cases := []testCase{
		// Accepted identifiers.
		{name: "valid hex ID", raw: "a1b2c3d4", wantOK: true},
		{name: "valid prefixed ID", raw: "msg_abc123", wantOK: true},
		// Empty and whitespace inputs.
		{name: "empty string", raw: "", wantOK: false},
		{name: "whitespace only", raw: " ", wantOK: false},
		{name: "tab only", raw: "\t", wantOK: false},
		// Structured or natural-language inputs.
		{name: "natural language phrase", raw: "please read my email", wantOK: false},
		{name: "JSON array", raw: `["id1","id2"]`, wantOK: false},
		{name: "colon separated", raw: "id1:id2:id3", wantOK: false},
		// Quoted inputs are judged after the quotes are stripped.
		{name: "double quoted valid ID passes after strip", raw: `"msg_abc"`, wantOK: true},
		{name: "single quoted valid ID passes after strip", raw: `'msg_abc'`, wantOK: true},
		{name: "double quoted natural language rejected after strip", raw: `"message"`, wantOK: false},
		{name: "single quoted natural language rejected after strip", raw: `'email'`, wantOK: false},
		// Single common words.
		{name: "word: message", raw: "message", wantOK: false},
		{name: "word: email", raw: "email", wantOK: false},
		{name: "word: subject", raw: "subject", wantOK: false},
		{name: "word: please", raw: "please", wantOK: false},
		{name: "word: THE", raw: "THE", wantOK: false},
		// Punctuation that is allowed inside an ID.
		{name: "numeric ID", raw: "1234567890", wantOK: true},
		{name: "ID with dash", raw: "msg-abc-123", wantOK: true},
		{name: "ID with dot", raw: "msg.abc.123", wantOK: true},
		{name: "ID with underscore", raw: "msg_abc_123", wantOK: true},
		// Quoted inputs that are still invalid once unwrapped.
		{name: "double quoted empty", raw: `""`, wantOK: false},
		{name: "single quoted empty", raw: `''`, wantOK: false},
		{name: "double quoted whitespace", raw: `" "`, wantOK: false},
		{name: "double quoted colon-separated rejected after strip", raw: `"a:b:c"`, wantOK: false},
		{name: "double quoted JSON array rejected", raw: `"[]"`, wantOK: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := validateSingleMessageID(tc.raw)
			accepted := got == ""
			switch {
			case tc.wantOK && !accepted:
				t.Errorf("validateSingleMessageID(%q) = %q, want empty (valid)", tc.raw, got)
			case !tc.wantOK && accepted:
				t.Errorf("validateSingleMessageID(%q) = empty, want rejection reason", tc.raw)
			}
		})
	}
}
Loading