codebuff/common/src/util/string.ts at cea19b46613c030ba5a534678495adf01919860d · CodebuffAI/codebuff · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
import { sumBy } from './lodash-replacements'

/**
 * Cuts `str` down to its first `maxLength` characters and appends `'...'`.
 *
 * Strings that already fit within `maxLength` are returned untouched. The
 * ellipsis is added on top of the kept characters, so a truncated result is
 * longer than `maxLength`.
 */
export const truncateString = (str: string, maxLength: number) =>
  str.length <= maxLength ? str : `${str.slice(0, maxLength)}...`

/**
 * Truncates `str` so that it fits in `maxLength` characters, inserting a
 * bracketed marker where text was removed.
 *
 * - `remove: 'END'` keeps the start of the string and appends `\n[message...]`.
 * - `remove: 'START'` keeps the end of the string and prepends `[...message]\n`.
 * - `remove: 'MIDDLE'` keeps an equal number of characters from both ends
 *   around `\n[...message...]\n`.
 *
 * Strings that already fit are returned unchanged. If the marker alone is
 * longer than `maxLength`, only the marker is returned (nothing of `str` is
 * kept), so in that case the result can still exceed `maxLength`.
 */
export const truncateStringWithMessage = ({
  str,
  maxLength,
  message = 'TRUNCATED DUE TO LENGTH',
  remove = 'END',
}: {
  str: string
  maxLength: number
  message?: string
  remove?: 'END' | 'START' | 'MIDDLE'
}) => {
  if (str.length <= maxLength) {
    return str
  }

  if (remove === 'END') {
    const suffix = `\n[${message}...]`
    // Clamp to 0: a negative end index would make slice() count from the end
    // of the string and keep almost all of it.
    const keep = Math.max(0, maxLength - suffix.length)
    return str.slice(0, keep) + suffix
  }
  if (remove === 'START') {
    const prefix = `[...${message}]\n`
    const keep = Math.max(0, maxLength - prefix.length)
    return prefix + str.slice(str.length - keep)
  }

  const middle = `\n[...${message}...]\n`
  const half = Math.max(0, Math.floor((maxLength - middle.length) / 2))
  // slice(-0) is slice(0) and would return the whole string, so handle the
  // zero-length tail explicitly.
  const tail = half > 0 ? str.slice(-half) : ''
  return str.slice(0, half) + middle + tail
}

/**
 * Reports whether `character` contains whitespace.
 *
 * The check uses the regular-expression class `\s`, which covers space, tab,
 * carriage return and line feed as well as the other whitespace and line
 * terminator characters JavaScript recognises (for example form feed or a
 * non-breaking space). The pattern is not anchored, so when a longer string
 * is passed the result is `true` as soon as any one character is whitespace.
 *
 * @param character Character (or string) to check
 * @return Whether whitespace was found
 */
export const isWhitespace = (character: string) =>
  /\s/.exec(character) !== null

/**
 * Replaces "lazy edit" placeholder comments (an ellipsis followed by one of
 * the words rest / unchanged / keep / file / existing / some) with
 * `replacement`, across several comment syntaxes.
 *
 * `replacement` is inserted literally: `$`-sequences such as `$&` or `$1` in
 * it are NOT treated as replacement patterns.
 *
 * @param content Text to scan
 * @param replacement Text substituted for every matching placeholder comment
 * @returns `content` with all matching comments replaced
 */
export const replaceNonStandardPlaceholderComments = (
  content: string,
  replacement: string,
): string => {
  // Patterns are applied in this order, each one on the output of the
  // previous. JSX goes first because its body is itself a C-style block
  // comment; HTML goes before `--` because `<!--` contains `--`.
  const commentPatterns: RegExp[] = [
    // JSX comments (match this first)
    /{\s*\/\*\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?\s*\*\/\s*}/gi,
    // C-style comments (C, C++, Java, JavaScript, TypeScript, etc.)
    /\/\/\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?/gi,
    /\/\*\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?\s*\*\//gi,
    // Python, Ruby, R comments
    /#\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?/gi,
    // HTML-style comments
    /<!--\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?\s*-->/gi,
    // SQL, Haskell, Lua comments
    /--\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?/gi,
    // MATLAB comments
    /%\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\s*\.{3})?/gi,
  ]

  // A function replacer keeps `$` sequences in `replacement` from being
  // interpreted; with the `g` flag, replace() substitutes every match.
  return commentPatterns.reduce(
    (updated, regex) => updated.replace(regex, () => replacement),
    content,
  )
}

/**
 * Derives a repeatable boolean from a string: the UTF-16 code unit of every
 * character is fed to `sumBy`, and the result is `true` when that total is
 * even. (Assumes `sumBy` adds up the per-element values, like its lodash
 * namesake — confirm in './lodash-replacements'.)
 */
export const randBoolFromStr = (str: string) => {
  const characters = str.split('')
  const total = sumBy(characters, (char) => char.charCodeAt(0))
  return total % 2 === 0
}

/**
 * Formats a count together with a noun, pluralising the noun with simple
 * English spelling heuristics whenever `count` is not exactly 1.
 *
 * Rules, applied in order:
 * 1. `...ff`  -> add `s`               (diff -> diffs, cliff -> cliffs)
 * 2. `...f`   -> replace with `ves`    (leaf -> leaves)
 * 3. `...fe`  -> replace with `ves`    (knife -> knives)
 * 4. consonant + `y` -> `ies`          (city -> cities; day -> days)
 * 5. `s`, `x`, `z`, `sh`, `ch`, `o` -> add `es` (box -> boxes)
 * 6. otherwise add `s`
 *
 * These are heuristics only; irregular nouns are not handled.
 *
 * @param count Quantity to display
 * @param word Singular form of the noun
 * @returns `"<count> <word or plural>"`
 */
export const pluralize = (count: number, word: string) => {
  if (count === 1) return `${count} ${word}`

  // Double-f words just take an "s"; without this they would fall into the
  // f -> ves rule below and produce e.g. "difves".
  if (word.endsWith('ff')) {
    return `${count} ${word + 's'}`
  }

  // Handle words ending in f/fe first (before other rules)
  if (word.endsWith('f')) {
    return `${count} ${word.slice(0, -1) + 'ves'}`
  }
  if (word.endsWith('fe')) {
    return `${count} ${word.slice(0, -2) + 'ves'}`
  }

  // Handle words ending in 'y' (unless preceded by a vowel)
  if (word.endsWith('y') && !word.match(/[aeiou]y$/)) {
    return `${count} ${word.slice(0, -1) + 'ies'}`
  }

  // Handle words ending in s, sh, ch, x, z, o
  if (word.match(/[sxz]$/) || word.match(/[cs]h$/) || word.match(/o$/)) {
    return `${count} ${word + 'es'}`
  }

  return `${count} ${word + 's'}`
}

/**
 * Upper-cases the first character of `str` and lower-cases everything after
 * it. Empty (falsy) input is returned as-is.
 * Example: "hELLO" -> "Hello"
 */
export const capitalize = (str: string): string => {
  const [first, rest] = str ? [str.charAt(0), str.slice(1)] : ['', '']
  return str ? `${first.toUpperCase()}${rest.toLowerCase()}` : str
}

/**
 * Turns a snake_case identifier into space-separated words, passing each
 * underscore-delimited segment through `capitalize`.
 * Example: "add_subgoal" -> "Add Subgoal"
 */
export const snakeToTitleCase = (str: string): string => {
  const words: string[] = []
  for (const segment of str.split('_')) {
    words.push(capitalize(segment))
  }
  return words.join(' ')
}

/**
 * Prefixes a URL with a protocol when it does not already have one.
 *
 * - Input starting with `http://`, `https://` or `file://` is returned as-is.
 * - Input starting with `localhost` or a `127.x.x.x` address gets `http://`.
 * - Input starting with `/` is treated as a path and gets `file://`.
 * - Everything else gets `https://`.
 */
export const ensureUrlProtocol = (url: string): string => {
  const knownProtocols = ['http://', 'https://', 'file://']
  if (knownProtocols.some((protocol) => url.startsWith(protocol))) {
    return url
  }

  if (url.startsWith('/')) {
    return `file://${url}`
  }

  const isLoopback =
    url.startsWith('localhost') || /^127\.\d+\.\d+\.\d+/.test(url)
  return `${isLoopback ? 'http' : 'https'}://${url}`
}

/**
 * Replaces the first occurrence of `searchStr` in `content` with `replaceStr`,
 * inserting `replaceStr` verbatim: special replacement patterns such as `$&`
 * or `$1` are not expanded. `content` is returned unchanged when `searchStr`
 * does not occur.
 */
export const safeReplace = (
  content: string,
  searchStr: string,
  replaceStr: string,
): string => content.replace(searchStr, () => replaceStr)

/**
 * Detects "lazy edit" placeholders: comments that stand in for omitted code
 * (e.g. `// ... existing code ...`, `# rest of the file`).
 *
 * The input is lower-cased and trimmed first, then checked against a few
 * literal markers and against one pattern per supported comment syntax
 * (an ellipsis followed by rest / unchanged / keep / file / existing / some).
 *
 * @returns `true` when any marker or pattern is found
 */
export const hasLazyEdit = (content: string) => {
  const normalized = content.toLowerCase().trim()

  const literalMarkers = [
    '... existing code ...',
    '// rest of the',
    '# rest of the',
  ]
  if (literalMarkers.some((marker) => normalized.includes(marker))) {
    return true
  }

  // Match various comment styles with ellipsis and specific words
  const placeholderPatterns = [
    // C-style single line
    /\/\/\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?/,
    // C-style multi-line
    /\/\*\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?\s*\*\//,
    // Python/Ruby style
    /#\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?/,
    // HTML style
    /<!--\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?\s*-->/,
    // SQL/Haskell style
    /--\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?/,
    // MATLAB style
    /%\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?/,
    // JSX style
    /{\s*\/\*\s*\.{3}.*(?:rest|unchanged|keep|file|existing|some).*(?:\.{3})?\s*\*\/\s*}/,
  ]
  return placeholderPatterns.some((pattern) => pattern.test(normalized))
}

/**
 * Extracts a JSON field from a string, transforms it, and puts it back.
 * Handles both array and object JSON values.
 *
 * Only the first `"field": {...}` / `"field": [...]` occurrence is touched.
 * The value is matched up to the first closing `}` / `]`, so values that
 * contain nested objects or arrays are cut short; such a fragment usually
 * fails to parse and is then replaced by `fallback`.
 *
 * The field name is matched literally, and the replacement text is inserted
 * verbatim (`$` sequences in the serialized value or in `fallback` are not
 * interpreted as replacement patterns).
 *
 * @param content The string containing JSON-like content
 * @param field The field name to find and transform
 * @param transform Function to transform the parsed JSON value
 * @param fallback String to use as the field's value if parsing or
 *   transforming throws
 * @returns The content string with the transformed JSON field, or `content`
 *   unchanged when the field is not found
 */
export function transformJsonInString<T = unknown>(
  content: string,
  field: string,
  transform: (json: T) => unknown,
  fallback: string,
): string {
  // Escape regex metacharacters so e.g. "a.b" does not also match "axb"
  const escapedField = field.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  // Use a non-greedy match for objects/arrays to prevent over-matching
  const pattern = new RegExp(
    `"${escapedField}"\\s*:\\s*(\\{[^}]*?\\}|\\[[^\\]]*?\\])`,
  )
  const match = content.match(pattern)

  if (!match) {
    return content
  }

  let newValue: string
  try {
    const json = JSON.parse(match[1])
    newValue = JSON.stringify(transform(json))
  } catch {
    newValue = fallback
  }

  // Important: only replace the exact matched portion to prevent duplicates.
  // A function replacer is used so `$&`, `$1`, ... inside the new value are
  // kept as plain text.
  return content.replace(match[0], () => `"${field}":${newValue}`)
}

/**
 * Generates a compact identifier from 64 bits: the low 32 bits of the current
 * millisecond timestamp followed by 32 bits taken from `Math.random()`.
 *
 * Because only 32 timestamp bits are kept, the timestamp part wraps around
 * every 2^32 ms (roughly 49.7 days). The 8 bytes are encoded as base64url,
 * giving an 11-character, URL-safe string.
 *
 * @param prefix Optional prefix to add to the ID
 * @returns The (optionally prefixed) ID string
 * @example
 * generateCompactId()       // => 11 base64url characters
 * generateCompactId('msg-') // => "msg-" followed by 11 base64url characters
 */
export const generateCompactId = (prefix?: string): string => {
  // Low 32 bits of the timestamp, forced to an unsigned value
  const timestampBits = (Date.now() & 0xffffffff) >>> 0
  // 32 random bits
  const randomBits = Math.floor(Math.random() * 0x100000000) >>> 0

  // Lay both words out big-endian: timestamp in bytes 0-3, random in bytes 4-7
  const bytes = Buffer.alloc(8)
  bytes.writeUInt32BE(timestampBits, 0)
  bytes.writeUInt32BE(randomBits, 4)

  const id = bytes.toString('base64url').replace(/=/g, '')

  return prefix ? `${prefix}${id}` : id
}

/**
 * Returns `str` with every NUL (U+0000) character removed.
 */
export const stripNullChars = (str: string): string =>
  str.split('\u0000').join('')

// Matches ANSI SGR sequences only: ESC [ <digits and semicolons> m
const ansiColorsRegex = /\x1B\[[0-9;]*m/g

/**
 * Removes ANSI colour/style (SGR) escape sequences from `str`. Other escape
 * sequences, such as cursor movement, are left in place.
 */
export function stripColors(str: string): string {
  // The pattern has no capture groups, so splitting on it drops every match
  const visibleParts = str.split(ansiColorsRegex)
  return visibleParts.join('')
}

// ESC followed by one of:
//  - a single character in the ranges @-Z or \-_
//  - a "[" sequence: parameter chars 0-?, intermediate chars space-/, final char @-~
//  - a "]" sequence: any non-ESC characters, then ESC and an optional backslash
const ansiRegex = /\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~]|\][^\x1B]*\x1B\\?)/g

/**
 * Removes ANSI escape sequences (colours, cursor control, `ESC ]` strings)
 * from `str`.
 */
export function stripAnsi(str: string): string {
  const withoutEscapes = str.replace(ansiRegex, () => '')
  return withoutEscapes
}

/**
 * Checks whether `value` matches any entry of `array`.
 *
 * String entries must equal `value` exactly; RegExp entries are tested
 * against it.
 *
 * @param array Exact strings and/or regular expressions to try
 * @param value The string to look for
 * @returns `true` if at least one entry matches
 */
export function includesMatch(
  array: readonly (string | RegExp)[],
  value: string,
): boolean {
  return array.some((p) => {
    if (typeof p === 'string') {
      return p === value
    }
    // Global/sticky regexes resume from `lastIndex`, so without a reset a
    // reused pattern can report a miss right after it reported a hit.
    p.lastIndex = 0
    return p.test(value)
  })
}

/**
 * Returns the longest string that `source` ends with and `next` starts with.
 * Handy for joining two strings without repeating the text they share at the
 * seam.
 *
 * @example
 * ```ts
 * suffixPrefixOverlap('foobar', 'barbaz'); // 'bar'
 * suffixPrefixOverlap('abc', 'def');       // ''
 * ```
 *
 * @param source  The string whose **suffix** is inspected.
 * @param next    The string whose **prefix** is inspected.
 * @returns       The longest overlapping edge, or an empty string if none exists.
 */
export function suffixPrefixOverlap(source: string, next: string): string {
  // An overlap cannot be longer than the shorter of the two strings
  let len = Math.min(source.length, next.length)
  while (len > 0) {
    const candidate = next.slice(0, len)
    if (source.endsWith(candidate)) {
      return candidate
    }
    len -= 1
  }

  return ''
}

/**
 * Escapes `str` the way it would appear inside a JSON string literal
 * (quotes, backslashes, control characters), without the surrounding
 * double quotes.
 */
export const escapeString = (str: string) => {
  const quoted = JSON.stringify(str)
  // Drop the opening and closing quote that JSON.stringify adds
  return quoted.slice(1, -1)
}