-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathextension.js
More file actions
351 lines (292 loc) · 11.2 KB
/
extension.js
File metadata and controls
351 lines (292 loc) · 11.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
const vscode = require('vscode');
const fs = require('fs');
const path = require('path');
/**
 * Detects the indentation style used by a JSON document.
 *
 * Every non-blank line with leading whitespace is classified as tab- or
 * space-indented. Lines that consist only of a closing bracket/brace
 * (optionally followed by a comma) still count towards the tab-vs-space
 * tally but are not used as width samples.
 *
 * For space indentation each candidate unit (2, 3, 4, 5, 6, 8) is scored by
 * how well it explains the observed widths; the best-scoring unit wins.
 *
 * @param {string} text - The JSON text to analyze
 * @returns {string|number} - '\t' when tab indentation dominates, otherwise
 *   the number of spaces per level (2 when nothing can be detected)
 */
function detectIndentation(text) {
  const closingOnly = /^[\}\]],?$/;
  const candidateUnits = [2, 3, 4, 5, 6, 8];

  const spaceSamples = [];
  let sawTabSample = false;
  let tabLines = 0;
  let spaceLines = 0;

  // Single pass over the lines. Splitting on every newline flavour and
  // skipping blank lines keeps the stray '\r' of a blank CRLF line from being
  // mistaken for space indentation.
  for (const line of text.split(/\r\n|\r|\n/)) {
    const trimmed = line.trim();
    if (trimmed === '') {
      continue;
    }

    const leading = /^\s+/.exec(line);
    if (leading === null) {
      continue;
    }

    const whitespace = leading[0];
    const usesTab = whitespace.includes('\t');
    if (usesTab) {
      tabLines++;
    } else {
      spaceLines++;
    }

    if (closingOnly.test(trimmed)) {
      continue;
    }

    if (usesTab) {
      sawTabSample = true;
    } else {
      spaceSamples.push(whitespace.length);
    }
  }

  // Tabs win when they are the only style seen, or the more common one.
  if (sawTabSample && (spaceSamples.length === 0 || tabLines > spaceLines)) {
    return '\t';
  }

  if (spaceSamples.length === 0) {
    return 2; // Default fallback
  }

  let bestUnit = 2;
  let bestScore = -Infinity;

  for (const unit of candidateUnits) {
    let score = 0;
    const levelCounts = new Map();

    for (const sample of spaceSamples) {
      const level = Math.round(sample / unit);
      const deviation = Math.abs(sample - level * unit);

      if (deviation === 0) {
        score += 10; // exact multiple of the unit
      } else if (deviation <= 1) {
        score += 3; // off by one
      } else if (deviation > 2) {
        score -= 2; // poor match
      }

      if (deviation <= 1) {
        levelCounts.set(level, (levelCounts.get(level) || 0) + 1);
      }
    }

    // Bonus for several samples landing on the same nesting level.
    for (const count of levelCounts.values()) {
      if (count > 1) {
        score += count * 2;
      }
    }

    // `>=` on purpose: widths that are all multiples of 4 (or 6, 8) are also
    // exact multiples of 2 and score identically, so a tie must resolve to
    // the LARGER unit. Candidates are visited in ascending order.
    if (score >= bestScore) {
      bestScore = score;
      bestUnit = unit;
    }
  }

  // Sanity check: if fewer than 60% of the samples sit within one column of a
  // multiple of the chosen unit, fall back to the most frequent raw width.
  if (bestScore > 0) {
    const consistentSamples = spaceSamples.filter((sample) => {
      const expected = Math.round(sample / bestUnit) * bestUnit;
      return Math.abs(sample - expected) <= 1;
    }).length;

    if (consistentSamples / spaceSamples.length < 0.6) {
      return getMostCommonIndentation(spaceSamples);
    }
  }

  return bestUnit;
}
/**
 * Fallback heuristic: picks the indentation width that occurs most often.
 * When several widths share the top count, the one seen first wins; an empty
 * sample list yields the default of 2.
 * @param {number[]} samples - Array of indentation samples
 * @returns {number} - Most common indentation
 */
function getMostCommonIndentation(samples) {
  const tally = new Map();
  samples.forEach((width) => {
    const seenSoFar = tally.has(width) ? tally.get(width) : 0;
    tally.set(width, seenSoFar + 1);
  });

  let winner = { width: 2, hits: 0 };
  tally.forEach((hits, width) => {
    // Strict comparison keeps the earliest width on ties.
    if (hits > winner.hits) {
      winner = { width, hits };
    }
  });

  return winner.width;
}
/**
 * Runs detectIndentation and returns its result together with statistics
 * about the lines that were inspected.
 * @param {string} text - The JSON text to analyze
 * @param {boolean} debug - When truthy, records a per-line sample description
 *   and logs the collected information to the console
 * @returns {object} - `{ indentation, debugInfo }`, where `indentation` is the
 *   value returned by detectIndentation
 */
function detectIndentationWithDebug(text, debug = false) {
  const allLines = text.split('\n');
  const debugInfo = {
    totalLines: allLines.length,
    analyzedLines: 0,
    samples: [],
    hasTabIndentation: false,
    hasSpaceIndentation: false
  };

  allLines.forEach((currentLine, index) => {
    const content = currentLine.trim();
    // Blank lines and lines holding only a closing bracket/brace are ignored.
    if (content === '' || /^[\}\]],?$/.test(content)) {
      return;
    }

    const leading = /^(\s+)/.exec(currentLine);
    if (leading === null) {
      return;
    }

    const indent = leading[1];
    const usesTab = indent.includes('\t');
    debugInfo.analyzedLines += 1;

    if (usesTab) {
      debugInfo.hasTabIndentation = true;
    } else {
      debugInfo.hasSpaceIndentation = true;
    }

    if (debug) {
      debugInfo.samples.push(
        usesTab
          ? `Line ${index + 1}: TAB`
          : `Line ${index + 1}: ${indent.length} spaces`
      );
    }
  });

  const indentation = detectIndentation(text);

  if (debug) {
    console.log('Indentation Detection Debug:', debugInfo);
    console.log('Detected indentation:', indentation);
  }

  return { indentation, debugInfo };
}
/**
 * Registers the `deduplicate-json-objects.removeDuplicates` command and adds
 * its disposable to the extension context.
 *
 * The command works on the active editor, which must hold a `.json` file whose
 * content is a JSON array of objects. The user picks one property of the first
 * object; for every distinct value of that property (compared by its JSON
 * serialization) only the first object is kept. Objects that do not have the
 * property are dropped and reported separately in the summary message. The
 * result is written back with the indentation detected in the original text,
 * keeping a trailing newline if the original had one.
 *
 * @param {vscode.ExtensionContext} context
 */
function activate(context) {
  console.log('Deduplicate Objects from JSON is now active');

  const disposable = vscode.commands.registerCommand(
    'deduplicate-json-objects.removeDuplicates',
    async function () {
      const editor = vscode.window.activeTextEditor;
      if (!editor) {
        vscode.window.showErrorMessage('No active editor found. Please open a JSON file.');
        return;
      }

      const document = editor.document;
      if (path.extname(document.fileName).toLowerCase() !== '.json') {
        vscode.window.showErrorMessage('The active file is not a JSON file.');
        return;
      }

      try {
        const text = document.getText();

        let jsonData;
        try {
          jsonData = JSON.parse(text);
        } catch (e) {
          vscode.window.showErrorMessage(`Invalid JSON format: ${e.message}`);
          return;
        }

        if (!Array.isArray(jsonData)) {
          vscode.window.showErrorMessage('The JSON file must contain an array of objects.');
          return;
        }

        if (jsonData.length === 0) {
          vscode.window.showInformationMessage('The JSON array is empty. Nothing to deduplicate.');
          return;
        }

        // A null or primitive element has no properties to compare; reject it
        // up front instead of failing later with an opaque TypeError.
        const allObjects = jsonData.every((item) => item !== null && typeof item === 'object');
        if (!allObjects) {
          vscode.window.showErrorMessage('The JSON file must contain an array of objects.');
          return;
        }

        const originalIndentation = detectIndentation(text);

        // The first element supplies the list of properties offered to the user.
        const keys = Object.keys(jsonData[0]);
        const selectedKey = await vscode.window.showQuickPick(keys, {
          placeHolder: 'Select a property to use for duplicate comparison',
          canPickMany: false
        });

        // Only an absent pick means "cancelled"; '' is a legal JSON property name.
        if (selectedKey == null) {
          return;
        }

        const seenValues = new Set();
        const uniqueData = [];
        let missingKeyCount = 0;

        for (const item of jsonData) {
          // Own-property check so inherited members (e.g. `toString`) are not
          // mistaken for data.
          if (!Object.prototype.hasOwnProperty.call(item, selectedKey)) {
            missingKeyCount++;
            continue;
          }

          // Serialize so that object/array values compare by content.
          const fingerprint = JSON.stringify(item[selectedKey]);
          if (!seenValues.has(fingerprint)) {
            seenValues.add(fingerprint);
            uniqueData.push(item);
          }
        }

        // Items without the key are not duplicates; keep them out of this count.
        const removedCount = jsonData.length - uniqueData.length - missingKeyCount;

        let resultJson = JSON.stringify(uniqueData, null, originalIndentation);
        if (text.endsWith('\n')) {
          resultJson += '\n'; // keep the file's trailing newline
        }

        const fullRange = new vscode.Range(
          document.positionAt(0),
          document.positionAt(text.length)
        );
        const edit = new vscode.WorkspaceEdit();
        edit.replace(document.uri, fullRange, resultJson);

        // applyEdit resolves to false when the edit could not be applied.
        const applied = await vscode.workspace.applyEdit(edit);
        if (!applied) {
          vscode.window.showErrorMessage(
            'Error processing JSON: the edit could not be applied to the document.'
          );
          return;
        }

        let summary = `Removed ${removedCount} duplicate(s) based on "${selectedKey}". ${uniqueData.length} unique item(s) remain.`;
        if (missingKeyCount > 0) {
          summary += ` ${missingKeyCount} item(s) without "${selectedKey}" were dropped.`;
        }
        vscode.window.showInformationMessage(summary);
      } catch (error) {
        vscode.window.showErrorMessage(`Error processing JSON: ${error.message}`);
      }
    }
  );

  context.subscriptions.push(disposable);
}
/**
 * Deactivation hook exported alongside `activate`. It performs no work and
 * returns nothing.
 */
function deactivate() {
  return undefined;
}
// Public surface of this module: the activate/deactivate entry points plus the
// two indentation-detection helpers.
module.exports = {
activate,
deactivate,
detectIndentation,
detectIndentationWithDebug
};