/**
* @license
* Copyright 2025 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { vi, describe, it, expect, beforeEach, afterEach } from 'vitest';
import { execSync } from 'child_process';
import * as os from 'os';
import { detect as chardetDetect } from 'chardet';
// Mock dependencies
vi.mock('child_process');
vi.mock('os');
vi.mock('chardet');
// Import the functions we want to test after refactoring
import {
getCachedEncodingForBuffer,
getSystemEncoding,
windowsCodePageToEncoding,
detectEncodingFromBuffer,
resetEncodingCache,
} from './systemEncoding.js';
describe('Shell Command Processor - Encoding Functions', () => {
// Handles to the mocked collaborators; re-bound in beforeEach so every test
// starts from the same wiring.
let consoleWarnSpy: ReturnType<typeof vi.spyOn>;
let mockedExecSync: ReturnType<typeof vi.mocked<typeof execSync>>;
let mockedOsPlatform: ReturnType<typeof vi.mocked<() => string>>;
let mockedChardetDetect: ReturnType<typeof vi.mocked<typeof chardetDetect>>;

// Locale variables that might affect the tests. They are cleared before each
// test and put back afterwards so the suite leaves process.env as it found it.
const localeEnvVars = ['LC_ALL', 'LC_CTYPE', 'LANG'];
// Values the locale variables held before the current test touched them.
let savedLocaleEnv: Record<string, string | undefined> = {};

beforeEach(() => {
  // Silence console.warn while still recording calls for assertions.
  consoleWarnSpy = vi.spyOn(console, 'warn').mockImplementation(() => {});
  mockedExecSync = vi.mocked(execSync);
  mockedOsPlatform = vi.mocked(os.platform);
  mockedChardetDetect = vi.mocked(chardetDetect);

  // Reset the encoding cache before each test
  resetEncodingCache();

  // Remember, then clear, environment variables that might affect tests
  savedLocaleEnv = {};
  for (const name of localeEnvVars) {
    savedLocaleEnv[name] = process.env[name];
    delete process.env[name];
  }
});

afterEach(() => {
  // NOTE(review): newer Vitest releases appear to limit restoreAllMocks() to
  // vi.spyOn spies. Reset the module automocks explicitly so stubbed
  // implementations and call history never carry over into the next test.
  vi.resetAllMocks();
  vi.restoreAllMocks();
  resetEncodingCache();

  // Put the locale variables back exactly as they were before the test.
  // Assigning undefined to process.env would store the string "undefined",
  // so variables that were originally unset are deleted instead.
  for (const name of localeEnvVars) {
    const original = savedLocaleEnv[name];
    if (original === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = original;
    }
  }
});
describe('windowsCodePageToEncoding', () => {
  /** Asserts, in order, that each code page maps to the listed encoding. */
  const expectMappings = (
    pairs: ReadonlyArray<readonly [number, string]>,
  ): void => {
    for (const [codePage, encoding] of pairs) {
      expect(windowsCodePageToEncoding(codePage)).toBe(encoding);
    }
  };

  it('should map common Windows code pages correctly', () => {
    expectMappings([
      [437, 'cp437'],
      [850, 'cp850'],
      [65001, 'utf-8'],
      [1252, 'windows-1252'],
      [932, 'shift_jis'],
      [936, 'gb2312'],
      [949, 'euc-kr'],
      [950, 'big5'],
      [1200, 'utf-16le'],
      [1201, 'utf-16be'],
    ]);
  });

  it('should return null for unmapped code pages and warn', () => {
    const unmapped = windowsCodePageToEncoding(99999);

    expect(unmapped).toBeNull();
    expect(consoleWarnSpy).toHaveBeenCalledWith(
      'Unable to determine encoding for windows code page 99999.',
    );
  });

  it('should handle all Windows-specific code pages', () => {
    expectMappings([
      [874, 'windows-874'],
      [1250, 'windows-1250'],
      [1251, 'windows-1251'],
      [1253, 'windows-1253'],
      [1254, 'windows-1254'],
      [1255, 'windows-1255'],
      [1256, 'windows-1256'],
      [1257, 'windows-1257'],
      [1258, 'windows-1258'],
    ]);
  });
});
describe('detectEncodingFromBuffer', () => {
  /** Builds a new sample buffer so no two tests share an instance. */
  const makeSample = (): Buffer => Buffer.from('test content', 'utf8');

  it('should detect encoding using chardet successfully', () => {
    const sample = makeSample();
    mockedChardetDetect.mockReturnValue('UTF-8');

    expect(detectEncodingFromBuffer(sample)).toBe('utf-8');
    expect(mockedChardetDetect).toHaveBeenCalledWith(sample);
  });

  it('should handle chardet returning mixed case encoding', () => {
    mockedChardetDetect.mockReturnValue('ISO-8859-1');

    expect(detectEncodingFromBuffer(makeSample())).toBe('iso-8859-1');
  });

  it('should return null when chardet fails', () => {
    const sample = makeSample();
    mockedChardetDetect.mockImplementation(() => {
      throw new Error('Detection failed');
    });

    const detected = detectEncodingFromBuffer(sample);

    expect(detected).toBeNull();
    expect(consoleWarnSpy).toHaveBeenCalledWith(
      'Failed to detect encoding with chardet:',
      expect.any(Error),
    );
  });

  it('should return null when chardet returns null', () => {
    mockedChardetDetect.mockReturnValue(null);

    expect(detectEncodingFromBuffer(makeSample())).toBeNull();
  });

  it('should return null when chardet returns non-string', () => {
    // Force a non-string return value through the mock's string-typed API.
    const candidates = ['utf-8', 'iso-8859-1'] as unknown as string;
    mockedChardetDetect.mockReturnValue(candidates);

    expect(detectEncodingFromBuffer(makeSample())).toBeNull();
  });
});
describe('getSystemEncoding - Windows', () => {
  /** Asserts that the generic "chcp failed" warning was logged. */
  const expectChcpFailureWarning = (): void => {
    expect(consoleWarnSpy).toHaveBeenCalledWith(
      expect.stringContaining(
        "Failed to get Windows code page using 'chcp' command",
      ),
    );
  };

  beforeEach(() => {
    mockedOsPlatform.mockReturnValue('win32');
  });

  it('should parse Windows chcp output correctly', () => {
    mockedExecSync.mockReturnValue('Active code page: 65001');

    expect(getSystemEncoding()).toBe('utf-8');
    expect(mockedExecSync).toHaveBeenCalledWith('chcp', { encoding: 'utf8' });
  });

  it('should handle different chcp output formats', () => {
    mockedExecSync.mockReturnValue('Current code page: 1252');

    expect(getSystemEncoding()).toBe('windows-1252');
  });

  it('should handle chcp output with extra whitespace', () => {
    mockedExecSync.mockReturnValue('Active code page: 437 ');

    expect(getSystemEncoding()).toBe('cp437');
  });

  it('should return null when chcp command fails', () => {
    mockedExecSync.mockImplementation(() => {
      throw new Error('Command failed');
    });

    expect(getSystemEncoding()).toBeNull();
    expectChcpFailureWarning();
  });

  it('should return null when chcp output cannot be parsed', () => {
    mockedExecSync.mockReturnValue('Unexpected output format');

    expect(getSystemEncoding()).toBeNull();
    expectChcpFailureWarning();
  });

  it('should return null when code page is not a number', () => {
    mockedExecSync.mockReturnValue('Active code page: abc');

    expect(getSystemEncoding()).toBeNull();
    expectChcpFailureWarning();
  });

  it('should return null when code page maps to null', () => {
    mockedExecSync.mockReturnValue('Active code page: 99999');

    expect(getSystemEncoding()).toBeNull();
    // The warning for an unknown code page comes from windowsCodePageToEncoding.
    expect(consoleWarnSpy).toHaveBeenCalledWith(
      'Unable to determine encoding for windows code page 99999.',
    );
  });
});
describe('getSystemEncoding - Unix-like', () => {
  beforeEach(() => {
    mockedOsPlatform.mockReturnValue('linux');
  });

  it('should parse locale from LC_ALL environment variable', () => {
    process.env.LC_ALL = 'en_US.UTF-8';

    expect(getSystemEncoding()).toBe('utf-8');
  });

  it('should parse locale from LC_CTYPE when LC_ALL is not set', () => {
    process.env.LC_CTYPE = 'fr_FR.ISO-8859-1';

    expect(getSystemEncoding()).toBe('iso-8859-1');
  });

  it('should parse locale from LANG when LC_ALL and LC_CTYPE are not set', () => {
    process.env.LANG = 'de_DE.UTF-8';

    expect(getSystemEncoding()).toBe('utf-8');
  });

  it('should handle locale charmap command when environment variables are empty', () => {
    mockedExecSync.mockReturnValue('UTF-8\n');

    const encoding = getSystemEncoding();

    expect(encoding).toBe('utf-8');
    expect(mockedExecSync).toHaveBeenCalledWith('locale charmap', {
      encoding: 'utf8',
    });
  });

  it('should handle locale charmap with mixed case', () => {
    mockedExecSync.mockReturnValue('ISO-8859-1\n');

    expect(getSystemEncoding()).toBe('iso-8859-1');
  });

  it('should return null when locale charmap fails', () => {
    mockedExecSync.mockImplementation(() => {
      throw new Error('Command failed');
    });

    const encoding = getSystemEncoding();

    expect(encoding).toBeNull();
    expect(consoleWarnSpy).toHaveBeenCalledWith(
      'Failed to get locale charmap.',
    );
  });

  it('should handle locale without encoding (no dot)', () => {
    process.env.LANG = 'C';

    expect(getSystemEncoding()).toBe('c');
  });

  it('should handle empty locale environment variables', () => {
    // All three variables are present but blank.
    process.env.LC_ALL = '';
    process.env.LC_CTYPE = '';
    process.env.LANG = '';
    mockedExecSync.mockReturnValue('UTF-8');

    expect(getSystemEncoding()).toBe('utf-8');
  });

  it('should return locale as-is when locale format has no dot', () => {
    process.env.LANG = 'invalid_format';

    expect(getSystemEncoding()).toBe('invalid_format');
  });

  it('should prioritize LC_ALL over other environment variables', () => {
    process.env.LC_ALL = 'en_US.UTF-8';
    process.env.LC_CTYPE = 'fr_FR.ISO-8859-1';
    process.env.LANG = 'de_DE.CP1252';

    expect(getSystemEncoding()).toBe('utf-8');
  });

  it('should prioritize LC_CTYPE over LANG', () => {
    process.env.LC_CTYPE = 'fr_FR.ISO-8859-1';
    process.env.LANG = 'de_DE.CP1252';

    expect(getSystemEncoding()).toBe('iso-8859-1');
  });
});
// Suite label matches the function actually exercised below
// (getCachedEncodingForBuffer); the previous label named a function that is
// not imported anywhere in this file.
describe('getCachedEncodingForBuffer', () => {
  /** Makes every execSync call throw, simulating a failed `locale` lookup. */
  const failSystemEncodingLookup = (): void => {
    mockedExecSync.mockImplementation(() => {
      throw new Error('locale command failed');
    });
  };

  beforeEach(() => {
    mockedOsPlatform.mockReturnValue('linux');
  });

  it('should use cached system encoding on subsequent calls', () => {
    process.env.LANG = 'en_US.UTF-8';
    const buffer = Buffer.from('test');

    // First call
    expect(getCachedEncodingForBuffer(buffer)).toBe('utf-8');

    // Change environment (should not affect cached result)
    process.env.LANG = 'fr_FR.ISO-8859-1';

    // Second call should use cached value
    expect(getCachedEncodingForBuffer(buffer)).toBe('utf-8');
  });

  it('should fall back to buffer detection when system encoding fails', () => {
    // No environment variables set, and the locale command fails.
    failSystemEncodingLookup();
    const buffer = Buffer.from('test');
    mockedChardetDetect.mockReturnValue('ISO-8859-1');

    const result = getCachedEncodingForBuffer(buffer);

    expect(result).toBe('iso-8859-1');
    expect(mockedChardetDetect).toHaveBeenCalledWith(buffer);
  });

  it('should fall back to utf-8 when both system and buffer detection fail', () => {
    // System encoding fails
    failSystemEncodingLookup();
    // Buffer detection fails
    mockedChardetDetect.mockImplementation(() => {
      throw new Error('chardet failed');
    });

    const result = getCachedEncodingForBuffer(Buffer.from('test'));

    expect(result).toBe('utf-8');
  });

  it('should not cache buffer detection results', () => {
    // System encoding fails initially
    failSystemEncodingLookup();
    const buffer1 = Buffer.from('test1');
    const buffer2 = Buffer.from('test2');
    mockedChardetDetect
      .mockReturnValueOnce('ISO-8859-1')
      .mockReturnValueOnce('UTF-16');

    const result1 = getCachedEncodingForBuffer(buffer1);
    const result2 = getCachedEncodingForBuffer(buffer2);

    expect(result1).toBe('iso-8859-1');
    expect(result2).toBe('utf-16');
    expect(mockedChardetDetect).toHaveBeenCalledTimes(2);
  });

  it('should handle Windows system encoding', () => {
    mockedOsPlatform.mockReturnValue('win32');
    mockedExecSync.mockReturnValue('Active code page: 1252');

    const result = getCachedEncodingForBuffer(Buffer.from('test'));

    expect(result).toBe('windows-1252');
  });

  it('should cache null system encoding result', () => {
    // Reset the cache specifically for this test
    resetEncodingCache();
    // Ensure we're on Unix-like for this test
    mockedOsPlatform.mockReturnValue('linux');
    // System encoding detection returns null
    failSystemEncodingLookup();
    const buffer1 = Buffer.from('test1');
    const buffer2 = Buffer.from('test2');
    mockedChardetDetect
      .mockReturnValueOnce('ISO-8859-1')
      .mockReturnValueOnce('UTF-16');
    // Clear any previously recorded calls so the counts below only reflect
    // this test; mockClear keeps the throwing implementation in place.
    mockedExecSync.mockClear();

    const result1 = getCachedEncodingForBuffer(buffer1);
    const result2 = getCachedEncodingForBuffer(buffer2);

    // Should call execSync only once due to caching (null result is cached)
    expect(mockedExecSync).toHaveBeenCalledTimes(1);
    expect(result1).toBe('iso-8859-1');
    expect(result2).toBe('utf-16');

    // Call a third time to verify cache is still used
    const buffer3 = Buffer.from('test3');
    mockedChardetDetect.mockReturnValueOnce('UTF-32');
    const result3 = getCachedEncodingForBuffer(buffer3);

    // Still should be only one call to execSync
    expect(mockedExecSync).toHaveBeenCalledTimes(1);
    expect(result3).toBe('utf-32');
  });
});
describe('Cross-platform behavior', () => {
  /**
   * Reports the given platform, sets LANG to a UTF-8 locale and asserts that
   * the system encoding resolves to 'utf-8'.
   */
  const expectUtf8FromLangOn = (platform: string): void => {
    mockedOsPlatform.mockReturnValue(platform);
    process.env.LANG = 'en_US.UTF-8';

    expect(getSystemEncoding()).toBe('utf-8');
  };

  it('should work correctly on macOS', () => {
    expectUtf8FromLangOn('darwin');
  });

  it('should work correctly on other Unix-like systems', () => {
    expectUtf8FromLangOn('freebsd');
  });

  it('should handle unknown platforms as Unix-like', () => {
    expectUtf8FromLangOn('unknown' as NodeJS.Platform);
  });
});
describe('Edge cases and error handling', () => {
  it('should handle empty buffer gracefully', () => {
    mockedOsPlatform.mockReturnValue('linux');
    process.env.LANG = 'en_US.UTF-8';
    const emptyBuffer = Buffer.alloc(0);

    expect(getCachedEncodingForBuffer(emptyBuffer)).toBe('utf-8');
  });

  it('should handle very large buffers', () => {
    mockedOsPlatform.mockReturnValue('linux');
    process.env.LANG = 'en_US.UTF-8';
    // One mebibyte filled with the letter 'a'.
    const largeBuffer = Buffer.alloc(1024 * 1024, 'a');

    expect(getCachedEncodingForBuffer(largeBuffer)).toBe('utf-8');
  });

  it('should handle Unicode content', () => {
    mockedOsPlatform.mockReturnValue('linux');
    const unicodeText = '你好世界 🌍 ñoño';
    // System encoding fails
    mockedExecSync.mockImplementation(() => {
      throw new Error('locale command failed');
    });
    mockedChardetDetect.mockReturnValue('UTF-8');
    const unicodeBuffer = Buffer.from(unicodeText, 'utf8');

    expect(getCachedEncodingForBuffer(unicodeBuffer)).toBe('utf-8');
  });
});
});