summaryrefslogtreecommitdiff
path: root/packages/cli/src
diff options
context:
space:
mode:
Diffstat (limited to 'packages/cli/src')
-rw-r--r--packages/cli/src/ui/components/shared/text-buffer.test.ts95
-rw-r--r--packages/cli/src/ui/components/shared/text-buffer.ts354
-rw-r--r--packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts325
-rw-r--r--packages/cli/src/ui/components/shared/vim-buffer-actions.ts408
4 files changed, 856 insertions, 326 deletions
diff --git a/packages/cli/src/ui/components/shared/text-buffer.test.ts b/packages/cli/src/ui/components/shared/text-buffer.test.ts
index cbceedbc..fb75179e 100644
--- a/packages/cli/src/ui/components/shared/text-buffer.test.ts
+++ b/packages/cli/src/ui/components/shared/text-buffer.test.ts
@@ -15,7 +15,11 @@ import {
textBufferReducer,
TextBufferState,
TextBufferAction,
+ findWordEndInLine,
+ findNextWordStartInLine,
+ isWordCharStrict,
} from './text-buffer.js';
+import { cpLen } from '../../utils/textUtils.js';
const initialState: TextBufferState = {
lines: [''],
@@ -1591,3 +1595,94 @@ describe('textBufferReducer vim operations', () => {
});
});
});
+
+describe('Unicode helper functions', () => {
+ describe('findWordEndInLine with Unicode', () => {
+ it('should handle combining characters', () => {
+ // café with combining accent
+ const cafeWithCombining = 'cafe\u0301';
+ const result = findWordEndInLine(cafeWithCombining + ' test', 0);
+ expect(result).toBe(3); // End of 'café' at base character 'e', not combining accent
+ });
+
+ it('should handle precomposed characters with diacritics', () => {
+ // café with precomposed é (U+00E9)
+ const cafePrecomposed = 'café';
+ const result = findWordEndInLine(cafePrecomposed + ' test', 0);
+ expect(result).toBe(3); // End of 'café' at precomposed character 'é'
+ });
+
+ it('should return null when no word end found', () => {
+ const result = findWordEndInLine(' ', 0);
+ expect(result).toBeNull(); // No word end found in whitespace-only string string
+ });
+ });
+
+ describe('findNextWordStartInLine with Unicode', () => {
+ it('should handle right-to-left text', () => {
+ const result = findNextWordStartInLine('hello مرحبا world', 0);
+ expect(result).toBe(6); // Start of Arabic word
+ });
+
+ it('should handle Chinese characters', () => {
+ const result = findNextWordStartInLine('hello 你好 world', 0);
+ expect(result).toBe(6); // Start of Chinese word
+ });
+
+ it('should return null at end of line', () => {
+ const result = findNextWordStartInLine('hello', 10);
+ expect(result).toBeNull();
+ });
+
+ it('should handle combining characters', () => {
+ // café with combining accent + next word
+ const textWithCombining = 'cafe\u0301 test';
+ const result = findNextWordStartInLine(textWithCombining, 0);
+ expect(result).toBe(6); // Start of 'test' after 'café ' (combining char makes string longer)
+ });
+
+ it('should handle precomposed characters with diacritics', () => {
+ // café with precomposed é + next word
+ const textPrecomposed = 'café test';
+ const result = findNextWordStartInLine(textPrecomposed, 0);
+ expect(result).toBe(5); // Start of 'test' after 'café '
+ });
+ });
+
+ describe('isWordCharStrict with Unicode', () => {
+ it('should return true for ASCII word characters', () => {
+ expect(isWordCharStrict('a')).toBe(true);
+ expect(isWordCharStrict('Z')).toBe(true);
+ expect(isWordCharStrict('0')).toBe(true);
+ expect(isWordCharStrict('_')).toBe(true);
+ });
+
+ it('should return false for punctuation', () => {
+ expect(isWordCharStrict('.')).toBe(false);
+ expect(isWordCharStrict(',')).toBe(false);
+ expect(isWordCharStrict('!')).toBe(false);
+ });
+
+ it('should return true for non-Latin scripts', () => {
+ expect(isWordCharStrict('你')).toBe(true); // Chinese character
+ expect(isWordCharStrict('م')).toBe(true); // Arabic character
+ });
+
+ it('should return false for whitespace', () => {
+ expect(isWordCharStrict(' ')).toBe(false);
+ expect(isWordCharStrict('\t')).toBe(false);
+ });
+ });
+
+ describe('cpLen with Unicode', () => {
+ it('should handle combining characters', () => {
+ expect(cpLen('é')).toBe(1); // Precomposed
+ expect(cpLen('e\u0301')).toBe(2); // e + combining acute
+ });
+
+ it('should handle Chinese and Arabic text', () => {
+ expect(cpLen('hello 你好 world')).toBe(14); // 5 + 1 + 2 + 1 + 5 = 14
+ expect(cpLen('hello مرحبا world')).toBe(17);
+ });
+ });
+});
diff --git a/packages/cli/src/ui/components/shared/text-buffer.ts b/packages/cli/src/ui/components/shared/text-buffer.ts
index cf5ce889..d46e52cc 100644
--- a/packages/cli/src/ui/components/shared/text-buffer.ts
+++ b/packages/cli/src/ui/components/shared/text-buffer.ts
@@ -33,143 +33,329 @@ function isWordChar(ch: string | undefined): boolean {
return !/[\s,.;!?]/.test(ch);
}
-// Vim-specific word boundary functions
-export const findNextWordStart = (
- text: string,
- currentOffset: number,
-): number => {
- let i = currentOffset;
+// Helper functions for line-based word navigation
+export const isWordCharStrict = (char: string): boolean =>
+ /[\w\p{L}\p{N}]/u.test(char); // Matches a single character that is any Unicode letter, any Unicode number, or an underscore
+
+export const isWhitespace = (char: string): boolean => /\s/.test(char);
+
+// Check if a character is a combining mark (only diacritics for now)
+export const isCombiningMark = (char: string): boolean => /\p{M}/u.test(char);
+
+// Check if a character should be considered part of a word (including combining marks)
+export const isWordCharWithCombining = (char: string): boolean =>
+ isWordCharStrict(char) || isCombiningMark(char);
+
+// Get the script of a character (simplified for common scripts)
+export const getCharScript = (char: string): string => {
+ if (/[\p{Script=Latin}]/u.test(char)) return 'latin'; // All Latin script chars including diacritics
+ if (/[\p{Script=Han}]/u.test(char)) return 'han'; // Chinese
+ if (/[\p{Script=Arabic}]/u.test(char)) return 'arabic';
+ if (/[\p{Script=Hiragana}]/u.test(char)) return 'hiragana';
+ if (/[\p{Script=Katakana}]/u.test(char)) return 'katakana';
+ if (/[\p{Script=Cyrillic}]/u.test(char)) return 'cyrillic';
+ return 'other';
+};
- if (i >= text.length) return i;
+// Check if two characters are from different scripts (indicating word boundary)
+export const isDifferentScript = (char1: string, char2: string): boolean => {
+ if (!isWordCharStrict(char1) || !isWordCharStrict(char2)) return false;
+ return getCharScript(char1) !== getCharScript(char2);
+};
+
+// Find next word start within a line, starting from col
+export const findNextWordStartInLine = (
+ line: string,
+ col: number,
+): number | null => {
+ const chars = toCodePoints(line);
+ let i = col;
- const currentChar = text[i];
+ if (i >= chars.length) return null;
+
+ const currentChar = chars[i];
// Skip current word/sequence based on character type
- if (/\w/.test(currentChar)) {
- // Skip current word characters
- while (i < text.length && /\w/.test(text[i])) {
+ if (isWordCharStrict(currentChar)) {
+ while (i < chars.length && isWordCharWithCombining(chars[i])) {
+ // Check for script boundary - if next character is from different script, stop here
+ if (
+ i + 1 < chars.length &&
+ isWordCharStrict(chars[i + 1]) &&
+ isDifferentScript(chars[i], chars[i + 1])
+ ) {
+ i++; // Include current character
+ break; // Stop at script boundary
+ }
i++;
}
- } else if (!/\s/.test(currentChar)) {
- // Skip current non-word, non-whitespace characters (like "/", ".", etc.)
- while (i < text.length && !/\w/.test(text[i]) && !/\s/.test(text[i])) {
+ } else if (!isWhitespace(currentChar)) {
+ while (
+ i < chars.length &&
+ !isWordCharStrict(chars[i]) &&
+ !isWhitespace(chars[i])
+ ) {
i++;
}
}
// Skip whitespace
- while (i < text.length && /\s/.test(text[i])) {
+ while (i < chars.length && isWhitespace(chars[i])) {
i++;
}
- // If we reached the end of text and there's no next word,
- // vim behavior for dw is to delete to the end of the current word
- if (i >= text.length) {
- // Go back to find the end of the last word
- let endOfLastWord = text.length - 1;
- while (endOfLastWord >= 0 && /\s/.test(text[endOfLastWord])) {
- endOfLastWord--;
- }
- // For dw on last word, return position AFTER the last character to delete entire word
- return Math.max(currentOffset + 1, endOfLastWord + 1);
- }
-
- return i;
+ return i < chars.length ? i : null;
};
-export const findPrevWordStart = (
- text: string,
- currentOffset: number,
-): number => {
- let i = currentOffset;
+// Find previous word start within a line
+export const findPrevWordStartInLine = (
+ line: string,
+ col: number,
+): number | null => {
+ const chars = toCodePoints(line);
+ let i = col;
- // If at beginning of text, return current position
- if (i <= 0) {
- return currentOffset;
- }
+ if (i <= 0) return null;
- // Move back one character to start searching
i--;
// Skip whitespace moving backwards
- while (i >= 0 && (text[i] === ' ' || text[i] === '\t' || text[i] === '\n')) {
+ while (i >= 0 && isWhitespace(chars[i])) {
i--;
}
- if (i < 0) {
- return 0; // Reached beginning of text
- }
-
- const charAtI = text[i];
+ if (i < 0) return null;
- if (/\w/.test(charAtI)) {
+ if (isWordCharStrict(chars[i])) {
// We're in a word, move to its beginning
- while (i >= 0 && /\w/.test(text[i])) {
+ while (i >= 0 && isWordCharStrict(chars[i])) {
+ // Check for script boundary - if previous character is from different script, stop here
+ if (
+ i - 1 >= 0 &&
+ isWordCharStrict(chars[i - 1]) &&
+ isDifferentScript(chars[i], chars[i - 1])
+ ) {
+ return i; // Return current position at script boundary
+ }
i--;
}
- return i + 1; // Return first character of word
+ return i + 1;
} else {
// We're in punctuation, move to its beginning
- while (
- i >= 0 &&
- !/\w/.test(text[i]) &&
- text[i] !== ' ' &&
- text[i] !== '\t' &&
- text[i] !== '\n'
- ) {
+ while (i >= 0 && !isWordCharStrict(chars[i]) && !isWhitespace(chars[i])) {
i--;
}
- return i + 1; // Return first character of punctuation sequence
+ return i + 1;
}
};
-export const findWordEnd = (text: string, currentOffset: number): number => {
- let i = currentOffset;
+// Find word end within a line
+export const findWordEndInLine = (line: string, col: number): number | null => {
+ const chars = toCodePoints(line);
+ let i = col;
- // If we're already at the end of a word, advance to next word
- if (
- i < text.length &&
- /\w/.test(text[i]) &&
- (i + 1 >= text.length || !/\w/.test(text[i + 1]))
- ) {
- // We're at the end of a word, move forward to find next word
+ // If we're already at the end of a word (including punctuation sequences), advance to next word
+ // This includes both regular word endings and script boundaries
+ const atEndOfWordChar =
+ i < chars.length &&
+ isWordCharWithCombining(chars[i]) &&
+ (i + 1 >= chars.length ||
+ !isWordCharWithCombining(chars[i + 1]) ||
+ (isWordCharStrict(chars[i]) &&
+ i + 1 < chars.length &&
+ isWordCharStrict(chars[i + 1]) &&
+ isDifferentScript(chars[i], chars[i + 1])));
+
+ const atEndOfPunctuation =
+ i < chars.length &&
+ !isWordCharWithCombining(chars[i]) &&
+ !isWhitespace(chars[i]) &&
+ (i + 1 >= chars.length ||
+ isWhitespace(chars[i + 1]) ||
+ isWordCharWithCombining(chars[i + 1]));
+
+ if (atEndOfWordChar || atEndOfPunctuation) {
+ // We're at the end of a word or punctuation sequence, move forward to find next word
i++;
- // Skip whitespace/punctuation to find next word
- while (i < text.length && !/\w/.test(text[i])) {
+ // Skip whitespace to find next word or punctuation
+ while (i < chars.length && isWhitespace(chars[i])) {
i++;
}
}
- // If we're not on a word character, find the next word
- if (i < text.length && !/\w/.test(text[i])) {
- while (i < text.length && !/\w/.test(text[i])) {
+ // If we're not on a word character, find the next word or punctuation sequence
+ if (i < chars.length && !isWordCharWithCombining(chars[i])) {
+ // Skip whitespace to find next word or punctuation
+ while (i < chars.length && isWhitespace(chars[i])) {
i++;
}
}
- // Move to end of current word
- while (i < text.length && /\w/.test(text[i])) {
- i++;
+ // Move to end of current word (including combining marks, but stop at script boundaries)
+ let foundWord = false;
+ let lastBaseCharPos = -1;
+
+ if (i < chars.length && isWordCharWithCombining(chars[i])) {
+ // Handle word characters
+ while (i < chars.length && isWordCharWithCombining(chars[i])) {
+ foundWord = true;
+
+ // Track the position of the last base character (not combining mark)
+ if (isWordCharStrict(chars[i])) {
+ lastBaseCharPos = i;
+ }
+
+ // Check if next character is from a different script (word boundary)
+ if (
+ i + 1 < chars.length &&
+ isWordCharStrict(chars[i + 1]) &&
+ isDifferentScript(chars[i], chars[i + 1])
+ ) {
+ i++; // Include current character
+ if (isWordCharStrict(chars[i - 1])) {
+ lastBaseCharPos = i - 1;
+ }
+ break; // Stop at script boundary
+ }
+
+ i++;
+ }
+ } else if (i < chars.length && !isWhitespace(chars[i])) {
+ // Handle punctuation sequences (like ████)
+ while (
+ i < chars.length &&
+ !isWordCharStrict(chars[i]) &&
+ !isWhitespace(chars[i])
+ ) {
+ foundWord = true;
+ lastBaseCharPos = i;
+ i++;
+ }
}
- // Move back one to be on the last character of the word
- return Math.max(currentOffset, i - 1);
+ // Only return a position if we actually found a word
+ // Return the position of the last base character, not combining marks
+ if (foundWord && lastBaseCharPos >= col) {
+ return lastBaseCharPos;
+ }
+
+ return null;
};
-// Helper functions for vim operations
-export const getOffsetFromPosition = (
- row: number,
- col: number,
+// Find next word across lines
+export const findNextWordAcrossLines = (
lines: string[],
-): number => {
- let offset = 0;
- for (let i = 0; i < row; i++) {
- offset += lines[i].length + 1; // +1 for newline
+ cursorRow: number,
+ cursorCol: number,
+ searchForWordStart: boolean,
+): { row: number; col: number } | null => {
+ // First try current line
+ const currentLine = lines[cursorRow] || '';
+ const colInCurrentLine = searchForWordStart
+ ? findNextWordStartInLine(currentLine, cursorCol)
+ : findWordEndInLine(currentLine, cursorCol);
+
+ if (colInCurrentLine !== null) {
+ return { row: cursorRow, col: colInCurrentLine };
}
- offset += col;
- return offset;
+
+ // Search subsequent lines
+ for (let row = cursorRow + 1; row < lines.length; row++) {
+ const line = lines[row] || '';
+ const chars = toCodePoints(line);
+
+ // For empty lines, if we haven't found any words yet, return the empty line
+ if (chars.length === 0) {
+ // Check if there are any words in remaining lines
+ let hasWordsInLaterLines = false;
+ for (let laterRow = row + 1; laterRow < lines.length; laterRow++) {
+ const laterLine = lines[laterRow] || '';
+ const laterChars = toCodePoints(laterLine);
+ let firstNonWhitespace = 0;
+ while (
+ firstNonWhitespace < laterChars.length &&
+ isWhitespace(laterChars[firstNonWhitespace])
+ ) {
+ firstNonWhitespace++;
+ }
+ if (firstNonWhitespace < laterChars.length) {
+ hasWordsInLaterLines = true;
+ break;
+ }
+ }
+
+ // If no words in later lines, return the empty line
+ if (!hasWordsInLaterLines) {
+ return { row, col: 0 };
+ }
+ continue;
+ }
+
+ // Find first non-whitespace
+ let firstNonWhitespace = 0;
+ while (
+ firstNonWhitespace < chars.length &&
+ isWhitespace(chars[firstNonWhitespace])
+ ) {
+ firstNonWhitespace++;
+ }
+
+ if (firstNonWhitespace < chars.length) {
+ if (searchForWordStart) {
+ return { row, col: firstNonWhitespace };
+ } else {
+ // For word end, find the end of the first word
+ const endCol = findWordEndInLine(line, firstNonWhitespace);
+ if (endCol !== null) {
+ return { row, col: endCol };
+ }
+ }
+ }
+ }
+
+ return null;
+};
+
+// Find previous word across lines
+export const findPrevWordAcrossLines = (
+ lines: string[],
+ cursorRow: number,
+ cursorCol: number,
+): { row: number; col: number } | null => {
+ // First try current line
+ const currentLine = lines[cursorRow] || '';
+ const colInCurrentLine = findPrevWordStartInLine(currentLine, cursorCol);
+
+ if (colInCurrentLine !== null) {
+ return { row: cursorRow, col: colInCurrentLine };
+ }
+
+ // Search previous lines
+ for (let row = cursorRow - 1; row >= 0; row--) {
+ const line = lines[row] || '';
+ const chars = toCodePoints(line);
+
+ if (chars.length === 0) continue;
+
+ // Find last word start
+ let lastWordStart = chars.length;
+ while (lastWordStart > 0 && isWhitespace(chars[lastWordStart - 1])) {
+ lastWordStart--;
+ }
+
+ if (lastWordStart > 0) {
+ // Find start of this word
+ const wordStart = findPrevWordStartInLine(line, lastWordStart);
+ if (wordStart !== null) {
+ return { row, col: wordStart };
+ }
+ }
+ }
+
+ return null;
};
+// Helper functions for vim line operations
export const getPositionFromOffsets = (
startOffset: number,
endOffset: number,
diff --git a/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts b/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts
index 8f7f72ab..a07d8df1 100644
--- a/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts
+++ b/packages/cli/src/ui/components/shared/vim-buffer-actions.test.ts
@@ -140,6 +140,25 @@ describe('vim-buffer-actions', () => {
expect(result.cursorRow).toBe(1);
expect(result.cursorCol).toBe(0);
});
+
+ it('should skip over combining marks to avoid cursor disappearing', () => {
+ // Test case for combining character cursor disappearing bug
+ // "café test" where é is represented as e + combining acute accent
+ const state = createTestState(['cafe\u0301 test'], 0, 2); // Start at 'f'
+ const action = {
+ type: 'vim_move_right' as const,
+ payload: { count: 1 },
+ };
+
+ const result = handleVimAction(state, action);
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(3); // Should be on 'e' of 'café'
+
+ // Move right again - should skip combining mark and land on space
+ const result2 = handleVimAction(result, action);
+ expect(result2).toHaveOnlyValidCharacters();
+ expect(result2.cursorCol).toBe(5); // Should be on space after 'café'
+ });
});
describe('vim_move_up', () => {
@@ -169,7 +188,7 @@ describe('vim-buffer-actions', () => {
const result = handleVimAction(state, action);
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorRow).toBe(0);
- expect(result.cursorCol).toBe(5); // End of 'short'
+ expect(result.cursorCol).toBe(4); // Last character 't' of 'short', not past it
});
});
@@ -236,6 +255,20 @@ describe('vim-buffer-actions', () => {
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(5); // Start of ','
});
+
+ it('should move across empty lines when starting from within a word', () => {
+ // Testing the exact scenario: cursor on 'w' of 'hello world', w should move to next line
+ const state = createTestState(['hello world', ''], 0, 6); // At 'w' of 'world'
+ const action = {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ };
+
+ const result = handleVimAction(state, action);
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorRow).toBe(1); // Should move to empty line
+ expect(result.cursorCol).toBe(0); // Beginning of empty line
+ });
});
describe('vim_move_word_backward', () => {
@@ -288,6 +321,85 @@ describe('vim-buffer-actions', () => {
expect(result).toHaveOnlyValidCharacters();
expect(result.cursorCol).toBe(10); // End of 'world'
});
+
+ it('should move across empty lines when at word end', () => {
+ const state = createTestState(['hello world', '', 'test'], 0, 10); // At 'd' of 'world'
+ const action = {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ };
+
+ const result = handleVimAction(state, action);
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorRow).toBe(2); // Should move to line with 'test'
+ expect(result.cursorCol).toBe(3); // Should be at 't' (end of 'test')
+ });
+
+ it('should handle consecutive word-end movements across empty lines', () => {
+ // Testing the exact scenario: cursor on 'w' of world, press 'e' twice
+ const state = createTestState(['hello world', ''], 0, 6); // At 'w' of 'world'
+
+ // First 'e' should move to 'd' of 'world'
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorRow).toBe(0);
+ expect(result.cursorCol).toBe(10); // At 'd' of 'world'
+
+ // Second 'e' should move to the empty line (end of file in this case)
+ result = handleVimAction(result, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorRow).toBe(1); // Should move to empty line
+ expect(result.cursorCol).toBe(0); // Empty line has col 0
+ });
+
+ it('should handle combining characters - advance from end of base character', () => {
+ // Test case for combining character word end bug
+ // "café test" where é is represented as e + combining acute accent
+ const state = createTestState(['cafe\u0301 test'], 0, 0); // Start at 'c'
+
+ // First 'e' command should move to the 'e' (position 3)
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(3); // At 'e' of café
+
+ // Second 'e' command should advance to end of "test" (position 9), not stay stuck
+ result = handleVimAction(result, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(9); // At 't' of "test"
+ });
+
+ it('should handle precomposed characters with diacritics', () => {
+ // Test case with precomposed é for comparison
+ const state = createTestState(['café test'], 0, 0); // Start at 'c'
+
+ // First 'e' command should move to the 'é' (position 3)
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(3); // At 'é' of café
+
+ // Second 'e' command should advance to end of "test" (position 8)
+ result = handleVimAction(result, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(8); // At 't' of "test"
+ });
});
describe('Position commands', () => {
@@ -793,4 +905,215 @@ describe('vim-buffer-actions', () => {
expect(result.undoStack).toHaveLength(2); // Original plus new snapshot
});
});
+
+ describe('UTF-32 character handling in word/line operations', () => {
+ describe('Right-to-left text handling', () => {
+ it('should handle Arabic text in word movements', () => {
+ const state = createTestState(['hello مرحبا world'], 0, 0);
+
+ // Move to end of 'hello'
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(4); // End of 'hello'
+
+ // Move to end of Arabic word
+ result = handleVimAction(result, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(10); // End of Arabic word 'مرحبا'
+ });
+ });
+
+ describe('Chinese character handling', () => {
+ it('should handle Chinese characters in word movements', () => {
+ const state = createTestState(['hello 你好 world'], 0, 0);
+
+ // Move to end of 'hello'
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(4); // End of 'hello'
+
+ // Move forward to start of 'world'
+ result = handleVimAction(result, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(6); // Start of '你好'
+ });
+ });
+
+ describe('Mixed script handling', () => {
+ it('should handle mixed Latin and non-Latin scripts with word end commands', () => {
+ const state = createTestState(['test中文test'], 0, 0);
+
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(3); // End of 'test'
+
+ // Second word end command should move to end of '中文'
+ result = handleVimAction(result, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(5); // End of '中文'
+ });
+
+ it('should handle mixed Latin and non-Latin scripts with word forward commands', () => {
+ const state = createTestState(['test中文test'], 0, 0);
+
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(4); // Start of '中'
+
+ // Second word forward command should move to start of final 'test'
+ result = handleVimAction(result, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(6); // Start of final 'test'
+ });
+
+ it('should handle mixed Latin and non-Latin scripts with word backward commands', () => {
+ const state = createTestState(['test中文test'], 0, 9); // Start at end of final 'test'
+
+ let result = handleVimAction(state, {
+ type: 'vim_move_word_backward' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(6); // Start of final 'test'
+
+ // Second word backward command should move to start of '中文'
+ result = handleVimAction(result, {
+ type: 'vim_move_word_backward' as const,
+ payload: { count: 1 },
+ });
+ expect(result).toHaveOnlyValidCharacters();
+ expect(result.cursorCol).toBe(4); // Start of '中'
+ });
+
+ it('should handle Unicode block characters consistently with w and e commands', () => {
+ const state = createTestState(['██ █████ ██'], 0, 0);
+
+ // Test w command progression
+ let wResult = handleVimAction(state, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(wResult).toHaveOnlyValidCharacters();
+ expect(wResult.cursorCol).toBe(3); // Start of second block sequence
+
+ wResult = handleVimAction(wResult, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(wResult).toHaveOnlyValidCharacters();
+ expect(wResult.cursorCol).toBe(9); // Start of third block sequence
+
+ // Test e command progression from beginning
+ let eResult = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(eResult).toHaveOnlyValidCharacters();
+ expect(eResult.cursorCol).toBe(1); // End of first block sequence
+
+ eResult = handleVimAction(eResult, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(eResult).toHaveOnlyValidCharacters();
+ expect(eResult.cursorCol).toBe(7); // End of second block sequence
+
+ eResult = handleVimAction(eResult, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(eResult).toHaveOnlyValidCharacters();
+ expect(eResult.cursorCol).toBe(10); // End of third block sequence
+ });
+
+ it('should handle strings starting with Chinese characters', () => {
+ const state = createTestState(['中文test英文word'], 0, 0);
+
+ // Test 'w' command - when at start of non-Latin word, w moves to next word
+ let wResult = handleVimAction(state, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(wResult).toHaveOnlyValidCharacters();
+ expect(wResult.cursorCol).toBe(2); // Start of 'test'
+
+ wResult = handleVimAction(wResult, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(wResult.cursorCol).toBe(6); // Start of '英文'
+
+ // Test 'e' command
+ let eResult = handleVimAction(state, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(eResult).toHaveOnlyValidCharacters();
+ expect(eResult.cursorCol).toBe(1); // End of 中文
+
+ eResult = handleVimAction(eResult, {
+ type: 'vim_move_word_end' as const,
+ payload: { count: 1 },
+ });
+ expect(eResult.cursorCol).toBe(5); // End of test
+ });
+
+ it('should handle strings starting with Arabic characters', () => {
+ const state = createTestState(['مرحباhelloسلام'], 0, 0);
+
+ // Test 'w' command - when at start of non-Latin word, w moves to next word
+ let wResult = handleVimAction(state, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(wResult).toHaveOnlyValidCharacters();
+ expect(wResult.cursorCol).toBe(5); // Start of 'hello'
+
+ wResult = handleVimAction(wResult, {
+ type: 'vim_move_word_forward' as const,
+ payload: { count: 1 },
+ });
+ expect(wResult.cursorCol).toBe(10); // Start of 'سلام'
+
+ // Test 'b' command from end
+ const bState = createTestState(['مرحباhelloسلام'], 0, 13);
+ let bResult = handleVimAction(bState, {
+ type: 'vim_move_word_backward' as const,
+ payload: { count: 1 },
+ });
+ expect(bResult).toHaveOnlyValidCharacters();
+ expect(bResult.cursorCol).toBe(10); // Start of سلام
+
+ bResult = handleVimAction(bResult, {
+ type: 'vim_move_word_backward' as const,
+ payload: { count: 1 },
+ });
+ expect(bResult.cursorCol).toBe(5); // Start of hello
+ });
+ });
+ });
});
diff --git a/packages/cli/src/ui/components/shared/vim-buffer-actions.ts b/packages/cli/src/ui/components/shared/vim-buffer-actions.ts
index ab52e991..0e2e7989 100644
--- a/packages/cli/src/ui/components/shared/vim-buffer-actions.ts
+++ b/packages/cli/src/ui/components/shared/vim-buffer-actions.ts
@@ -7,16 +7,35 @@
import {
TextBufferState,
TextBufferAction,
- findNextWordStart,
- findPrevWordStart,
- findWordEnd,
- getOffsetFromPosition,
- getPositionFromOffsets,
getLineRangeOffsets,
+ getPositionFromOffsets,
replaceRangeInternal,
pushUndo,
+ isWordCharStrict,
+ isWordCharWithCombining,
+ isCombiningMark,
+ findNextWordAcrossLines,
+ findPrevWordAcrossLines,
+ findWordEndInLine,
} from './text-buffer.js';
-import { cpLen } from '../../utils/textUtils.js';
+import { cpLen, toCodePoints } from '../../utils/textUtils.js';
+
+// Check if we're at the end of a base word (on the last base character)
+// Returns true if current position has a base character followed only by combining marks until non-word
+function isAtEndOfBaseWord(lineCodePoints: string[], col: number): boolean {
+ if (!isWordCharStrict(lineCodePoints[col])) return false;
+
+ // Look ahead to see if we have only combining marks followed by non-word
+ let i = col + 1;
+
+ // Skip any combining marks
+ while (i < lineCodePoints.length && isCombiningMark(lineCodePoints[i])) {
+ i++;
+ }
+
+ // If we hit end of line or non-word character, we were at end of base word
+ return i >= lineCodePoints.length || !isWordCharStrict(lineCodePoints[i]);
+}
export type VimAction = Extract<
TextBufferAction,
@@ -59,167 +78,38 @@ export function handleVimAction(
action: VimAction,
): TextBufferState {
const { lines, cursorRow, cursorCol } = state;
- // Cache text join to avoid repeated calculations for word operations
- let text: string | null = null;
- const getText = () => text ?? (text = lines.join('\n'));
switch (action.type) {
- case 'vim_delete_word_forward': {
- const { count } = action.payload;
- const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
-
- let endOffset = currentOffset;
- let searchOffset = currentOffset;
-
- for (let i = 0; i < count; i++) {
- const nextWordOffset = findNextWordStart(getText(), searchOffset);
- if (nextWordOffset > searchOffset) {
- searchOffset = nextWordOffset;
- endOffset = nextWordOffset;
- } else {
- // If no next word, delete to end of current word
- const wordEndOffset = findWordEnd(getText(), searchOffset);
- endOffset = Math.min(wordEndOffset + 1, getText().length);
- break;
- }
- }
-
- if (endOffset > currentOffset) {
- const nextState = pushUndo(state);
- const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
- currentOffset,
- endOffset,
- nextState.lines,
- );
- return replaceRangeInternal(
- nextState,
- startRow,
- startCol,
- endRow,
- endCol,
- '',
- );
- }
- return state;
- }
-
- case 'vim_delete_word_backward': {
- const { count } = action.payload;
- const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
-
- let startOffset = currentOffset;
- let searchOffset = currentOffset;
-
- for (let i = 0; i < count; i++) {
- const prevWordOffset = findPrevWordStart(getText(), searchOffset);
- if (prevWordOffset < searchOffset) {
- searchOffset = prevWordOffset;
- startOffset = prevWordOffset;
- } else {
- break;
- }
- }
-
- if (startOffset < currentOffset) {
- const nextState = pushUndo(state);
- const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
- startOffset,
- currentOffset,
- nextState.lines,
- );
- const newState = replaceRangeInternal(
- nextState,
- startRow,
- startCol,
- endRow,
- endCol,
- '',
- );
- // Cursor is already at the correct position after deletion
- return newState;
- }
- return state;
- }
-
- case 'vim_delete_word_end': {
- const { count } = action.payload;
- const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
-
- let offset = currentOffset;
- let endOffset = currentOffset;
-
- for (let i = 0; i < count; i++) {
- const wordEndOffset = findWordEnd(getText(), offset);
- if (wordEndOffset >= offset) {
- endOffset = wordEndOffset + 1; // Include the character at word end
- // For next iteration, move to start of next word
- if (i < count - 1) {
- const nextWordStart = findNextWordStart(
- getText(),
- wordEndOffset + 1,
- );
- offset = nextWordStart;
- if (nextWordStart <= wordEndOffset) {
- break; // No more words
- }
- }
- } else {
- break;
- }
- }
-
- endOffset = Math.min(endOffset, getText().length);
-
- if (endOffset > currentOffset) {
- const nextState = pushUndo(state);
- const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
- currentOffset,
- endOffset,
- nextState.lines,
- );
- return replaceRangeInternal(
- nextState,
- startRow,
- startCol,
- endRow,
- endCol,
- '',
- );
- }
- return state;
- }
-
+ case 'vim_delete_word_forward':
case 'vim_change_word_forward': {
const { count } = action.payload;
- const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
-
- let searchOffset = currentOffset;
- let endOffset = currentOffset;
+ let endRow = cursorRow;
+ let endCol = cursorCol;
for (let i = 0; i < count; i++) {
- const nextWordOffset = findNextWordStart(getText(), searchOffset);
- if (nextWordOffset > searchOffset) {
- searchOffset = nextWordOffset;
- endOffset = nextWordOffset;
+ const nextWord = findNextWordAcrossLines(lines, endRow, endCol, true);
+ if (nextWord) {
+ endRow = nextWord.row;
+ endCol = nextWord.col;
} else {
- // If no next word, change to end of current word
- const wordEndOffset = findWordEnd(getText(), searchOffset);
- endOffset = Math.min(wordEndOffset + 1, getText().length);
+ // No more words, delete/change to end of current word or line
+ const currentLine = lines[endRow] || '';
+ const wordEnd = findWordEndInLine(currentLine, endCol);
+ if (wordEnd !== null) {
+ endCol = wordEnd + 1; // Include the character at word end
+ } else {
+ endCol = cpLen(currentLine);
+ }
break;
}
}
- if (endOffset > currentOffset) {
+ if (endRow !== cursorRow || endCol !== cursorCol) {
const nextState = pushUndo(state);
- const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
- currentOffset,
- endOffset,
- nextState.lines,
- );
return replaceRangeInternal(
nextState,
- startRow,
- startCol,
+ cursorRow,
+ cursorCol,
endRow,
endCol,
'',
@@ -228,61 +118,61 @@ export function handleVimAction(
return state;
}
+ case 'vim_delete_word_backward':
case 'vim_change_word_backward': {
const { count } = action.payload;
- const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
-
- let startOffset = currentOffset;
- let searchOffset = currentOffset;
+ let startRow = cursorRow;
+ let startCol = cursorCol;
for (let i = 0; i < count; i++) {
- const prevWordOffset = findPrevWordStart(getText(), searchOffset);
- if (prevWordOffset < searchOffset) {
- searchOffset = prevWordOffset;
- startOffset = prevWordOffset;
+ const prevWord = findPrevWordAcrossLines(lines, startRow, startCol);
+ if (prevWord) {
+ startRow = prevWord.row;
+ startCol = prevWord.col;
} else {
break;
}
}
- if (startOffset < currentOffset) {
+ if (startRow !== cursorRow || startCol !== cursorCol) {
const nextState = pushUndo(state);
- const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
- startOffset,
- currentOffset,
- nextState.lines,
- );
return replaceRangeInternal(
nextState,
startRow,
startCol,
- endRow,
- endCol,
+ cursorRow,
+ cursorCol,
'',
);
}
return state;
}
+ case 'vim_delete_word_end':
case 'vim_change_word_end': {
const { count } = action.payload;
- const currentOffset = getOffsetFromPosition(cursorRow, cursorCol, lines);
-
- let offset = currentOffset;
- let endOffset = currentOffset;
+ let row = cursorRow;
+ let col = cursorCol;
+ let endRow = cursorRow;
+ let endCol = cursorCol;
for (let i = 0; i < count; i++) {
- const wordEndOffset = findWordEnd(getText(), offset);
- if (wordEndOffset >= offset) {
- endOffset = wordEndOffset + 1; // Include the character at word end
+ const wordEnd = findNextWordAcrossLines(lines, row, col, false);
+ if (wordEnd) {
+ endRow = wordEnd.row;
+ endCol = wordEnd.col + 1; // Include the character at word end
// For next iteration, move to start of next word
if (i < count - 1) {
- const nextWordStart = findNextWordStart(
- getText(),
- wordEndOffset + 1,
+ const nextWord = findNextWordAcrossLines(
+ lines,
+ wordEnd.row,
+ wordEnd.col + 1,
+ true,
);
- offset = nextWordStart;
- if (nextWordStart <= wordEndOffset) {
+ if (nextWord) {
+ row = nextWord.row;
+ col = nextWord.col;
+ } else {
break; // No more words
}
}
@@ -291,19 +181,18 @@ export function handleVimAction(
}
}
- endOffset = Math.min(endOffset, getText().length);
+ // Ensure we don't go past the end of the last line
+ if (endRow < lines.length) {
+ const lineLen = cpLen(lines[endRow] || '');
+ endCol = Math.min(endCol, lineLen);
+ }
- if (endOffset !== currentOffset) {
+ if (endRow !== cursorRow || endCol !== cursorCol) {
const nextState = pushUndo(state);
- const { startRow, startCol, endRow, endCol } = getPositionFromOffsets(
- Math.min(currentOffset, endOffset),
- Math.max(currentOffset, endOffset),
- nextState.lines,
- );
return replaceRangeInternal(
nextState,
- startRow,
- startCol,
+ cursorRow,
+ cursorCol,
endRow,
endCol,
'',
@@ -376,32 +265,17 @@ export function handleVimAction(
);
}
- case 'vim_delete_to_end_of_line': {
- const currentLine = lines[cursorRow] || '';
- if (cursorCol < currentLine.length) {
- const nextState = pushUndo(state);
- return replaceRangeInternal(
- nextState,
- cursorRow,
- cursorCol,
- cursorRow,
- currentLine.length,
- '',
- );
- }
- return state;
- }
-
+ case 'vim_delete_to_end_of_line':
case 'vim_change_to_end_of_line': {
const currentLine = lines[cursorRow] || '';
- if (cursorCol < currentLine.length) {
+ if (cursorCol < cpLen(currentLine)) {
const nextState = pushUndo(state);
return replaceRangeInternal(
nextState,
cursorRow,
cursorCol,
cursorRow,
- currentLine.length,
+ cpLen(currentLine),
'',
);
}
@@ -578,6 +452,16 @@ export function handleVimAction(
}
} else if (newCol < lineLength - 1) {
newCol++;
+
+ // Skip over combining marks - don't let cursor land on them
+ const currentLinePoints = toCodePoints(currentLine);
+ while (
+ newCol < currentLinePoints.length &&
+ isCombiningMark(currentLinePoints[newCol]) &&
+ newCol < lineLength - 1
+ ) {
+ newCol++;
+ }
} else if (newRow < lines.length - 1) {
// At end of line - move to beginning of next line
newRow++;
@@ -597,7 +481,12 @@ export function handleVimAction(
const { count } = action.payload;
const { cursorRow, cursorCol, lines } = state;
const newRow = Math.max(0, cursorRow - count);
- const newCol = Math.min(cursorCol, cpLen(lines[newRow] || ''));
+ const targetLine = lines[newRow] || '';
+ const targetLineLength = cpLen(targetLine);
+ const newCol = Math.min(
+ cursorCol,
+ targetLineLength > 0 ? targetLineLength - 1 : 0,
+ );
return {
...state,
@@ -611,7 +500,12 @@ export function handleVimAction(
const { count } = action.payload;
const { cursorRow, cursorCol, lines } = state;
const newRow = Math.min(lines.length - 1, cursorRow + count);
- const newCol = Math.min(cursorCol, cpLen(lines[newRow] || ''));
+ const targetLine = lines[newRow] || '';
+ const targetLineLength = cpLen(targetLine);
+ const newCol = Math.min(
+ cursorCol,
+ targetLineLength > 0 ? targetLineLength - 1 : 0,
+ );
return {
...state,
@@ -623,69 +517,101 @@ export function handleVimAction(
case 'vim_move_word_forward': {
const { count } = action.payload;
- let offset = getOffsetFromPosition(cursorRow, cursorCol, lines);
+ let row = cursorRow;
+ let col = cursorCol;
for (let i = 0; i < count; i++) {
- const nextWordOffset = findNextWordStart(getText(), offset);
- if (nextWordOffset > offset) {
- offset = nextWordOffset;
+ const nextWord = findNextWordAcrossLines(lines, row, col, true);
+ if (nextWord) {
+ row = nextWord.row;
+ col = nextWord.col;
} else {
// No more words to move to
break;
}
}
- const { startRow, startCol } = getPositionFromOffsets(
- offset,
- offset,
- lines,
- );
return {
...state,
- cursorRow: startRow,
- cursorCol: startCol,
+ cursorRow: row,
+ cursorCol: col,
preferredCol: null,
};
}
case 'vim_move_word_backward': {
const { count } = action.payload;
- let offset = getOffsetFromPosition(cursorRow, cursorCol, lines);
+ let row = cursorRow;
+ let col = cursorCol;
for (let i = 0; i < count; i++) {
- offset = findPrevWordStart(getText(), offset);
+ const prevWord = findPrevWordAcrossLines(lines, row, col);
+ if (prevWord) {
+ row = prevWord.row;
+ col = prevWord.col;
+ } else {
+ break;
+ }
}
- const { startRow, startCol } = getPositionFromOffsets(
- offset,
- offset,
- lines,
- );
return {
...state,
- cursorRow: startRow,
- cursorCol: startCol,
+ cursorRow: row,
+ cursorCol: col,
preferredCol: null,
};
}
case 'vim_move_word_end': {
const { count } = action.payload;
- let offset = getOffsetFromPosition(cursorRow, cursorCol, lines);
+ let row = cursorRow;
+ let col = cursorCol;
for (let i = 0; i < count; i++) {
- offset = findWordEnd(getText(), offset);
+ // Special handling for the first iteration when we're at end of word
+ if (i === 0) {
+ const currentLine = lines[row] || '';
+ const lineCodePoints = toCodePoints(currentLine);
+
+ // Check if we're at the end of a word (on the last base character)
+ const atEndOfWord =
+ col < lineCodePoints.length &&
+ isWordCharStrict(lineCodePoints[col]) &&
+ (col + 1 >= lineCodePoints.length ||
+ !isWordCharWithCombining(lineCodePoints[col + 1]) ||
+ // Or if we're on a base char followed only by combining marks until non-word
+ (isWordCharStrict(lineCodePoints[col]) &&
+ isAtEndOfBaseWord(lineCodePoints, col)));
+
+ if (atEndOfWord) {
+ // We're already at end of word, find next word end
+ const nextWord = findNextWordAcrossLines(
+ lines,
+ row,
+ col + 1,
+ false,
+ );
+ if (nextWord) {
+ row = nextWord.row;
+ col = nextWord.col;
+ continue;
+ }
+ }
+ }
+
+ const wordEnd = findNextWordAcrossLines(lines, row, col, false);
+ if (wordEnd) {
+ row = wordEnd.row;
+ col = wordEnd.col;
+ } else {
+ break;
+ }
}
- const { startRow, startCol } = getPositionFromOffsets(
- offset,
- offset,
- lines,
- );
return {
...state,
- cursorRow: startRow,
- cursorCol: startCol,
+ cursorRow: row,
+ cursorCol: col,
preferredCol: null,
};
}
@@ -783,7 +709,7 @@ export function handleVimAction(
let col = 0;
// Find first non-whitespace character using proper Unicode handling
- const lineCodePoints = [...currentLine]; // Proper Unicode iteration
+ const lineCodePoints = toCodePoints(currentLine);
while (col < lineCodePoints.length && /\s/.test(lineCodePoints[col])) {
col++;
}
@@ -820,7 +746,7 @@ export function handleVimAction(
let col = 0;
// Find first non-whitespace character using proper Unicode handling
- const lineCodePoints = [...currentLine]; // Proper Unicode iteration
+ const lineCodePoints = toCodePoints(currentLine);
while (col < lineCodePoints.length && /\s/.test(lineCodePoints[col])) {
col++;
}