-
Notifications
You must be signed in to change notification settings - Fork 32
Performance optimizations and testing improvements #40
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
leodutra
wants to merge
8
commits into
andrewrk:master
Choose a base branch
from
leodutra:improvements
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
fc04cfc
chore: add check coverage script for diacritics characters
leodutra dead314
feat: add benchmark script and implement performance testing for diac…
leodutra 8810590
Optimize regex pattern for 10-30% performance improvement
leodutra 77afdc9
feat: enhance test coverage for diacritics removal and module exports
leodutra c1cbe4c
feat: add tests for diacritics removal with mixed content and non-Lat…
leodutra 079270f
feat: enhance diacritics pattern with comprehensive Unicode ranges fo…
leodutra a435ccb
feat: rename character coverage analysis for diacritics with Unicode …
leodutra e3069f2
feat: optimize performance by refactoring time measurement and enhanc…
leodutra File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,84 @@ | ||
| const diacritics = require("./index.js"); | ||
|
|
||
/**
 * Collect every character that appears in any entry's `chars` field.
 *
 * @param {Array<{chars: Iterable<string>}>} replacementList - entries whose
 *   `chars` value is iterated character by character.
 * @returns {Set<string>} the distinct characters found across all entries.
 */
function getAllCoveredCharacters(replacementList) {
  const covered = new Set();
  for (const { chars } of replacementList) {
    for (const ch of chars) {
      covered.add(ch);
    }
  }
  return covered;
}
|
|
||
/**
 * List the Latin Unicode blocks whose coverage is reported.
 *
 * @returns {Array<{name: string, start: number, end: number}>} one entry per
 *   block; `start` and `end` are inclusive code points.
 */
function getLatinUnicodeRanges() {
  // [block name, first code point, last code point]
  const blocks = [
    ["Latin-1 Supplement", 0x0080, 0x00ff],
    ["Latin Extended-A", 0x0100, 0x017f],
    ["Latin Extended-B", 0x0180, 0x024f],
    ["Latin Extended Additional", 0x1e00, 0x1eff],
    ["Latin Extended-C", 0x2c60, 0x2c7f],
    ["Latin Extended-D", 0xa720, 0xa7ff],
  ];
  return blocks.map(([name, start, end]) => ({ name, start, end }));
}
|
|
||
/**
 * Report whether the given text contains a Unicode letter.
 *
 * @param {string} char - text to inspect (callers pass a single character).
 * @returns {boolean} true when a character of general category Letter is found.
 */
function isLetter(char) {
  // \p{L} is the short alias of \p{Letter}.
  return /\p{L}/u.exec(char) !== null;
}
|
|
||
/**
 * Count how many letters in a code-point range are present in `coveredChars`.
 *
 * Only code points for which `isLetter` returns true are counted; everything
 * else in the range is ignored.
 *
 * @param {{start: number, end: number}} range - inclusive code-point bounds.
 * @param {Set<string>} coveredChars - characters considered covered.
 * @returns {{total: number, covered: number, missing: string[]}} `total` is the
 *   number of letters in the range, `covered` how many of them are in
 *   `coveredChars`, and `missing` a "U+XXXX (c)" label for each of the others.
 */
function analyzeRangeCoverage(range, coveredChars) {
  let total = 0;
  let covered = 0;
  const missing = [];

  for (let code = range.start; code <= range.end; code++) {
    // fromCodePoint (unlike fromCharCode) does not truncate values above
    // U+FFFF, so ranges outside the Basic Multilingual Plane stay correct.
    const char = String.fromCodePoint(code);
    if (!isLetter(char)) {
      continue;
    }

    total++;
    if (coveredChars.has(char)) {
      covered++;
    } else {
      missing.push(
        `U+${code.toString(16).toUpperCase().padStart(4, "0")} (${char})`
      );
    }
  }

  return { total, covered, missing };
}
|
|
||
/**
 * Render the list of missing-character labels for display.
 *
 * @param {string[]} missing - labels of the characters that are not covered.
 * @returns {string|null} null for an empty list; the full comma-separated list
 *   for up to 10 entries; otherwise the first 5 entries followed by a count of
 *   the remaining ones.
 */
function formatMissingCharacters(missing) {
  const count = missing.length;
  if (count === 0) {
    return null;
  }

  if (count > 10) {
    const preview = missing.slice(0, 5).join(", ");
    return `${preview} ... and ${count - 5} more`;
  }

  return missing.join(", ");
}
|
|
||
/**
 * Print the coverage summary of one range, followed by its missing characters.
 *
 * @param {{name: string}} range - the range being reported; only `name` is read.
 * @param {{total: number, covered: number, missing: string[]}} analysis -
 *   counts and missing-character labels for the range.
 * @returns {void}
 */
function displayRangeCoverage(range, analysis) {
  const { covered, total, missing } = analysis;
  // A range without any letters would otherwise divide 0 by 0 and print "NaN%".
  const percentage = total > 0 ? Math.round((covered / total) * 100) : 0;
  console.log(`\n${range.name}: ${covered}/${total} (${percentage}%)`);

  const missingFormatted = formatMissingCharacters(missing);
  if (missingFormatted) {
    console.log("Missing:", missingFormatted);
  }
}
|
|
||
/**
 * Print how many characters the replacement list covers, then a coverage
 * summary for each Latin Unicode range.
 *
 * @returns {void}
 */
function checkCoverage() {
  const coveredChars = getAllCoveredCharacters(diacritics.replacementList);
  console.log("Total characters covered:", coveredChars.size);

  for (const range of getLatinUnicodeRanges()) {
    displayRangeCoverage(range, analyzeRangeCoverage(range, coveredChars));
  }
}
|
|
||
// Script entry point: run the coverage report as soon as the file is executed.
checkCoverage();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,235 @@ | ||
| const { remove } = require('./index.js'); | ||
|
|
||
|
|
||
/**
 * Print a banner: a separator line, the title, and another separator line.
 *
 * @param {string} title - text shown between the two separators.
 * @param {boolean} [newLine=false] - forwarded to the first separator only.
 * @returns {void}
 */
const logHeader = (title, newLine = false) => {
  logSeparator(newLine);
  console.log(title);
  logSeparator();
};
|
|
||
/**
 * Print a 70-character line of "=" signs.
 *
 * @param {boolean} [newLine=false] - when truthy, the line is preceded by a
 *   newline character.
 * @returns {void}
 */
const logSeparator = (newLine = false) => {
  const rule = '='.repeat(70);
  const prefix = newLine ? '\n' : '';
  console.log(prefix + rule);
};
|
|
||
// Default number of untimed calls made by warmupFunction.
const WARMUP_ITERATIONS = 1000;
|
|
||
/**
 * Convert a nanosecond count to milliseconds.
 *
 * @param {bigint|number} nano - duration in nanoseconds.
 * @returns {number} the same duration in milliseconds.
 */
function nanoToMilli(nano) {
  const NANOS_PER_MILLI = 1e6;
  return Number(nano) / NANOS_PER_MILLI;
}
|
|
||
/**
 * Call `fn` repeatedly without timing it.
 *
 * @param {Function} fn - function to invoke with no arguments.
 * @param {number} [iterations=WARMUP_ITERATIONS] - number of calls to make.
 * @returns {void}
 */
function warmupFunction(fn, iterations = WARMUP_ITERATIONS) {
  let calls = 0;
  while (calls < iterations) {
    fn();
    calls += 1;
  }
}
|
|
||
/**
 * Time `iterations` back-to-back calls of `fn`.
 *
 * @param {Function} fn - function to invoke with no arguments.
 * @param {number} iterations - number of calls to time.
 * @returns {number} elapsed time in milliseconds, from process.hrtime.bigint().
 */
function measurePerformance(fn, iterations) {
  const startedAt = process.hrtime.bigint();
  let run = 0;
  while (run < iterations) {
    fn();
    run += 1;
  }
  const elapsedNanos = process.hrtime.bigint() - startedAt;
  // Nanoseconds to milliseconds.
  return Number(elapsedNanos) / 1_000_000;
}
|
|
||
/**
 * Derive per-call statistics from a total duration.
 *
 * @param {number} totalTime - total elapsed time in milliseconds.
 * @param {number} iterations - number of calls the total covers.
 * @returns {{totalTime: number, avgTime: number, opsPerSecond: number}}
 *   `avgTime` is milliseconds per call; `opsPerSecond` is 1000 / avgTime,
 *   rounded to the nearest integer.
 */
function calculateMetrics(totalTime, iterations) {
  const avgTime = totalTime / iterations;
  return {
    totalTime,
    avgTime,
    opsPerSecond: Math.round(1000 / avgTime),
  };
}
|
|
||
/**
 * Print one benchmark's metrics as three labelled lines.
 *
 * @param {{totalTime: number, avgTime: number, opsPerSecond: number}} metrics -
 *   times are printed in milliseconds (2 and 6 decimals respectively);
 *   `opsPerSecond` is printed with locale formatting.
 * @returns {void}
 */
function displayResults({ totalTime, avgTime, opsPerSecond }) {
  const print = (label, value) => console.log(`${label}: ${value}`);

  print('Total time', `${totalTime.toFixed(2)}ms`);
  print('Average time per operation', `${avgTime.toFixed(6)}ms`);
  print('Operations per second', opsPerSecond.toLocaleString());
}
|
|
||
/**
 * Run one named benchmark: print a header, warm up, time the calls, print the
 * metrics and return them.
 *
 * @param {string} name - title printed in the header.
 * @param {Function} fn - function under test, invoked with no arguments.
 * @param {number} [iterations=50000] - number of timed calls.
 * @returns {{totalTime: number, avgTime: number, opsPerSecond: number}} the
 *   metrics computed by calculateMetrics.
 */
function benchmark(name, fn, iterations = 50000) {
  logHeader(name, true);

  warmupFunction(fn);
  const elapsed = measurePerformance(fn, iterations);
  const metrics = calculateMetrics(elapsed, iterations);

  displayResults(metrics);
  return metrics;
}
|
|
||
/**
 * Build the paragraph-sized benchmark input.
 *
 * Every run of whitespace in the template (including the line breaks) is
 * collapsed to a single space and the result is trimmed.
 *
 * @returns {string} the paragraph as a single line.
 */
const createLongParagraph = () => `
  Łorem ipsum dolor sit amet, cōnsectetuer adipīscing elit.
  Maecenās porttitor congue massa. Fusce posuere, magna sed
  pulvinar ultricies, purus lectus malesuada libero, sit amet
  commodo magna eros quis urna. Nunc viverra imperdiet enim.
  Fusce est. Vivamus a tellus. Pellentesque habitant morbi
  tristique senectus et netus et malesuada fames ac turpis
  egestas. Proin pharetra nonummy pede. Mauris et orci.
  Aenean nec lorem. In porttitor. Donec laoreet nonummy augue.
`.replace(/\s+/g, ' ').trim();
|
|
||
/**
 * Build the largest benchmark input.
 *
 * The template is repeated 10 times, then every run of whitespace is collapsed
 * to a single space and the result is trimmed.
 *
 * @returns {string} the repeated passage as a single line.
 */
const createVeryLongText = () => `
  Ñoñó, eñ el año mil ñovecieñtos ñoveñta y ñueve, eñ el pueblo
  de Añañuca, vivía uña ñiña llamada Begoña. Begoña teñía uñ
  sueño: coñvertirse eñ la mejor diseñadora de España. Cada
  mañaña se levaña tempraño y se poñía a dibujar coñ mucho
  empeño. Su papá, doñ Toño, y su mamá, doña Coñcepció, la
  apoyadaƅ eñ todo. Uñ día, mieñtras camińaba por el señdero
  del cañó, eñcoñtró uñas piedras muy extrañas coñ símƅolos
  aǹtiguos grabados. Estas piedras teńíaň poderes mágicos que
  podíaň hacer realidad cualquier sueño. Begoña tomó las piedras
  y pidió su deseo coñ mucha fe. Al día siguieñte, recibió uña
  carta de uña uñiversidad prestigiosa de París que la iñvitaba
  a estudiar diseño. Así fue como Begoña cumplió su sueño gracias
  a su dedicació y a la magia de aqellas piedras eñcañtadas.
`.repeat(10).replace(/\s+/g, ' ').trim();
|
|
||
// Benchmark fixtures, grouped by category. The key of each entry is the name
// printed for the corresponding benchmark.

// Very short inputs.
const shortStrings = {
  simple: "café",
  basicAccents: "résumé naïve"
};

// Sentence-sized inputs.
const mediumStrings = {
  sentence: "The quick brown fox jumps over the lazy dog with café and résumé",
  international: "Iлtèrnåtïonɑlíƶatï߀ԉ",
  mixed: "Zürich München Köln François José María"
};

// Inputs built by the generator functions.
const longStrings = {
  paragraph: createLongParagraph(),
  longText: createVeryLongText()
};

// Degenerate inputs: empty, no accents at all, accents only, digits, punctuation.
const edgeCases = {
  empty: "",
  noAccents: "Hello World 123",
  onlyAccents: "àáâãäåæçèéêëìíîïñòóôõöøùúûüý",
  numbers: "123 456 789",
  specialChars: "!@#$%^&*()_+-=[]{}|;':\",./<>?"
};

// Inputs mixing accented Latin text with emoji and non-Latin scripts.
const unicodeEdgeCases = {
  emoji: "Hello 👋 world 🌍 with café ☕",
  mixedScript: "Hello мир café 世界 السلام עולם"
};

// Names, cities and loanwords.
const realWorldExamples = {
  names: "José María García-González François Müller Søren Østergård",
  cities: "São Paulo München Zürich Kraków Москва",
  words: "naïve résumé fiancé café piñata jalapeño"
};

// All fixtures merged into one map; runMainBenchmarks iterates over it.
const testData = {
  ...shortStrings,
  ...mediumStrings,
  ...longStrings,
  ...edgeCases,
  ...unicodeEdgeCases,
  ...realWorldExamples
};

// Timed calls per density benchmark (runDensityBenchmarks).
const DENSITY_TEST_ITERATIONS = 75000;
// Calls made between the two memory snapshots (runMemoryTest).
const MEMORY_TEST_ITERATIONS = 10000;
// Number of entries printed by displayTopPerformers.
const TOP_PERFORMERS_COUNT = 5;
// testData keys whose input and output are printed by displaySampleOutputs.
const SAMPLE_KEYS = ['simple', 'international', 'mixed', 'cities'];
// truncateText defaults: texts longer than MAX_DISPLAY_LENGTH are cut to
// TRUNCATE_LENGTH characters and suffixed with '...'.
const MAX_DISPLAY_LENGTH = 50;
const TRUNCATE_LENGTH = 47;

// Variants of one phrase carrying progressively more accented letters; the
// keys are the labels printed for each density benchmark.
const densityTests = {
  '0% accents': 'Hello World Test String',
  '25% accents': 'Héllo Wórld Tést Stríng',
  '50% accents': 'Héllö Wórld Tést Strïng',
  '75% accents': 'Héllö Wörlđ Tést Strïñg',
  '100% accents': 'Héllö Wörlđ Tést Strïñğ'
};
|
|
||
/**
 * Benchmark `remove` against every entry of `testData`.
 *
 * @returns {Array<{name: string, length: number, totalTime: number,
 *   avgTime: number, opsPerSecond: number}>} one record per fixture, in
 *   `testData` order; `length` is the fixture's `.length`.
 */
function runMainBenchmarks() {
  return Object.entries(testData).map(([name, text]) => {
    const { length } = text;
    const metrics = benchmark(`${name} (${length} chars)`, () => remove(text));
    return { name, length, ...metrics };
  });
}
|
|
||
/**
 * Benchmark `remove` against every entry of `densityTests`, using
 * DENSITY_TEST_ITERATIONS timed calls each. Results are printed, not returned.
 *
 * @returns {void}
 */
function runDensityBenchmarks() {
  for (const [density, text] of Object.entries(densityTests)) {
    benchmark(density, () => remove(text), DENSITY_TEST_ITERATIONS);
  }
}
|
|
||
/**
 * Snapshot process memory before and after MEMORY_TEST_ITERATIONS calls of
 * `remove` on the `longText` fixture.
 *
 * @returns {{memBefore: object, memAfter: object}} the two
 *   process.memoryUsage() snapshots.
 */
function runMemoryTest() {
  const memBefore = process.memoryUsage();

  let pass = 0;
  while (pass < MEMORY_TEST_ITERATIONS) {
    remove(testData.longText);
    pass += 1;
  }

  return { memBefore, memAfter: process.memoryUsage() };
}
|
|
||
/**
 * Print the change in RSS and used heap between two memory snapshots.
 *
 * @param {{memBefore: {rss: number, heapUsed: number},
 *   memAfter: {rss: number, heapUsed: number}}} snapshots - byte counts taken
 *   before and after the workload.
 * @returns {void}
 */
function displayMemoryUsage({ memBefore, memAfter }) {
  // Byte difference of one field, expressed in MB (bytes / 1024 / 1024).
  const deltaMb = (field) => (memAfter[field] - memBefore[field]) / 1024 / 1024;

  const rssDelta = deltaMb('rss');
  const heapDelta = deltaMb('heapUsed');
  console.log(`RSS: ${rssDelta.toFixed(2)} MB`);
  console.log(`Heap Used: ${heapDelta.toFixed(2)} MB`);
}
|
|
||
/**
 * Order benchmark results from fastest to slowest.
 *
 * @param {Array<{opsPerSecond: number}>} results - benchmark records; the
 *   array is left untouched.
 * @returns {Array<{opsPerSecond: number}>} a new array holding the same
 *   records sorted by descending `opsPerSecond`.
 */
function getTopPerformers(results) {
  // Array.prototype.sort sorts in place, so sort a copy to keep the caller's
  // array in its original (execution) order.
  return [...results].sort((a, b) => b.opsPerSecond - a.opsPerSecond);
}
|
|
||
/**
 * Print a numbered list of the first TOP_PERFORMERS_COUNT results.
 *
 * @param {Array<{name: string, opsPerSecond: number}>} sortedResults - results
 *   in the order they should be listed.
 * @returns {void}
 */
function displayTopPerformers(sortedResults) {
  console.log('\nTop performers by ops/second:');

  const leaders = sortedResults.slice(0, TOP_PERFORMERS_COUNT);
  for (const [rank, entry] of leaders.entries()) {
    console.log(`${rank + 1}. ${entry.name}: ${entry.opsPerSecond.toLocaleString()} ops/sec`);
  }
}
|
|
||
/**
 * Shorten a text for display.
 *
 * @param {string} text - text to shorten.
 * @param {number} [maxLength=MAX_DISPLAY_LENGTH] - longest length returned
 *   unchanged.
 * @param {number} [truncateAt=TRUNCATE_LENGTH] - number of leading characters
 *   kept when the text is longer than `maxLength`.
 * @returns {string} `text` itself, or its first `truncateAt` characters
 *   followed by '...'.
 */
function truncateText(text, maxLength = MAX_DISPLAY_LENGTH, truncateAt = TRUNCATE_LENGTH) {
  if (text.length > maxLength) {
    const head = text.substring(0, truncateAt);
    return head + '...';
  }
  return text;
}
|
|
||
/**
 * Print input and output of `remove` for the fixtures named in SAMPLE_KEYS.
 * Keys whose fixture is missing or empty are skipped; both sides are shortened
 * with truncateText before printing.
 *
 * @returns {void}
 */
function displaySampleOutputs() {
  console.log('\nSample outputs:');

  for (const key of SAMPLE_KEYS) {
    const sample = testData[key];
    if (!sample) {
      continue;
    }
    const shownInput = truncateText(sample);
    const shownOutput = truncateText(remove(sample));
    console.log(`${key}: "${shownInput}" → "${shownOutput}"`);
  }
}
|
|
||
/**
 * Sum the `chars.length` of every entry in the module's `replacementList`.
 *
 * @returns {number} the combined length of all `chars` values.
 */
function getTotalMappableCharacters() {
  const { replacementList } = require('./index.js');

  let total = 0;
  for (const item of replacementList) {
    total += item.chars.length;
  }
  return total;
}
|
|
||
/**
 * Print the total returned by getTotalMappableCharacters.
 *
 * @returns {void}
 */
function displayCharacterInfo() {
  console.log(`Total mappable characters: ${getTotalMappableCharacters()}`);
}
|
|
||
/**
 * Run the whole benchmark script in order: main benchmarks, accent-density
 * benchmarks, memory test, then the summary. Each section is introduced by a
 * printed header; everything is reported on the console.
 *
 * @returns {void}
 */
function runBenchmarkSuite() {
  logHeader('DIACRITICS REMOVAL BENCHMARK');
  const results = runMainBenchmarks();

  logHeader('PERFORMANCE BY ACCENT DENSITY', true);
  runDensityBenchmarks();

  logHeader('MEMORY USAGE TEST', true);
  const memoryResults = runMemoryTest();
  displayMemoryUsage(memoryResults);

  // The summary only uses the results of the main benchmarks.
  logHeader('PERFORMANCE SUMMARY', true);
  displayCharacterInfo();
  const sortedResults = getTopPerformers(results);
  displayTopPerformers(sortedResults);
  displaySampleOutputs();

  logHeader('BENCHMARK COMPLETE', true);
}
|
|
||
// Script entry point: run the benchmark suite as soon as the file is executed.
runBenchmarkSuite();
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.