From f70211904abbf0b6c9140255c961be6471101e82 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 22:35:18 +0000 Subject: [PATCH 01/14] Initial plan From f81ec957e8bd2248f22643d0e66b70b030f68111 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 22:38:17 +0000 Subject: [PATCH 02/14] Implement markdown text filter for better TTS output Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/content.js b/content.js index e19f7c0..6328865 100644 --- a/content.js +++ b/content.js @@ -231,11 +231,61 @@ function queueSpeech(text) { } } +// Filter text for better speech synthesis +// Handles markdown complications: headers, separators, lists, etc. +function filterTextForSpeech(text) { + if (!text || text.trim().length === 0) { + return text; + } + + let filtered = text; + + // 1. Handle separator lines (===..., ---..., etc.) + // Replace lines with 4+ consecutive repeated characters with a pause or skip them + filtered = filtered.replace(/^[=\-_*]{4,}$/gm, ''); // Remove separator lines entirely + + // 2. Handle headers with # symbols + // Add pauses after headers by converting them to sentences with periods + filtered = filtered.replace(/^(#{1,6})\s+(.+)$/gm, (match, hashes, title) => { + // Count the number of # symbols to determine header level + const level = hashes.length; + // Return the title with a period to create a natural pause + return title + '.'; + }); + + // 3. Handle numbered lists (1., 2., 3., etc.) + // Announce the item number and add pauses between items + filtered = filtered.replace(/^(\d+)\.\s+(.+)$/gm, (match, number, content) => { + return `Item ${number}. ${content}.`; + }); + + // 4. 
Handle bullet lists (*, -, +) + // Announce "bullet" and add pauses between items + filtered = filtered.replace(/^[\*\-\+]\s+(.+)$/gm, (match, content) => { + return `Bullet point. ${content}.`; + }); + + // 5. Clean up excessive repeated punctuation (e.g., "!!!!" -> "!") + filtered = filtered.replace(/([!?.]){4,}/g, '$1'); + + // 6. Remove any multiple consecutive line breaks that may have been created + filtered = filtered.replace(/\n{3,}/g, '\n\n'); + + // 7. Clean up any leading/trailing whitespace + filtered = filtered.trim(); + + return filtered; +} + // Extract text from a markdown paragraph element function extractTextFromElement(element) { // Get text content and clean it up const text = element.textContent.trim(); - return text; + + // Apply speech filter to handle markdown complications + const filteredText = filterTextForSpeech(text); + + return filteredText; } // Helper function to check if an element has a parent with a specific class From d399088595829e250c2f39172d921534dfc5d4fb Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 22:39:18 +0000 Subject: [PATCH 03/14] Update README with markdown filtering documentation Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index 7809583..d31025f 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,12 @@ A Chrome browser extension that monitors GitHub Copilot Tasks pages and speaks t - Automatically monitors `https://github.com/copilot/tasks/*` pages - Speaks markdown content from Copilot's responses as they appear +- **Intelligent markdown filtering** for better speech quality: + - Removes separator lines (`===...`, `---...`) to avoid repetitive speech + - Adds natural pauses after headers (`# Title`, `## Subtitle`) + - Announces list item numbers ("Item 1", "Item 2") + - Announces bullet points ("Bullet point") + - 
Cleans up excessive punctuation (`!!!!` → `!`) - Visual highlighting of the element currently being spoken - Navigation controls: Previous, Pause/Play, Next - Progress slider to jump to any item in the conversation @@ -43,6 +49,12 @@ When new text content is detected, it is queued for speaking. After the first us - A 2-second delay between items for better pacing - Visual highlighting (yellow background) on the element currently being spoken - Configurable speech rate and pitch settings saved across sessions +- **Intelligent markdown filtering** to improve speech quality: + - Headers (`# Title`, `## Subtitle`, etc.) are converted to natural sentences with pauses + - Separator lines (`===...`, `---...`, etc.) are removed to avoid repetitive speech + - Numbered lists (`1.`, `2.`, etc.) are announced as "Item 1", "Item 2" with pauses + - Bullet lists (`*`, `-`, `+`) are announced as "Bullet point" with pauses + - Excessive punctuation (`!!!!`, `????`) is cleaned up for cleaner speech ## Installation @@ -112,6 +124,11 @@ The extension consists of: - **Speech Queue**: Items are queued and spoken sequentially with 2-second delays - **Visual Feedback**: Yellow highlighting indicates which element is currently being spoken - **User Interaction Requirement**: Complies with browser autoplay policies by requiring initial user interaction +- **Markdown Text Filtering**: Automatically processes markdown to improve speech quality + - Removes separator lines (e.g., `============`) + - Adds natural pauses after headers + - Announces list item numbers and bullet points + - Cleans up excessive punctuation - **Persistent Settings**: Speech rate and pitch preferences are saved using chrome.storage.sync - **Smart Content Filtering**: Only speaks Copilot responses and status messages, excludes tool execution logs From 5d578da4337eb37f4a452b106afbdaaa3ea154c8 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 
22:40:56 +0000 Subject: [PATCH 04/14] Address code review feedback: remove unused variable and fix punctuation regex Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/content.js b/content.js index 6328865..91edc23 100644 --- a/content.js +++ b/content.js @@ -247,8 +247,6 @@ function filterTextForSpeech(text) { // 2. Handle headers with # symbols // Add pauses after headers by converting them to sentences with periods filtered = filtered.replace(/^(#{1,6})\s+(.+)$/gm, (match, hashes, title) => { - // Count the number of # symbols to determine header level - const level = hashes.length; // Return the title with a period to create a natural pause return title + '.'; }); @@ -265,8 +263,8 @@ function filterTextForSpeech(text) { return `Bullet point. ${content}.`; }); - // 5. Clean up excessive repeated punctuation (e.g., "!!!!" -> "!") - filtered = filtered.replace(/([!?.]){4,}/g, '$1'); + // 5. Clean up excessive repeated punctuation (e.g., "!!!!" -> "!", but not periods) + filtered = filtered.replace(/([!?]){4,}/g, '$1'); // 6. Remove any multiple consecutive line breaks that may have been created filtered = filtered.replace(/\n{3,}/g, '\n\n'); From 92d77cfa953c6938a3038f7d102e09d670879454 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 22:45:28 +0000 Subject: [PATCH 05/14] Fix regex issues: escape hyphens, handle empty lists, prevent newline matching Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/content.js b/content.js index 91edc23..e4152ff 100644 --- a/content.js +++ b/content.js @@ -242,24 +242,39 @@ function filterTextForSpeech(text) { // 1. Handle separator lines (===..., ---..., etc.) 
// Replace lines with 4+ consecutive repeated characters with a pause or skip them - filtered = filtered.replace(/^[=\-_*]{4,}$/gm, ''); // Remove separator lines entirely + filtered = filtered.replace(/^[=_*-]{4,}$/gm, ''); // Remove separator lines entirely (hyphen at end to avoid range) // 2. Handle headers with # symbols // Add pauses after headers by converting them to sentences with periods - filtered = filtered.replace(/^(#{1,6})\s+(.+)$/gm, (match, hashes, title) => { + filtered = filtered.replace(/^(#{1,6})[ \t]+(.+)$/gm, (match, hashes, title) => { // Return the title with a period to create a natural pause return title + '.'; }); // 3. Handle numbered lists (1., 2., 3., etc.) // Announce the item number and add pauses between items - filtered = filtered.replace(/^(\d+)\.\s+(.+)$/gm, (match, number, content) => { + filtered = filtered.replace(/^(\d+)\.[ \t]+([^\n]*)$/gm, (match, number, content) => { + // Handle empty list items gracefully + if (content.trim().length === 0) { + return `Item ${number}.`; + } return `Item ${number}. ${content}.`; }); // 4. Handle bullet lists (*, -, +) // Announce "bullet" and add pauses between items - filtered = filtered.replace(/^[\*\-\+]\s+(.+)$/gm, (match, content) => { + // Handle dash bullets first to ensure they're processed correctly + filtered = filtered.replace(/^-[ \t]+([^\n]*)$/gm, (match, content) => { + if (content.trim().length === 0) { + return `Bullet point.`; + } + return `Bullet point. ${content}.`; + }); + filtered = filtered.replace(/^[\*+][ \t]+([^\n]*)$/gm, (match, content) => { + // Handle empty list items gracefully + if (content.trim().length === 0) { + return `Bullet point.`; + } return `Bullet point. 
${content}.`; }); From 5d61a3aa4288ecec9185cf4fa3239ed9b7f2c0e9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 22:47:08 +0000 Subject: [PATCH 06/14] Escape asterisks in regex and refactor bullet list handling to reduce duplication Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/content.js b/content.js index e4152ff..4777afb 100644 --- a/content.js +++ b/content.js @@ -242,7 +242,7 @@ function filterTextForSpeech(text) { // 1. Handle separator lines (===..., ---..., etc.) // Replace lines with 4+ consecutive repeated characters with a pause or skip them - filtered = filtered.replace(/^[=_*-]{4,}$/gm, ''); // Remove separator lines entirely (hyphen at end to avoid range) + filtered = filtered.replace(/^[=_\*-]{4,}$/gm, ''); // Remove separator lines entirely (asterisk escaped, hyphen at end) // 2. Handle headers with # symbols // Add pauses after headers by converting them to sentences with periods @@ -263,20 +263,17 @@ function filterTextForSpeech(text) { // 4. Handle bullet lists (*, -, +) // Announce "bullet" and add pauses between items - // Handle dash bullets first to ensure they're processed correctly - filtered = filtered.replace(/^-[ \t]+([^\n]*)$/gm, (match, content) => { + // Use helper function to format bullet content + const formatBulletContent = (content) => { if (content.trim().length === 0) { return `Bullet point.`; } return `Bullet point. ${content}.`; - }); - filtered = filtered.replace(/^[\*+][ \t]+([^\n]*)$/gm, (match, content) => { - // Handle empty list items gracefully - if (content.trim().length === 0) { - return `Bullet point.`; - } - return `Bullet point. 
${content}.`; - }); + }; + // Handle dash bullets first (to process before star/plus for clarity) + filtered = filtered.replace(/^-[ \t]+([^\n]*)$/gm, (match, content) => formatBulletContent(content)); + // Handle star and plus bullets + filtered = filtered.replace(/^[\*+][ \t]+([^\n]*)$/gm, (match, content) => formatBulletContent(content)); // 5. Clean up excessive repeated punctuation (e.g., "!!!!" -> "!", but not periods) filtered = filtered.replace(/([!?]){4,}/g, '$1'); From 78a0d03a3e9140b1c22b0d887be533cacc9527a9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 29 Dec 2025 22:48:40 +0000 Subject: [PATCH 07/14] Move formatBulletContent helper outside and simplify comments Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/content.js b/content.js index 4777afb..272ddd2 100644 --- a/content.js +++ b/content.js @@ -231,6 +231,14 @@ function queueSpeech(text) { } } +// Helper function to format bullet point content for speech +function formatBulletContent(content) { + if (content.trim().length === 0) { + return `Bullet point.`; + } + return `Bullet point. ${content}.`; +} + // Filter text for better speech synthesis // Handles markdown complications: headers, separators, lists, etc. function filterTextForSpeech(text) { @@ -241,8 +249,8 @@ function filterTextForSpeech(text) { let filtered = text; // 1. Handle separator lines (===..., ---..., etc.) - // Replace lines with 4+ consecutive repeated characters with a pause or skip them - filtered = filtered.replace(/^[=_\*-]{4,}$/gm, ''); // Remove separator lines entirely (asterisk escaped, hyphen at end) + // Remove lines with 4+ consecutive repeated characters + filtered = filtered.replace(/^[=_\*-]{4,}$/gm, ''); // 2. 
Handle headers with # symbols // Add pauses after headers by converting them to sentences with periods @@ -263,13 +271,6 @@ function filterTextForSpeech(text) { // 4. Handle bullet lists (*, -, +) // Announce "bullet" and add pauses between items - // Use helper function to format bullet content - const formatBulletContent = (content) => { - if (content.trim().length === 0) { - return `Bullet point.`; - } - return `Bullet point. ${content}.`; - }; // Handle dash bullets first (to process before star/plus for clarity) filtered = filtered.replace(/^-[ \t]+([^\n]*)$/gm, (match, content) => formatBulletContent(content)); // Handle star and plus bullets From fa7feb25fba646f548765120d2a1821e0a054e3a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:26:20 +0000 Subject: [PATCH 08/14] Add HTML structure-aware text extraction for natural pauses Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 48 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 2 deletions(-) diff --git a/content.js b/content.js index 272ddd2..2015727 100644 --- a/content.js +++ b/content.js @@ -288,10 +288,54 @@ function filterTextForSpeech(text) { return filtered; } +// Helper function to extract text from HTML with structure awareness +// Adds pauses after block-level elements for more natural speech +function extractTextFromHTML(element) { + // Block-level elements that should have pauses after them + const blockElements = new Set([ + 'P', 'DIV', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', + 'LI', 'UL', 'OL', 'BLOCKQUOTE', 'PRE', + 'TABLE', 'TR', 'TD', 'TH', 'SECTION', 'ARTICLE', + 'HEADER', 'FOOTER', 'NAV', 'ASIDE' + ]); + + let text = ''; + + // Walk through all child nodes + function walkNodes(node) { + if (node.nodeType === Node.TEXT_NODE) { + // Add text content + const content = node.textContent.trim(); + if (content) { + text += content + ' '; + } + } else if 
(node.nodeType === Node.ELEMENT_NODE) { + const tagName = node.tagName; + + // Process children first + for (let child of node.childNodes) { + walkNodes(child); + } + + // Add pause after block elements + if (blockElements.has(tagName)) { + // Add period for natural pause if text doesn't already end with punctuation + if (text.length > 0 && !/[.!?]\s*$/.test(text)) { + text = text.trim() + '. '; + } + } + } + } + + walkNodes(element); + + return text.trim(); +} + // Extract text from a markdown paragraph element function extractTextFromElement(element) { - // Get text content and clean it up - const text = element.textContent.trim(); + // Use HTML-aware extraction to preserve structure and add natural pauses + const text = extractTextFromHTML(element); // Apply speech filter to handle markdown complications const filteredText = filterTextForSpeech(text); From 7022beb21abe58e5841a4d4be3840fde1dca7ef4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:27:25 +0000 Subject: [PATCH 09/14] Update README to document HTML structure-aware text extraction Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- README.md | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index d31025f..c6170e9 100644 --- a/README.md +++ b/README.md @@ -6,12 +6,14 @@ A Chrome browser extension that monitors GitHub Copilot Tasks pages and speaks t - Automatically monitors `https://github.com/copilot/tasks/*` pages - Speaks markdown content from Copilot's responses as they appear -- **Intelligent markdown filtering** for better speech quality: - - Removes separator lines (`===...`, `---...`) to avoid repetitive speech +- **Intelligent text filtering** for better speech quality: + - **HTML structure awareness**: Adds natural pauses after block elements (paragraphs, headers, list items) + - **Markdown filtering**: Removes separator lines (`===...`, 
`---...`) to avoid repetitive speech - Adds natural pauses after headers (`# Title`, `## Subtitle`) - Announces list item numbers ("Item 1", "Item 2") - Announces bullet points ("Bullet point") - Cleans up excessive punctuation (`!!!!` → `!`) + - Works with both markdown text and HTML-rendered content - Visual highlighting of the element currently being spoken - Navigation controls: Previous, Pause/Play, Next - Progress slider to jump to any item in the conversation @@ -49,12 +51,14 @@ When new text content is detected, it is queued for speaking. After the first us - A 2-second delay between items for better pacing - Visual highlighting (yellow background) on the element currently being spoken - Configurable speech rate and pitch settings saved across sessions -- **Intelligent markdown filtering** to improve speech quality: - - Headers (`# Title`, `## Subtitle`, etc.) are converted to natural sentences with pauses +- **Intelligent text filtering** to improve speech quality: + - **HTML structure awareness**: Detects block elements (paragraphs, headers, list items) and adds natural pauses between them + - **Markdown filtering**: Headers (`# Title`, `## Subtitle`, etc.) are converted to natural sentences with pauses - Separator lines (`===...`, `---...`, etc.) are removed to avoid repetitive speech - Numbered lists (`1.`, `2.`, etc.) 
are announced as "Item 1", "Item 2" with pauses - Bullet lists (`*`, `-`, `+`) are announced as "Bullet point" with pauses - Excessive punctuation (`!!!!`, `????`) is cleaned up for cleaner speech + - Works seamlessly with both markdown text and HTML-rendered content from Copilot ## Installation @@ -124,11 +128,13 @@ The extension consists of: - **Speech Queue**: Items are queued and spoken sequentially with 2-second delays - **Visual Feedback**: Yellow highlighting indicates which element is currently being spoken - **User Interaction Requirement**: Complies with browser autoplay policies by requiring initial user interaction -- **Markdown Text Filtering**: Automatically processes markdown to improve speech quality +- **Intelligent Text Filtering**: Automatically processes both markdown and HTML to improve speech quality + - HTML structure awareness: Adds natural pauses after block-level elements (paragraphs, headers, list items) - Removes separator lines (e.g., `============`) - Adds natural pauses after headers - Announces list item numbers and bullet points - Cleans up excessive punctuation + - Works with both plain markdown text and HTML-rendered content - **Persistent Settings**: Speech rate and pitch preferences are saved using chrome.storage.sync - **Smart Content Filtering**: Only speaks Copilot responses and status messages, excludes tool execution logs From ac8240f95606bdf2e2c8bda62ae8eb17c2c25429 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Dec 2025 01:05:46 +0000 Subject: [PATCH 10/14] Break down content into separate sections for better navigation Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 88 insertions(+), 6 deletions(-) diff --git a/content.js b/content.js index 2015727..6dc22cc 100644 --- a/content.js +++ b/content.js @@ -288,7 +288,76 @@ function 
filterTextForSpeech(text) { return filtered; } -// Helper function to extract text from HTML with structure awareness +// Helper function to extract text sections from HTML with structure awareness +// Returns an array of text sections from block-level elements for more granular speech control +function extractTextSectionsFromHTML(element) { + // Block-level elements that should be treated as separate speech sections + const sectionElements = new Set([ + 'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', + 'LI', 'BLOCKQUOTE', 'PRE' + ]); + + // Container elements that we traverse but don't create sections for + const containerElements = new Set([ + 'DIV', 'UL', 'OL', 'TABLE', 'TR', 'TD', 'TH', + 'SECTION', 'ARTICLE', 'HEADER', 'FOOTER', 'NAV', 'ASIDE' + ]); + + const sections = []; + + // Extract text from a single section element + function extractSectionText(node) { + let text = ''; + + function walkNodes(n) { + if (n.nodeType === Node.TEXT_NODE) { + const content = n.textContent.trim(); + if (content) { + text += content + ' '; + } + } else if (n.nodeType === Node.ELEMENT_NODE) { + // For inline elements, just continue walking + for (let child of n.childNodes) { + walkNodes(child); + } + } + } + + walkNodes(node); + return text.trim(); + } + + // Walk through nodes and identify sections + function findSections(node) { + if (node.nodeType === Node.ELEMENT_NODE) { + const tagName = node.tagName; + + // If this is a section element, extract its text as a separate item + if (sectionElements.has(tagName)) { + const text = extractSectionText(node); + if (text) { + sections.push({ text, element: node }); + } + } else if (containerElements.has(tagName)) { + // For containers, process children to find sections + for (let child of node.childNodes) { + findSections(child); + } + } else { + // For other elements, process children + for (let child of node.childNodes) { + findSections(child); + } + } + } + } + + findSections(element); + + return sections; +} + +// Helper function to 
extract text from HTML with structure awareness (legacy) // Adds pauses after block-level elements for more natural speech function extractTextFromHTML(element) { // Block-level elements that should have pauses after them @@ -399,17 +468,30 @@ function addSpokenItem(text, element) { return false; } -// Process a markdown container and extract all inner text +// Process a markdown container and extract text sections function processMarkdownContainer(container, sessionContainer) { // Check if this container should be spoken based on verbosity if (!shouldSpeakElement(container, sessionContainer)) { return; } - // Extract all text content from the markdown container (not just

blocks) - const text = extractTextFromElement(container); - if (text) { - addSpokenItem(text, container); + // Try to extract text as separate sections for better granularity + const sections = extractTextSectionsFromHTML(container); + + if (sections.length > 0) { + // Process each section separately + sections.forEach(section => { + const filteredText = filterTextForSpeech(section.text); + if (filteredText) { + addSpokenItem(filteredText, section.element); + } + }); + } else { + // Fallback to extracting all text as one item (for elements with no block structure) + const text = extractTextFromElement(container); + if (text) { + addSpokenItem(text, container); + } } } From 83d80a821d0d8a070bea6d66da59912789174050 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Dec 2025 01:06:52 +0000 Subject: [PATCH 11/14] Update README to document section-based content breakdown Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c6170e9..8e53624 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ A Chrome browser extension that monitors GitHub Copilot Tasks pages and speaks t - Automatically monitors `https://github.com/copilot/tasks/*` pages - Speaks markdown content from Copilot's responses as they appear +- **Granular content breakdown**: Each paragraph, header, and list item becomes a separate speech item for easy navigation - **Intelligent text filtering** for better speech quality: - **HTML structure awareness**: Adds natural pauses after block elements (paragraphs, headers, list items) - **Markdown filtering**: Removes separator lines (`===...`, `---...`) to avoid repetitive speech @@ -15,7 +16,7 @@ A Chrome browser extension that monitors GitHub Copilot Tasks pages and speaks t - Cleans up excessive punctuation (`!!!!` → `!`) - Works with both markdown text and 
HTML-rendered content - Visual highlighting of the element currently being spoken -- Navigation controls: Previous, Pause/Play, Next +- Navigation controls: Previous, Pause/Play, Next (navigate between individual sections) - Progress slider to jump to any item in the conversation - Test Speak button to verify speech functionality - **Speech verbosity control** with three levels: @@ -48,11 +49,12 @@ The **New Only** checkbox (enabled by default) controls whether to skip pre-exis - When unchecked: Speaks all content found on the page, including what was already there When new text content is detected, it is queued for speaking. After the first user interaction (click or keypress), items are spoken automatically using the Web Speech API with: +- **Granular section breakdown**: Content is split into individual paragraphs, headers, and list items for better navigation - A 2-second delay between items for better pacing - Visual highlighting (yellow background) on the element currently being spoken - Configurable speech rate and pitch settings saved across sessions - **Intelligent text filtering** to improve speech quality: - - **HTML structure awareness**: Detects block elements (paragraphs, headers, list items) and adds natural pauses between them + - **HTML structure awareness**: Detects block elements (paragraphs, headers, list items) and creates separate speech items for each - **Markdown filtering**: Headers (`# Title`, `## Subtitle`, etc.) are converted to natural sentences with pauses - Separator lines (`===...`, `---...`, etc.) are removed to avoid repetitive speech - Numbered lists (`1.`, `2.`, etc.) 
are announced as "Item 1", "Item 2" with pauses @@ -125,11 +127,13 @@ The extension consists of: - **Manifest** (`manifest.json`): Extension configuration with proper permissions and content script injection ### Key Features +- **Granular Navigation**: Content is split into individual sections (paragraphs, headers, list items) for precise navigation - **Speech Queue**: Items are queued and spoken sequentially with 2-second delays - **Visual Feedback**: Yellow highlighting indicates which element is currently being spoken - **User Interaction Requirement**: Complies with browser autoplay policies by requiring initial user interaction - **Intelligent Text Filtering**: Automatically processes both markdown and HTML to improve speech quality - - HTML structure awareness: Adds natural pauses after block-level elements (paragraphs, headers, list items) + - Section breakdown: Each paragraph, header, and list item becomes a separate navigable speech item + - HTML structure awareness: Adds natural pauses after block-level elements - Removes separator lines (e.g., `============`) - Adds natural pauses after headers - Announces list item numbers and bullet points From ec8ba2034b0966a0ef24cfebec204cd7f8ce1ead Mon Sep 17 00:00:00 2001 From: Paul Peavyhouse Date: Mon, 29 Dec 2025 17:09:51 -0800 Subject: [PATCH 12/14] Adding task seed that results in speaking run-on sentences --- seeds/copilot_runonsentence.html | 80 ++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 seeds/copilot_runonsentence.html diff --git a/seeds/copilot_runonsentence.html b/seeds/copilot_runonsentence.html new file mode 100644 index 0000000..ba72433 --- /dev/null +++ b/seeds/copilot_runonsentence.html @@ -0,0 +1,80 @@ +

+
+

Perfect! Now I understand the codebase structure. Let me analyze the problem:

+

Problem Analysis:

+

The issue is that markdown formatting complications cause problems in text-to-speech:

+
    +
  1. Headers (# Title, ## Subtitle, etc.) run together without pauses
  2. +
  3. Separator lines (===...) are spoken as "equals equals equals..."
  4. +
  5. Numbered lists run on without announcing numbers or pauses
  6. +
  7. Bullet lists run on without announcing bullets or pauses
  8. +
+

The fix needs to be in the extractTextFromElement function in content.js, where we need to preprocess markdown text before speaking it. +

+

Let me create a plan and report progress:

+
+
+ + +
+
+

Perfect! Let me create a final summary of all the work completed:

+

Summary

+

I successfully addressed the new comment from @paulpv (PR comment #3697897503) which identified that the + issue wasn't just with markdown, but also with HTML-rendered content from GitHub Copilot.

+

Problem Identified

+

The original implementation used element.textContent which flattens the + HTML structure, causing all text from nested elements (paragraphs, headers, list items) to run together as + one long sentence without natural pauses.

+

Solution Implemented

+

1. Added extractTextFromHTML() function (commit + fa7feb2):

+ +

2. Updated extractTextFromElement() function:

+ +

3. Updated documentation (commit 7022beb):

+ +

Result

+

The extension now handles both:

+ +

Example from the PR comment:

+ +

The solution maintains backward compatibility while adding intelligent HTML structure awareness for more + natural speech output.

+

Security

+ +
+
From 1219f1f81ab30ef0f879df36dced245c1c8f41d2 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Dec 2025 01:23:32 +0000 Subject: [PATCH 13/14] Add extensive logging and fix observer to handle all section elements Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 81 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 60 insertions(+), 21 deletions(-) diff --git a/content.js b/content.js index 6dc22cc..7b5bf13 100644 --- a/content.js +++ b/content.js @@ -472,21 +472,32 @@ function addSpokenItem(text, element) { function processMarkdownContainer(container, sessionContainer) { // Check if this container should be spoken based on verbosity if (!shouldSpeakElement(container, sessionContainer)) { + console.log(`${TAG}: Skipping container due to verbosity filter`); return; } + console.log(`${TAG}: Processing markdown container for sections...`); + // Try to extract text as separate sections for better granularity const sections = extractTextSectionsFromHTML(container); + console.log(`${TAG}: Found ${sections.length} sections in container`); + if (sections.length > 0) { // Process each section separately - sections.forEach(section => { + sections.forEach((section, index) => { + console.log(`${TAG}: Section ${index + 1} [${section.element.tagName}]: "${section.text.substring(0, 80)}..."`); const filteredText = filterTextForSpeech(section.text); if (filteredText) { - addSpokenItem(filteredText, section.element); + console.log(`${TAG}: Filtered text: "${filteredText.substring(0, 80)}..."`); + const added = addSpokenItem(filteredText, section.element); + console.log(`${TAG}: Section ${index + 1} ${added ? 
'ADDED' : 'SKIPPED (duplicate or filtered)'}`); + } else { + console.log(`${TAG}: Section ${index + 1} SKIPPED (empty after filtering)`); } }); } else { + console.log(`${TAG}: No sections found, using fallback extraction`); // Fallback to extracting all text as one item (for elements with no block structure) const text = extractTextFromElement(container); if (text) { @@ -603,46 +614,74 @@ function processSessionContainer(sessionContainer) { console.log(`${TAG}: Set up content observer for session container`); } -// Observe a markdown container for new paragraphs +// Observe a markdown container for new content sections function observeMarkdownContainer(container, sessionContainer) { + // Section elements we want to detect when dynamically added + const sectionElements = new Set([ + 'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', + 'LI', 'BLOCKQUOTE', 'PRE' + ]); + const observer = new MutationObserver((mutations) => { mutations.forEach((mutation) => { mutation.addedNodes.forEach((node) => { if (node.nodeType === Node.ELEMENT_NODE) { - if (node.tagName === 'P') { - //console.log(`${TAG}: Found new

<p> element`); - // Check if this paragraph should be spoken based on verbosity + // Check if the added node itself is a section element + if (sectionElements.has(node.tagName)) { if (shouldSpeakElement(node, sessionContainer)) { - const text = extractTextFromElement(node); - if (addSpokenItem(text, node)) { - //console.log(`${TAG}: New paragraph detected`); + const text = extractSectionText(node); + const filteredText = filterTextForSpeech(text); + if (filteredText) { + addSpokenItem(filteredText, node); } } } - // Check for nested paragraphs - const nestedPs = node.querySelectorAll('p'); - if (nestedPs.length > 0) { - //console.log(`${TAG}: Found ${nestedPs.length} nested

<p> element(s)`); - } - nestedPs.forEach(p => { - if (shouldSpeakElement(p, sessionContainer)) { - const text = extractTextFromElement(p); - if (addSpokenItem(text, p)) { - //console.log(`${TAG}: New nested paragraph detected`); + + // Also check for nested section elements + sectionElements.forEach(tagName => { + const nestedElements = node.querySelectorAll(tagName.toLowerCase()); + nestedElements.forEach(elem => { + if (shouldSpeakElement(elem, sessionContainer)) { + const text = extractSectionText(elem); + const filteredText = filterTextForSpeech(text); + if (filteredText) { + addSpokenItem(filteredText, elem); + } } - } + }); }); } }); }); }); + + // Helper to extract text from a single section (reused from extractTextSectionsFromHTML) + function extractSectionText(node) { + let text = ''; + + function walkNodes(n) { + if (n.nodeType === Node.TEXT_NODE) { + const content = n.textContent.trim(); + if (content) { + text += content + ' '; + } + } else if (n.nodeType === Node.ELEMENT_NODE) { + for (let child of n.childNodes) { + walkNodes(child); + } + } + } + + walkNodes(node); + return text.trim(); + } observer.observe(container, { childList: true, subtree: true }); - console.log(`${TAG}: Observing markdown container for new paragraphs`); + console.log(`${TAG}: Observing markdown container for new content sections`); } // Find and monitor the main TaskChat container From a4273609ca2098b7c05670089d786baef196a149 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 30 Dec 2025 06:26:48 +0000 Subject: [PATCH 14/14] Add detailed logging to extractTextSectionsFromHTML to debug missing sections Co-authored-by: paulpv <1393897+paulpv@users.noreply.github.com> --- content.js | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/content.js b/content.js index 7b5bf13..c622c39 100644 --- a/content.js +++ b/content.js @@ -335,8 +335,11 @@ function extractTextSectionsFromHTML(element) { // If this is a section element, 
extract its text as a separate item if (sectionElements.has(tagName)) { const text = extractSectionText(node); + console.log(`${TAG}: [extractTextSectionsFromHTML] Found ${tagName} element, text length: ${text.length}, text: "${text.substring(0, 50)}${text.length > 50 ? '...' : ''}"`); if (text) { sections.push({ text, element: node }); + } else { + console.log(`${TAG}: [extractTextSectionsFromHTML] ${tagName} has NO TEXT, skipping`); } } else if (containerElements.has(tagName)) { // For containers, process children to find sections @@ -354,6 +357,7 @@ function extractTextSectionsFromHTML(element) { findSections(element); + console.log(`${TAG}: [extractTextSectionsFromHTML] Total sections found: ${sections.length}`); return sections; }