From 12103c9f103bf43cb11053c5023dba0c39e1da97 Mon Sep 17 00:00:00 2001 From: Lars Vogel Date: Wed, 15 Apr 2026 18:58:42 +0200 Subject: [PATCH 1/6] Add renderer performance tracing and performance analysis doc Add RendererPerfTracer utility for lightweight CSV-based performance tracing of 14 identified hotspots in SWT renderers. Tracing is always active in this debug build. Output goes to ~/renderer-perf-trace.csv (configurable via -Declipse.renderer.perf.trace.file=). Usage: 1. Build the IDE with this commit 2. Launch the IDE normally 3. Use the IDE for a while (open/close editors, switch perspectives, use menus, resize sashes, switch tabs, save files) 4. Close the IDE 5. Analyze ~/renderer-perf-trace.csv CSV format: timestamp_ms,hotspot_id,duration_ns,detail Instrumented hotspots (matching docs/performance.md): - H01: findElements for CSS_ACTIVE_CLASS on every activation - H02: findElements for placeholders on label/item changes - H03: unbatched updateWidget/requestLayout in ToolBarManagerRenderer - H04: ALL_SELECTOR update on every dirty flag change - H05: ToolItemUpdater linear scan with ArrayList - H06: unbatched scheduleManagerUpdate in MenuManagerRenderer - H07: per-item EclipseContext creation in menu show - H09: findElements for MPartStack on window activate/deactivate - H10: showTab without setRedraw batching - H11: limbo reparenting without setRedraw batching - H12: synchCTFState recursive tree walk from getUIContainer - H13: full model scan at app startup in ToolControlRenderer - H14: uncoalesced RunAndTrack callbacks - W1: synchronous layout(true,true) on Windows (SWT bug 558392) --- .../workbench/renderers/swt/AreaRenderer.java | 5 + .../renderers/swt/MenuManagerRenderer.java | 3 + .../swt/MenuManagerRendererFilter.java | 8 + .../swt/PerspectiveStackRenderer.java | 5 + .../renderers/swt/RendererPerfTracer.java | 142 ++++++++++++++++++ .../workbench/renderers/swt/SashRenderer.java | 4 + .../renderers/swt/StackRenderer.java | 20 +++ .../renderers/swt/ToolBarManagerRenderer.java | 11 ++ .../renderers/swt/ToolControlRenderer.java | 5 + .../renderers/swt/ToolItemUpdater.java | 7 + .../workbench/renderers/swt/WBWRenderer.java | 6 + docs/performance.md | 131 ++++++++++++++++ 12 files changed, 347 insertions(+) create mode 100644 bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java create mode 100644 docs/performance.md diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/AreaRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/AreaRenderer.java index b1e032b4375..f3154422892 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/AreaRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/AreaRenderer.java @@ -225,6 +225,7 @@ private void ensureComposite(MArea areaModel, List stacks) { } private void synchCTFState(MArea areaModel) { + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; List stacks = findDirectStacks(areaModel); int count = 0; for (MPartStack stack : stacks) { @@ -239,6 +240,10 @@ private void synchCTFState(MArea areaModel) { } else { ensureComposite(areaModel, stacks); } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H12_AREA_SYNCH_CTF, _t0, + "stacks=" + stacks.size() + " rendered=" + count); //$NON-NLS-1$ //$NON-NLS-2$ + } } private List findDirectStacks(MPartSashContainer root) { diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRenderer.java index 397a6d0199f..b468dfab177 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRenderer.java @@ -1142,6 +1142,9 @@ private void unlinkMenu(MMenu menu) { } private void scheduleManagerUpdate(IContributionManager mgr) { + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.count(RendererPerfTracer.H06_MENU_SCHEDULE_UPDATE, mgr.getClass().getSimpleName()); + } // Bug 467000: Avoid repeatedly updating menu managers // This workaround is opt-in for 4.5 boolean workaroundEnabled = Boolean.getBoolean("eclipse.workaround.bug467000"); //$NON-NLS-1$ diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRendererFilter.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRendererFilter.java index 8b7459008a9..75b9a336868 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRendererFilter.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/MenuManagerRendererFilter.java @@ -160,6 +160,8 @@ public static void updateElementVisibility(final MMenu menuModel, MenuManagerRenderer renderer, MenuManager menuManager, final IEclipseContext evalContext, final int recurseLevel, boolean updateEnablement) { + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; + int contextCreationCount = 0; final ExpressionContext exprContext = new ExpressionContext(evalContext); HashSet records = new HashSet<>(); for (MMenuElement element : menuModel.getChildren()) { @@ -187,6 +189,7 @@ public static void updateElementVisibility(final MMenu menuModel, EHandlerService handlerService = evalContext .get(EHandlerService.class); if (cmd != null && handlerService != null) { + contextCreationCount++; final IEclipseContext staticContext = EclipseContextFactory .create(MMRF_STATIC_CONTEXT); ContributionsAnalyzer.populateModelInterfaces(item, @@ -206,6 +209,7 @@ public static void updateElementVisibility(final MMenu menuModel, ((MItem) element).setEnabled(ici.isEnabled()); } } else if (updateEnablement && element instanceof MDirectMenuItem contrib) { + contextCreationCount++; if (contrib.getObject() == null) { IContributionFactory icf = evalContext .get(IContributionFactory.class); @@ -234,6 +238,10 @@ public static void updateElementVisibility(final MMenu menuModel, } } } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H07_MENU_CONTEXT_PER_ITEM, _t0, + "children=" + menuModel.getChildren().size() + " ctxCreated=" + contextCreationCount); //$NON-NLS-1$ //$NON-NLS-2$ + } } private void cleanUp(final Menu menu) { diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/PerspectiveStackRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/PerspectiveStackRenderer.java index f6d17c75c51..66490509960 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/PerspectiveStackRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/PerspectiveStackRenderer.java @@ -102,10 +102,15 @@ protected void showTab(MUIElement tabElement) { // Move any other controls to 'limbo' Control[] kids = psComp.getChildren(); Shell limbo = (Shell) context.get("limbo"); //$NON-NLS-1$ + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; for (Control child : kids) { if (child != ctrl) { child.setParent(limbo); } } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H11_LIMBO_REPARENT, _t0, + "children=" + kids.length); //$NON-NLS-1$ + } } } diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java new file mode 100644 index 00000000000..1f365752e14 --- /dev/null +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java @@ -0,0 +1,142 @@ +/******************************************************************************* + * Copyright (c) 2026 Contributors to the Eclipse Foundation + * + * This program and the accompanying materials + * are made available under the terms of the Eclipse Public License 2.0 + * which accompanies this distribution, and is available at + * https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + *******************************************************************************/ +package org.eclipse.e4.ui.workbench.renderers.swt; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.atomic.AtomicBoolean; +import org.eclipse.swt.widgets.Display; + +/** + * Lightweight performance tracer for renderer hotspots. + *

+ * Output file defaults to {@code $HOME/renderer-perf-trace.csv} and can be + * overridden with {@code -Declipse.renderer.perf.trace.file=}. + *

+ * The CSV format is: + * {@code timestamp_ms,hotspot_id,duration_ns,detail} + *

+ * Trace records are queued lock-free and flushed asynchronously to avoid + * blocking the UI thread. + */ +public final class RendererPerfTracer { + + /** Master switch — always enabled in this debug build. */ + public static final boolean ENABLED = true; + + // Hotspot IDs matching the items in docs/performance.md + public static final String H01_FIND_ACTIVE_ELEMENTS = "H01_findActiveElements"; //$NON-NLS-1$ + public static final String H02_FIND_PLACEHOLDERS_LABEL = "H02_findPlaceholders_label"; //$NON-NLS-1$ + public static final String H02_FIND_PLACEHOLDERS_ITEM = "H02_findPlaceholders_item"; //$NON-NLS-1$ + public static final String H03_TOOLBAR_UPDATE_WIDGET = "H03_toolbar_updateWidget"; //$NON-NLS-1$ + public static final String H04_DIRTY_ALL_SELECTOR = "H04_dirty_allSelector"; //$NON-NLS-1$ + public static final String H05_TOOL_ITEM_UPDATER = "H05_toolItemUpdater"; //$NON-NLS-1$ + public static final String H06_MENU_SCHEDULE_UPDATE = "H06_menu_scheduleUpdate"; //$NON-NLS-1$ + public static final String H07_MENU_CONTEXT_PER_ITEM = "H07_menu_contextPerItem"; //$NON-NLS-1$ + public static final String H09_WBW_FIND_STACKS = "H09_wbw_findStacks"; //$NON-NLS-1$ + public static final String H10_SHOW_TAB_NO_BATCH = "H10_showTab_noBatch"; //$NON-NLS-1$ + public static final String H11_LIMBO_REPARENT = "H11_limbo_reparent"; //$NON-NLS-1$ + public static final String H12_AREA_SYNCH_CTF = "H12_area_synchCTF"; //$NON-NLS-1$ + public static final String H13_TOOLCTRL_STARTUP_SCAN = "H13_toolCtrl_startupScan"; //$NON-NLS-1$ + public static final String H14_RAT_UNCOALESCED = "H14_runAndTrack_uncoalesced"; //$NON-NLS-1$ + public static final String W1_SASH_SYNC_LAYOUT = "W1_sash_syncLayout_win"; //$NON-NLS-1$ + + private static final ConcurrentLinkedQueue QUEUE = new ConcurrentLinkedQueue<>(); + private static final AtomicBoolean FLUSH_SCHEDULED = new AtomicBoolean(false); + private static final Path OUTPUT_FILE; + private static final long START_TIME = System.nanoTime(); + + static { + String fileProp = System.getProperty("eclipse.renderer.perf.trace.file"); //$NON-NLS-1$ + if (fileProp != null) { + OUTPUT_FILE = Path.of(fileProp); + } else { + OUTPUT_FILE = Path.of(System.getProperty("user.home"), "renderer-perf-trace.csv"); //$NON-NLS-1$ //$NON-NLS-2$ + } + if (ENABLED) { + try { + Files.writeString(OUTPUT_FILE, + "timestamp_ms,hotspot_id,duration_ns,detail\n", //$NON-NLS-1$ + StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + } catch (IOException e) { + System.err.println("RendererPerfTracer: failed to open " + OUTPUT_FILE + ": " + e); //$NON-NLS-1$ //$NON-NLS-2$ + } + } + } + + private RendererPerfTracer() { + } + + /** Capture start time. Call this at the beginning of the hotspot. */ + public static long begin() { + return System.nanoTime(); + } + + /** + * Record a trace event. + * + * @param hotspotId one of the H* or W* constants + * @param startNano value returned by {@link #begin()} + * @param detail short context string (e.g., element count, class name). + * May be {@code null}. + */ + public static void trace(String hotspotId, long startNano, String detail) { + long durationNs = System.nanoTime() - startNano; + long wallMs = (System.nanoTime() - START_TIME) / 1_000_000L; + String line = wallMs + "," + hotspotId + "," + durationNs + "," //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + + (detail != null ? detail : "") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ + QUEUE.add(line); + scheduleFlush(); + } + + /** + * Record a count-only event (no duration). + * + * @param hotspotId one of the H* or W* constants + * @param detail short context string + */ + public static void count(String hotspotId, String detail) { + long wallMs = (System.nanoTime() - START_TIME) / 1_000_000L; + String line = wallMs + "," + hotspotId + ",0," //$NON-NLS-1$ //$NON-NLS-2$ + + (detail != null ? detail : "") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ + QUEUE.add(line); + scheduleFlush(); + } + + private static void scheduleFlush() { + if (FLUSH_SCHEDULED.compareAndSet(false, true)) { + Display display = Display.getDefault(); + if (display != null && !display.isDisposed()) { + display.timerExec(500, RendererPerfTracer::flush); + } else { + flush(); + } + } + } + + private static void flush() { + FLUSH_SCHEDULED.set(false); + try (BufferedWriter writer = Files.newBufferedWriter(OUTPUT_FILE, + StandardOpenOption.CREATE, StandardOpenOption.APPEND)) { + String line; + while ((line = QUEUE.poll()) != null) { + writer.write(line); + } + } catch (IOException e) { + // Silently drop — tracing must not break the workbench + } + } +} diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/SashRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/SashRenderer.java index ac2dddbe35d..eca9ff4f026 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/SashRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/SashRenderer.java @@ -84,7 +84,11 @@ protected void forceLayout(MElementContainer pscModel) { return; } } + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; s.layout(true, true); + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.W1_SASH_SYNC_LAYOUT, _t0, null); + } } else { s.requestLayout(); } diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java index 8eb9c4494d9..5d888a057ed 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java @@ -477,7 +477,12 @@ void subscribeTopicUILabelChanged(@UIEventTopic(UIEvents.UILabel.TOPIC_ALL) Even // Do we have any stacks with place holders for the element // that's changed? MWindow win = modelService.getTopLevelWindowFor(part); + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; List refs = modelService.findElements(win, null, MPlaceholder.class, null); + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H02_FIND_PLACEHOLDERS_LABEL, _t0, + "refs=" + (refs != null ? refs.size() : 0)); //$NON-NLS-1$ + } if (refs != null) { for (MPlaceholder ref : refs) { if (ref.getRef() != part) { @@ -656,8 +661,13 @@ void subscribeTopicActivateChanged(@UIEventTopic(UIEvents.UILifeCycle.ACTIVATE) MPartStack pStack = (MPartStack) (partParent instanceof MPartStack ? partParent : null); + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; List activeElements = modelService.findElements(modelService.getTopLevelWindowFor(newActivePart), null, MUIElement.class, singletonList(CSSConstants.CSS_ACTIVE_CLASS)); + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H01_FIND_ACTIVE_ELEMENTS, _t0, + "found=" + activeElements.size()); //$NON-NLS-1$ + } for (MUIElement element : activeElements) { if (element instanceof MPartStack && element != pStack) { styleElement(element, false); @@ -1146,7 +1156,12 @@ public CTabItem findItemForPart(MPart part) { return null; } + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; List refs = modelService.findElements(win, null, MPlaceholder.class); + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H02_FIND_PLACEHOLDERS_ITEM, _t0, + "refs=" + (refs != null ? refs.size() : 0)); //$NON-NLS-1$ + } if (refs != null) { for (MPlaceholder ref : refs) { if (ref.getRef() != part) { @@ -1471,6 +1486,7 @@ private boolean closePart(Widget widget) { @Override protected void showTab(MUIElement element) { + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; super.showTab(element); // an invisible element won't have the correct widget hierarchy @@ -1506,6 +1522,10 @@ protected void showTab(MUIElement element) { // Show the new state adjustTopRight(tabFolder); + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H10_SHOW_TAB_NO_BATCH, _t0, + element.getClass().getSimpleName()); + } } protected void showMenu(ToolItem item) { diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolBarManagerRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolBarManagerRenderer.java index a6b71cabbb7..9d6764a6b83 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolBarManagerRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolBarManagerRenderer.java @@ -295,6 +295,9 @@ private void subscribeTopicUpdateChildren(@UIEventTopic(ElementContainer.TOPIC_C @Optional private void subscribeTopicDirtyChanged( @SuppressWarnings("unused") @UIEventTopic(UIEvents.Dirtyable.TOPIC_DIRTY) Event eventData) { + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.count(RendererPerfTracer.H04_DIRTY_ALL_SELECTOR, null); + } getUpdater().updateContributionItems(ALL_SELECTOR); } @@ -515,6 +518,10 @@ public boolean changed(IEclipseContext context) { return false; } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.count(RendererPerfTracer.H14_RAT_UNCOALESCED, + toolbarModel.getElementId()); + } record.updateVisibility(parentContext.getActiveLeaf()); runExternalCode(() -> { updateToolbar(toolbarModel, manager); @@ -733,11 +740,15 @@ private void updateToolbar(final MToolBar toolbarModel, final ToolBarManager man } private void updateWidget(ToolBarManager manager) { + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; manager.update(true); ToolBar toolbar = manager.getControl(); if (toolbar != null && !toolbar.isDisposed()) { toolbar.requestLayout(); } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H03_TOOLBAR_UPDATE_WIDGET, _t0, null); + } } @Override diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolControlRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolControlRenderer.java index 60d9528e2a0..21a8fe114c4 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolControlRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolControlRenderer.java @@ -218,6 +218,7 @@ private void subscribeTopicTagsChanged( @Optional private void subscribeTopicAppStartup( @SuppressWarnings("unused") @UIEventTopic(UIEvents.UILifeCycle.APP_STARTUP_COMPLETE) Event event) { + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; List toolControls = modelService.findElements( application, null, MToolControl.class, null); for (MToolControl toolControl : toolControls) { @@ -226,6 +227,10 @@ private void subscribeTopicAppStartup( toolControl.setVisible(false); } } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H13_TOOLCTRL_STARTUP_SCAN, _t0, + "found=" + toolControls.size()); //$NON-NLS-1$ + } } private void createToolControlMenu(final MToolControl toolControl, diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolItemUpdater.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolItemUpdater.java index 9f888cb2f46..f794167bda9 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolItemUpdater.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/ToolItemUpdater.java @@ -47,11 +47,14 @@ void removeItem(AbstractContributionItem item) { } public void updateContributionItems(Selector selector) { + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; boolean doRunNow = false; + int matchCount = 0; for (final AbstractContributionItem ci : itemsToCheck) { if (ci.getModel() != null && ci.getModel().getParent() != null) { if (selector.select(ci.getModel())) { itemsToUpdateLater.add(ci); + matchCount++; if (timestampOfEarliestQueuedUpdate == 0) { timestampOfEarliestQueuedUpdate = System.nanoTime(); } @@ -76,6 +79,10 @@ public void updateContributionItems(Selector selector) { if (doRunNow) { run(); } + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H05_TOOL_ITEM_UPDATER, _t0, + "total=" + itemsToCheck.size() + " matched=" + matchCount); //$NON-NLS-1$ //$NON-NLS-2$ + } } @Override diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/WBWRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/WBWRenderer.java index c47c15ea4ff..25010dadf0f 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/WBWRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/WBWRenderer.java @@ -591,8 +591,14 @@ private void updateNonFocusState(int event, MWindow win) { return; } + long _t0 = RendererPerfTracer.ENABLED ? RendererPerfTracer.begin() : 0; List stacks = modelService.findElements(perspective, null, MPartStack.class, singletonList(CSSConstants.CSS_ACTIVE_CLASS)); + if (RendererPerfTracer.ENABLED) { + RendererPerfTracer.trace(RendererPerfTracer.H09_WBW_FIND_STACKS, _t0, + "event=" + (event == SWT.Activate ? "Activate" : "Deactivate") //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + + " found=" + stacks.size()); //$NON-NLS-1$ + } if (stacks.isEmpty()) { return; } diff --git a/docs/performance.md b/docs/performance.md new file mode 100644 index 00000000000..b9237d6d9cd --- /dev/null +++ b/docs/performance.md @@ -0,0 +1,131 @@ +# Renderer Performance Improvements + +This document catalogs performance opportunities discovered in the +`org.eclipse.e4.ui.workbench.renderers.swt` bundle, ranked by **measured impact** +from a real ~2 minute IDE session (1,286 trace events). + +## Measurement Summary + +| Rank | Hotspot | Count | Total ms | Max ms | Verdict | +|------|---------|------:|--------:|---------:|---------| +| 1 | H10 — `showTab` without `setRedraw` batching | 9 | **1,566** | 977 | **Fix first** | +| 2 | H07 — Per-item context creation in menu show | 227 | **568** | 262 | **Fix** | +| 3 | H06 — Unbatched `scheduleManagerUpdate` | 537 | (count only) | — | **Fix** (537 fires in 2 min) | +| 4 | H05 — `ToolItemUpdater` linear scan | 238 | 17 | 3.4 | Low — only 12 items | +| 5 | H03 — Unbatched `updateWidget`/`requestLayout` | 115 | 16 | 3.1 | Low per call | +| 6 | H01 — `findElements` for active CSS class | 30 | 2.4 | 0.24 | Negligible | +| 7 | H02 — `findElements` for placeholders | 21 | 1.9 | 0.23 | Negligible | +| 8 | H09 — `findElements` on window activate/deactivate | 54 | 1.1 | 0.14 | Negligible | +| 9 | H14 — Uncoalesced `RunAndTrack` callbacks | 51 | (count only) | — | Low volume | +| 10 | H11 — Limbo reparenting | 1 | 0.2 | 0.2 | Single event | +| 11 | H13 — Full model scan at startup | 1 | 0.2 | 0.17 | One-time | +| 12 | H12 — `synchCTFState` recursive walk | 2 | 0.1 | 0.1 | Negligible | + +**Key finding:** Two hotspots account for **98%** of all measured renderer +overhead: `showTab` (72%) and menu context creation (26%). The `findElements` +tree scans (H01, H02, H09) that looked expensive in code review turned out to +be fast in practice with the measured model size (70 placeholders). + +--- + +## Priority 1: `showTab` Without `setRedraw` Batching (H10) + +**1,566 ms total — 72% of all hotspot time** + +**File:** `StackRenderer.java` / `LazyStackRenderer.java` + +The `showTab` path performs multiple widget mutations (`setParent`, `setControl`, +`requestLayout`, `setSelection`, `adjustTopRight` → `pack()` + `requestLayout()`) +without suppressing intermediate repaints. The worst single call took **977 ms** +(nearly 1 second of frozen UI during startup). Five of nine calls exceeded 18 ms +(i.e., longer than a 60 fps frame). + +**Recommendation:** Wrap the `showTab` body in `setRedraw(false)` / +`setRedraw(true)` on the target `CTabFolder`. + +--- + +## Priority 2: Per-Item Eclipse Context Creation During Menu Show (H07) + +**568 ms total — 26% of all hotspot time** + +**File:** `MenuManagerRendererFilter.java` (lines ~190–230) + +`updateElementVisibility()` fires 227 times in a 2-minute session. Inside the +loop over `menuModel.getChildren()`, a new `IEclipseContext` is allocated and +disposed for every `MHandledMenuItem` / `MDirectMenuItem`. Two calls at startup +took **258–262 ms each**, dominating early menu rendering. Menus with 42–46 +children showed ~4 ms per call even after warm-up. + +**Recommendation:** Create one scratch context before the loop and reset / reuse +it per item. + +--- + +## Priority 3: Unbatched `scheduleManagerUpdate` (H06) + +**537 calls in ~2 minutes — not time-measured but very high frequency** + +**File:** `MenuManagerRenderer.java` (lines ~1144–1179) + +The batching mechanism (bug 467000 workaround) is disabled by default. Each of +the 537 calls performs an immediate `mgr.update(false)`, walking and reconciling +the widget tree synchronously. With batching enabled, these would collapse into +far fewer actual updates. + +**Recommendation:** Enable batching unconditionally and remove the system-property +guard. + +--- + +## Lower Priority Items + +The following showed measurable but modest impact in the trace: + +### `ToolItemUpdater` Linear Scan (H05) + +17 ms total across 238 calls. Only 12 toolbar items were registered, so the +ArrayList-based lookup is not yet painful. **Will scale poorly** with more +toolbar items (e.g., via many plugins). Switch to `LinkedHashSet` as a +preventive measure. + +### Unbatched `updateWidget` / `requestLayout` (H03) + +16 ms total across 115 calls. Individual calls are fast (~140 µs avg) but the +redundant layout pattern (especially the double `requestLayout` in +`childRendered`) could compound on slower machines. + +### `findElements` Tree Scans (H01, H02, H09) + +Despite looking expensive in code review, all three combined total only **5.4 ms** +across 103 calls. The model size (70 placeholders, ~30 active elements) is small +enough that the tree walks are sub-millisecond. These would become important with +**hundreds** of open editors. + +--- + +## Platform-Specific SWT Workarounds + +### W-1. Synchronous `layout(true, true)` on Windows in SashRenderer + +**File:** `SashRenderer.java` (lines ~77–90) +**SWT Bug:** [558392](https://bugs.eclipse.org/bugs/show_bug.cgi?id=558392) + +On Windows, `forceLayout()` uses `sashForm.layout(true, true)` — a synchronous, +recursive full layout — instead of the asynchronous `requestLayout()` used on +other platforms. Not triggered in this Linux trace session, but expected to be +the **dominant hotspot on Windows** based on code analysis. + +**What SWT should fix:** `Composite.requestLayout()` must correctly schedule a +deferred layout on Windows. + +### W-2. Menu Manager Batching Guarded by Bug-467000 System Property + +**File:** `MenuManagerRenderer.java` (lines ~1144–1179) +**Eclipse Bug:** [467000](https://bugs.eclipse.org/bugs/show_bug.cgi?id=467000) + +SWT `Menu` does not support deferred / coalesced updates, forcing each +`MenuManager.update(false)` to immediately reconcile the widget tree (537 times +in this session). + +**What SWT should fix:** Provide a built-in batching mechanism for menu updates. From 19414731f7d98d4dec071824cd1aa19256f80035 Mon Sep 17 00:00:00 2001 From: Lars Vogel Date: Thu, 16 Apr 2026 00:56:11 +0200 Subject: [PATCH 2/6] Update performance.md with Session 2 trace data (2026-04-16) - Add Session 2 measurements alongside baseline for comparison - H10 setRedraw fix landed (pending validation), H07 now top unfixed priority - H07 total time rose 68% (568ms to 954ms), call count nearly doubled - H03 showed new 6ms max spike worth monitoring - H06 frequency increased 83% (268/min to 490/min) - Add high-frequency hotspot table and fix status section --- docs/performance.md | 190 ++++++++++++++++++++++++++++---------------- 1 file changed, 120 insertions(+), 70 deletions(-) diff --git a/docs/performance.md b/docs/performance.md index b9237d6d9cd..2e82cc4e7fc 100644 --- a/docs/performance.md +++ b/docs/performance.md @@ -2,105 +2,155 @@ This document catalogs performance opportunities discovered in the `org.eclipse.e4.ui.workbench.renderers.swt` bundle, ranked by **measured impact** -from a real ~2 minute IDE session (1,286 trace events). +from real IDE sessions. + +## Trace Sessions + +| Property | Session 1 (baseline) | Session 2 (2026-04-16) | +|----------|---------------------:|------------------------:| +| Duration | ~120 s | 92.23 s | +| Trace events | 1,286 | 1,808 | +| Total hotspot time | ~2,170 ms | 2,824 ms | +| H10 fix applied? | No | No | + +Session 2 was recorded on the same machine with a similar workload. The H10 +`setRedraw` batching fix (commit `dcb99d75` on branch +`wip-fix-showTab-setRedraw-batching`) was **not yet merged** into the trace +branch, so H10 numbers represent the unfixed baseline. ## Measurement Summary -| Rank | Hotspot | Count | Total ms | Max ms | Verdict | -|------|---------|------:|--------:|---------:|---------| -| 1 | H10 — `showTab` without `setRedraw` batching | 9 | **1,566** | 977 | **Fix first** | -| 2 | H07 — Per-item context creation in menu show | 227 | **568** | 262 | **Fix** | -| 3 | H06 — Unbatched `scheduleManagerUpdate` | 537 | (count only) | — | **Fix** (537 fires in 2 min) | -| 4 | H05 — `ToolItemUpdater` linear scan | 238 | 17 | 3.4 | Low — only 12 items | -| 5 | H03 — Unbatched `updateWidget`/`requestLayout` | 115 | 16 | 3.1 | Low per call | -| 6 | H01 — `findElements` for active CSS class | 30 | 2.4 | 0.24 | Negligible | -| 7 | H02 — `findElements` for placeholders | 21 | 1.9 | 0.23 | Negligible | -| 8 | H09 — `findElements` on window activate/deactivate | 54 | 1.1 | 0.14 | Negligible | -| 9 | H14 — Uncoalesced `RunAndTrack` callbacks | 51 | (count only) | — | Low volume | -| 10 | H11 — Limbo reparenting | 1 | 0.2 | 0.2 | Single event | -| 11 | H13 — Full model scan at startup | 1 | 0.2 | 0.17 | One-time | -| 12 | H12 — `synchCTFState` recursive walk | 2 | 0.1 | 0.1 | Negligible | - -**Key finding:** Two hotspots account for **98%** of all measured renderer -overhead: `showTab` (72%) and menu context creation (26%). The `findElements` -tree scans (H01, H02, H09) that looked expensive in code review turned out to -be fast in practice with the measured model size (70 placeholders). +Sorted by Session 2 total time descending. Session 1 values in parentheses for +comparison. + +| Rank | Hotspot | Count | Total ms | Max ms | P95 ms | Verdict | +|------|---------|------:|---------:|---------:|-------:|---------| +| 1 | H10 — `showTab` without `setRedraw` batching | 8 (9) | **1,817** (1,566) | 1,209 (977) | 901 | **Fix landed** (pending validation) | +| 2 | H07 — Per-item context creation in menu show | 403 (227) | **954** (568) | 373 (262) | 1.2 | **Fix next** | +| 3 | H03 — Unbatched `updateWidget`/`requestLayout` | 155 (115) | **25** (16) | 6.3 (3.1) | 0.76 | Low per call; new 6 ms spike | +| 4 | H05 — `ToolItemUpdater` linear scan | 309 (238) | **22** (17) | 3.3 (3.4) | 0.09 | Low — 12 items | +| 5 | H09 — `findElements` on window activate/deactivate | 9 (54) | **1.9**\* (1.1) | 0.15 (0.14) | 0.04 | Negligible | +| 6 | H01 — `findElements` for active CSS class | 19 (30) | **1.5** (2.4) | 0.23 (0.24) | 0.19 | Negligible | +| 7 | H02 — `findElements` for placeholders | 22+2 (21) | **1.5+0.3** (1.9) | 0.24 (0.23) | 0.09 | Negligible | +| 8 | H06 — Unbatched `scheduleManagerUpdate` | 753 (537) | 0 (count only) | — | — | **Fix** (753 fires in 92 s = 490/min) | +| 9 | H14 — Uncoalesced `RunAndTrack` callbacks | 39 (51) | 0 (count only) | — | — | Low volume | +| 10 | H13 — Full model scan at startup | 1 (1) | 0.15 (0.2) | 0.15 (0.17) | — | One-time | +| 11 | H12 — `synchCTFState` recursive walk | 2 (2) | 0.11 (0.1) | 0.10 (0.1) | — | Negligible | +| 12 | H11 — Limbo reparenting | 1 (1) | 0.09 (0.2) | 0.09 (0.2) | — | Single event | + +\* H09 total appears high relative to per-call averages in the raw trace; +likely a Gemini summarization artifact. Individual calls remain sub-ms. + +**Key finding (unchanged):** H10 and H07 still account for **98%** of all +measured renderer overhead (64% and 34% respectively). The `findElements` tree +scans (H01, H02, H09) remain fast in practice. + +### Notable ranking changes from Session 1 + +- **H07 moved closer to H10.** Total time rose 68% (568 ms -> 954 ms) with + call count nearly doubling (227 -> 403). This is consistent with heavier + menu interaction in Session 2. H07 is now clearly the **dominant unfixed + hotspot** once the H10 `setRedraw` fix is validated. +- **H03 showed a new 6 ms spike** (vs 3.1 ms max previously). Still a low + total (25 ms) but worth monitoring — could indicate layout thrashing under + specific tab arrangements. +- **H06 frequency increased** from 537 calls in ~120 s (268/min) to 753 calls + in 92 s (490/min). The zero-duration measurement confirms these are + trigger-only probes, but the rising cadence reinforces the case for enabling + batching. +- **H09 count dropped sharply** (54 -> 9) because Session 2 involved fewer + window activate/deactivate cycles. Time remains negligible. + +### High-frequency hotspots (> 100 calls/min) + +| Hotspot | Calls/min (Session 2) | Calls/min (Session 1) | +|---------|----------------------:|----------------------:| +| H06 `scheduleManagerUpdate` | 490 | 268 | +| H07 menu `contextPerItem` | 262 | 113 | +| H05 `toolItemUpdater` | 201 | 119 | +| H03 `updateWidget` | 101 | 58 | + +H06 is zero-duration (trigger probe) but its 490 calls/min means 490 +synchronous `mgr.update(false)` walks per minute when batching is off. --- -## Priority 1: `showTab` Without `setRedraw` Batching (H10) +## Fix Status -**1,566 ms total — 72% of all hotspot time** +### DONE: `showTab` `setRedraw` Batching (H10) -**File:** `StackRenderer.java` / `LazyStackRenderer.java` +**Branch:** `wip-fix-showTab-setRedraw-batching` (commit `dcb99d75`) -The `showTab` path performs multiple widget mutations (`setParent`, `setControl`, -`requestLayout`, `setSelection`, `adjustTopRight` → `pack()` + `requestLayout()`) -without suppressing intermediate repaints. The worst single call took **977 ms** -(nearly 1 second of frozen UI during startup). Five of nine calls exceeded 18 ms -(i.e., longer than a 60 fps frame). +The `showTab` body is now wrapped in `setRedraw(false)` / `setRedraw(true)` on +the target `CTabFolder`. This was the #1 hotspot in both sessions +(1,566 ms / 1,817 ms total, worst single call 977 ms / 1,209 ms). -**Recommendation:** Wrap the `showTab` body in `setRedraw(false)` / -`setRedraw(true)` on the target `CTabFolder`. +**Validation needed:** Re-run the trace with the fix merged to confirm H10 +drops below the 16 ms frame budget. Expected outcome: H10 total should drop +by 80-95%, making H07 the new #1. --- -## Priority 2: Per-Item Eclipse Context Creation During Menu Show (H07) +## Priority 1 (next fix): Per-Item Eclipse Context Creation During Menu Show (H07) + +**954 ms total — 34% of all hotspot time (up from 26%)** -**568 ms total — 26% of all hotspot time** +**File:** `MenuManagerRendererFilter.java` (lines ~190-230) -**File:** `MenuManagerRendererFilter.java` (lines ~190–230) +`updateElementVisibility()` fired 403 times in 92 seconds. Inside the loop +over `menuModel.getChildren()`, a new `IEclipseContext` is allocated and +disposed for every `MHandledMenuItem` / `MDirectMenuItem`. The worst single +call in Session 2 took **373 ms** (children=18, ctxCreated=0), up from 262 ms. +Menus with 30-48 children dominate the tail latency. -`updateElementVisibility()` fires 227 times in a 2-minute session. Inside the -loop over `menuModel.getChildren()`, a new `IEclipseContext` is allocated and -disposed for every `MHandledMenuItem` / `MDirectMenuItem`. Two calls at startup -took **258–262 ms each**, dominating early menu rendering. Menus with 42–46 -children showed ~4 ms per call even after warm-up. +**Top 5 slowest H07 events (Session 2):** -**Recommendation:** Create one scratch context before the loop and reset / reuse -it per item. +| Timestamp (ms) | Duration (ms) | Detail | +|----------------:|--------------:|--------| +| 2,803 | 373 | children=18, ctxCreated=0 | +| 2,800 | 367 | children=18, ctxCreated=0 | +| 19,057 | 63 | children=30, ctxCreated=2 | +| 19,049 | 49 | children=48, ctxCreated=0 | +| 19,043 | 24 | children=12, ctxCreated=0 | + +**Recommendation:** Create one scratch `IEclipseContext` before the loop and +reset/reuse it per item, avoiding per-item allocation and disposal overhead. --- -## Priority 3: Unbatched `scheduleManagerUpdate` (H06) +## Priority 2: Unbatched `scheduleManagerUpdate` (H06) -**537 calls in ~2 minutes — not time-measured but very high frequency** +**753 calls in 92 seconds (490/min) — not time-measured but very high frequency** -**File:** `MenuManagerRenderer.java` (lines ~1144–1179) +**File:** `MenuManagerRenderer.java` (lines ~1144-1179) -The batching mechanism (bug 467000 workaround) is disabled by default. Each of -the 537 calls performs an immediate `mgr.update(false)`, walking and reconciling -the widget tree synchronously. With batching enabled, these would collapse into -far fewer actual updates. +The batching mechanism (bug 467000 workaround) is disabled by default. Each +call performs an immediate `mgr.update(false)`, walking and reconciling the +widget tree synchronously. Frequency increased 83% between sessions. -**Recommendation:** Enable batching unconditionally and remove the system-property -guard. +**Recommendation:** Enable batching unconditionally and remove the +system-property guard. --- ## Lower Priority Items -The following showed measurable but modest impact in the trace: +### `updateWidget` / `requestLayout` (H03) + +25 ms total across 155 calls. Average remains low (~162 us) but max jumped +from 3.1 ms to **6.3 ms**. The new spike warrants investigation — likely a +layout cascade when multiple toolbar items update in quick succession. ### `ToolItemUpdater` Linear Scan (H05) -17 ms total across 238 calls. Only 12 toolbar items were registered, so the -ArrayList-based lookup is not yet painful. **Will scale poorly** with more -toolbar items (e.g., via many plugins). Switch to `LinkedHashSet` as a +22 ms total across 309 calls. Still only 12 toolbar items registered. Numbers +track linearly with session activity. Switch to `LinkedHashSet` as a preventive measure. -### Unbatched `updateWidget` / `requestLayout` (H03) - -16 ms total across 115 calls. Individual calls are fast (~140 µs avg) but the -redundant layout pattern (especially the double `requestLayout` in -`childRendered`) could compound on slower machines. - ### `findElements` Tree Scans (H01, H02, H09) -Despite looking expensive in code review, all three combined total only **5.4 ms** -across 103 calls. The model size (70 placeholders, ~30 active elements) is small -enough that the tree walks are sub-millisecond. These would become important with -**hundreds** of open editors. +All three combined: **5.2 ms** across 52 calls (down from 5.4 ms / 103 calls). +Remain negligible at current model sizes. --- @@ -108,24 +158,24 @@ enough that the tree walks are sub-millisecond. These would become important wit ### W-1. Synchronous `layout(true, true)` on Windows in SashRenderer -**File:** `SashRenderer.java` (lines ~77–90) +**File:** `SashRenderer.java` (lines ~77-90) **SWT Bug:** [558392](https://bugs.eclipse.org/bugs/show_bug.cgi?id=558392) On Windows, `forceLayout()` uses `sashForm.layout(true, true)` — a synchronous, recursive full layout — instead of the asynchronous `requestLayout()` used on -other platforms. Not triggered in this Linux trace session, but expected to be -the **dominant hotspot on Windows** based on code analysis. +other platforms. Not triggered in either Linux trace session, but expected to +be the **dominant hotspot on Windows** based on code analysis. **What SWT should fix:** `Composite.requestLayout()` must correctly schedule a deferred layout on Windows. ### W-2. Menu Manager Batching Guarded by Bug-467000 System Property -**File:** `MenuManagerRenderer.java` (lines ~1144–1179) +**File:** `MenuManagerRenderer.java` (lines ~1144-1179) **Eclipse Bug:** [467000](https://bugs.eclipse.org/bugs/show_bug.cgi?id=467000) -SWT `Menu` does not support deferred / coalesced updates, forcing each -`MenuManager.update(false)` to immediately reconcile the widget tree (537 times -in this session). +SWT `Menu` does not support deferred/coalesced updates, forcing each +`MenuManager.update(false)` to immediately reconcile the widget tree (753 times +in Session 2, up from 537). **What SWT should fix:** Provide a built-in batching mechanism for menu updates. From f8c3b1d1a705422bad4ab6280e4230d9ab3f3740 Mon Sep 17 00:00:00 2001 From: Lars Vogel Date: Mon, 20 Apr 2026 20:28:55 +0200 Subject: [PATCH 3/6] RendererPerfTracer: persistent writer, shutdown hook, daemon flusher - Hold one BufferedWriter for the tracer's lifetime instead of reopening the file on every flush. - Drive flushes from a single daemon ScheduledExecutorService instead of Display.timerExec, removing the implicit UI-thread dependency so off-thread callers (e.g. RunAndTrack) are safe. - Register a JVM shutdown hook that drains the queue and closes the writer so the final 500 ms of events are not lost. - Rename the internal wallMs variable to elapsedMs; it is monotonic elapsed time since process start, not wall clock. - Add a comment explaining why H08 is skipped in the hotspot list. --- .../renderers/swt/RendererPerfTracer.java | 94 +++++++++++++------ 1 file changed, 63 insertions(+), 31 deletions(-) diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java index 1f365752e14..5be959d0b0b 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java @@ -16,8 +16,9 @@ import java.nio.file.Path; import java.nio.file.StandardOpenOption; import java.util.concurrent.ConcurrentLinkedQueue; -import java.util.concurrent.atomic.AtomicBoolean; -import org.eclipse.swt.widgets.Display; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; /** * Lightweight performance tracer for renderer hotspots. @@ -28,15 +29,19 @@ * The CSV format is: * {@code timestamp_ms,hotspot_id,duration_ns,detail} *

- * Trace records are queued lock-free and flushed asynchronously to avoid - * blocking the UI thread. + * Trace records are queued lock-free on the producer side and drained by a + * single daemon flusher thread every 500 ms. The flusher holds one open + * {@link BufferedWriter} for the lifetime of the JVM; a shutdown hook drains + * the queue and closes the writer so the last events are not lost. */ public final class RendererPerfTracer { - /** Master switch — always enabled in this debug build. */ + /** Master switch. Always enabled in this debug build. */ public static final boolean ENABLED = true; - // Hotspot IDs matching the items in docs/performance.md + // Hotspot IDs matching the items in docs/performance.md. + // H08 is intentionally unused: an earlier draft reserved it for a + // ContentProvider hotspot that turned out to be negligible during analysis. public static final String H01_FIND_ACTIVE_ELEMENTS = "H01_findActiveElements"; //$NON-NLS-1$ public static final String H02_FIND_PLACEHOLDERS_LABEL = "H02_findPlaceholders_label"; //$NON-NLS-1$ public static final String H02_FIND_PLACEHOLDERS_ITEM = "H02_findPlaceholders_item"; //$NON-NLS-1$ @@ -54,9 +59,11 @@ public final class RendererPerfTracer { public static final String W1_SASH_SYNC_LAYOUT = "W1_sash_syncLayout_win"; //$NON-NLS-1$ private static final ConcurrentLinkedQueue QUEUE = new ConcurrentLinkedQueue<>(); - private static final AtomicBoolean FLUSH_SCHEDULED = new AtomicBoolean(false); private static final Path OUTPUT_FILE; private static final long START_TIME = System.nanoTime(); + private static final Object WRITER_LOCK = new Object(); + private static final BufferedWriter WRITER; + private static final ScheduledExecutorService FLUSHER; static { String fileProp = System.getProperty("eclipse.renderer.perf.trace.file"); //$NON-NLS-1$ @@ -65,16 +72,31 @@ public final class RendererPerfTracer { } else { OUTPUT_FILE = Path.of(System.getProperty("user.home"), "renderer-perf-trace.csv"); //$NON-NLS-1$ //$NON-NLS-2$ } + BufferedWriter writer = null; if (ENABLED) { try { - Files.writeString(OUTPUT_FILE, - "timestamp_ms,hotspot_id,duration_ns,detail\n", //$NON-NLS-1$ + writer = Files.newBufferedWriter(OUTPUT_FILE, StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + writer.write("timestamp_ms,hotspot_id,duration_ns,detail\n"); //$NON-NLS-1$ + writer.flush(); } catch (IOException e) { System.err.println("RendererPerfTracer: failed to open " + OUTPUT_FILE + ": " + e); //$NON-NLS-1$ //$NON-NLS-2$ + writer = null; } } + WRITER = writer; + + FLUSHER = Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "RendererPerfTracer-Flusher"); //$NON-NLS-1$ + t.setDaemon(true); + return t; + }); + if (ENABLED && WRITER != null) { + FLUSHER.scheduleWithFixedDelay(RendererPerfTracer::flush, 500, 500, TimeUnit.MILLISECONDS); + Runtime.getRuntime().addShutdownHook( + new Thread(RendererPerfTracer::shutdown, "RendererPerfTracer-Shutdown")); //$NON-NLS-1$ + } } private RendererPerfTracer() { @@ -95,11 +117,10 @@ public static long begin() { */ public static void trace(String hotspotId, long startNano, String detail) { long durationNs = System.nanoTime() - startNano; - long wallMs = (System.nanoTime() - START_TIME) / 1_000_000L; - String line = wallMs + "," + hotspotId + "," + durationNs + "," //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + long elapsedMs = (System.nanoTime() - START_TIME) / 1_000_000L; + String line = elapsedMs + "," + hotspotId + "," + durationNs + "," //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + (detail != null ? detail : "") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ QUEUE.add(line); - scheduleFlush(); } /** @@ -109,34 +130,45 @@ public static void trace(String hotspotId, long startNano, String detail) { * @param detail short context string */ public static void count(String hotspotId, String detail) { - long wallMs = (System.nanoTime() - START_TIME) / 1_000_000L; - String line = wallMs + "," + hotspotId + ",0," //$NON-NLS-1$ //$NON-NLS-2$ + long elapsedMs = (System.nanoTime() - START_TIME) / 1_000_000L; + String line = elapsedMs + "," + hotspotId + ",0," //$NON-NLS-1$ //$NON-NLS-2$ + (detail != null ? detail : "") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ QUEUE.add(line); - scheduleFlush(); } - private static void scheduleFlush() { - if (FLUSH_SCHEDULED.compareAndSet(false, true)) { - Display display = Display.getDefault(); - if (display != null && !display.isDisposed()) { - display.timerExec(500, RendererPerfTracer::flush); - } else { - flush(); + private static void flush() { + synchronized (WRITER_LOCK) { + if (WRITER == null) { + return; + } + try { + String line; + while ((line = QUEUE.poll()) != null) { + WRITER.write(line); + } + WRITER.flush(); + } catch (IOException e) { + // Silently drop. Tracing must not break the workbench. } } } - private static void flush() { - FLUSH_SCHEDULED.set(false); - try (BufferedWriter writer = Files.newBufferedWriter(OUTPUT_FILE, - StandardOpenOption.CREATE, StandardOpenOption.APPEND)) { - String line; - while ((line = QUEUE.poll()) != null) { - writer.write(line); + private static void shutdown() { + FLUSHER.shutdown(); + try { + FLUSHER.awaitTermination(1, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + flush(); + synchronized (WRITER_LOCK) { + if (WRITER != null) { + try { + WRITER.close(); + } catch (IOException e) { + // ignore + } } - } catch (IOException e) { - // Silently drop — tracing must not break the workbench } } } From b85f91d58f457229c11051e8cd2783c199f601ef Mon Sep 17 00:00:00 2001 From: Lars Vogel Date: Mon, 20 Apr 2026 20:30:15 +0200 Subject: [PATCH 4/6] RendererPerfTracer: emit deterministic summary on shutdown Aggregate per-hotspot stats (count, total ns, max ns, p95 ns, calls/min) as events are recorded, and write a human-readable summary file next to the CSV on JVM shutdown. Path is .summary.txt, so the default is ~/renderer-perf-trace.csv.summary.txt. This removes the need to summarize the CSV with an external tool, which had produced at least one documented data artifact in session 2. --- .../renderers/swt/RendererPerfTracer.java | 102 +++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java index 5be959d0b0b..1ccda477beb 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java @@ -15,16 +15,25 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentLinkedQueue; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; /** * Lightweight performance tracer for renderer hotspots. *

* Output file defaults to {@code $HOME/renderer-perf-trace.csv} and can be - * overridden with {@code -Declipse.renderer.perf.trace.file=}. + * overridden with {@code -Declipse.renderer.perf.trace.file=}. A + * companion summary file is written next to the CSV with suffix + * {@code .summary.txt} when the JVM shuts down. *

* The CSV format is: * {@code timestamp_ms,hotspot_id,duration_ns,detail} @@ -32,7 +41,8 @@ * Trace records are queued lock-free on the producer side and drained by a * single daemon flusher thread every 500 ms. The flusher holds one open * {@link BufferedWriter} for the lifetime of the JVM; a shutdown hook drains - * the queue and closes the writer so the last events are not lost. + * the queue, writes the summary, and closes the writer so the last events + * are not lost. */ public final class RendererPerfTracer { @@ -59,7 +69,9 @@ public final class RendererPerfTracer { public static final String W1_SASH_SYNC_LAYOUT = "W1_sash_syncLayout_win"; //$NON-NLS-1$ private static final ConcurrentLinkedQueue QUEUE = new ConcurrentLinkedQueue<>(); + private static final Map STATS = new ConcurrentHashMap<>(); private static final Path OUTPUT_FILE; + private static final Path SUMMARY_FILE; private static final long START_TIME = System.nanoTime(); private static final Object WRITER_LOCK = new Object(); private static final BufferedWriter WRITER; @@ -72,6 +84,7 @@ public final class RendererPerfTracer { } else { OUTPUT_FILE = Path.of(System.getProperty("user.home"), "renderer-perf-trace.csv"); //$NON-NLS-1$ //$NON-NLS-2$ } + SUMMARY_FILE = Path.of(OUTPUT_FILE.toString() + ".summary.txt"); //$NON-NLS-1$ BufferedWriter writer = null; if (ENABLED) { try { @@ -121,6 +134,7 @@ public static void trace(String hotspotId, long startNano, String detail) { String line = elapsedMs + "," + hotspotId + "," + durationNs + "," //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + (detail != null ? detail : "") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ QUEUE.add(line); + STATS.computeIfAbsent(hotspotId, k -> new HotspotStats()).recordTimed(durationNs); } /** @@ -134,6 +148,7 @@ public static void count(String hotspotId, String detail) { String line = elapsedMs + "," + hotspotId + ",0," //$NON-NLS-1$ //$NON-NLS-2$ + (detail != null ? detail : "") + "\n"; //$NON-NLS-1$ //$NON-NLS-2$ QUEUE.add(line); + STATS.computeIfAbsent(hotspotId, k -> new HotspotStats()).recordCountOnly(); } private static void flush() { @@ -161,6 +176,7 @@ private static void shutdown() { Thread.currentThread().interrupt(); } flush(); + writeSummary(); synchronized (WRITER_LOCK) { if (WRITER != null) { try { @@ -171,4 +187,86 @@ private static void shutdown() { } } } + + private static void writeSummary() { + long elapsedMs = (System.nanoTime() - START_TIME) / 1_000_000L; + double elapsedMin = elapsedMs / 60_000.0; + List> entries = new ArrayList<>(STATS.entrySet()); + entries.sort(Comparator.>comparingLong( + e -> e.getValue().totalNanos.get()).reversed()); + + StringBuilder sb = new StringBuilder(4096); + sb.append("Renderer Performance Trace Summary\n"); //$NON-NLS-1$ + sb.append("==================================\n\n"); //$NON-NLS-1$ + sb.append(String.format("Session duration: %.2f s%n", elapsedMs / 1000.0)); //$NON-NLS-1$ + long totalEvents = entries.stream().mapToLong(e -> e.getValue().count.get()).sum(); + sb.append(String.format("Total events: %d%n%n", totalEvents)); //$NON-NLS-1$ + + sb.append(String.format("%-36s %8s %10s %10s %10s %10s%n", //$NON-NLS-1$ + "Hotspot", "Count", "Calls/min", "Total ms", "Max ms", "P95 ms")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ + sb.append(String.format("%-36s %8s %10s %10s %10s %10s%n", //$NON-NLS-1$ + "-------", "-----", "---------", "--------", "------", "------")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$ + for (Map.Entry e : entries) { + HotspotStats s = e.getValue(); + long count = s.count.get(); + double callsPerMin = elapsedMin > 0 ? count / elapsedMin : 0; + if (s.isCountOnly()) { + sb.append(String.format("%-36s %8d %10.1f %10s %10s %10s%n", //$NON-NLS-1$ + e.getKey(), count, callsPerMin, "-", "-", "-")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ + } else { + double totalMs = s.totalNanos.get() / 1_000_000.0; + double maxMs = s.maxNanos.get() / 1_000_000.0; + double p95Ms = s.percentileNanos(95) / 1_000_000.0; + sb.append(String.format("%-36s %8d %10.1f %10.2f %10.2f %10.2f%n", //$NON-NLS-1$ + e.getKey(), count, callsPerMin, totalMs, maxMs, p95Ms)); + } + } + try { + Files.writeString(SUMMARY_FILE, sb.toString(), + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + } catch (IOException e) { + System.err.println("RendererPerfTracer: failed to write " + SUMMARY_FILE + ": " + e); //$NON-NLS-1$ //$NON-NLS-2$ + } + } + + private static final class HotspotStats { + final AtomicLong count = new AtomicLong(); + final AtomicLong totalNanos = new AtomicLong(); + final AtomicLong maxNanos = new AtomicLong(); + final List durationsNanos = Collections.synchronizedList(new ArrayList<>()); + + void recordTimed(long nanos) { + count.incrementAndGet(); + totalNanos.addAndGet(nanos); + maxNanos.accumulateAndGet(nanos, Math::max); + durationsNanos.add(nanos); + } + + void recordCountOnly() { + count.incrementAndGet(); + } + + boolean isCountOnly() { + return durationsNanos.isEmpty() && count.get() > 0; + } + + long percentileNanos(int percentile) { + List snapshot; + synchronized (durationsNanos) { + if (durationsNanos.isEmpty()) { + return 0; + } + snapshot = new ArrayList<>(durationsNanos); + } + Collections.sort(snapshot); + int idx = (int) Math.ceil(percentile / 100.0 * snapshot.size()) - 1; + if (idx < 0) { + idx = 0; + } + if (idx >= snapshot.size()) { + idx = snapshot.size() - 1; + } + return snapshot.get(idx); + } + } } From ed6b0e4e1db1ab42365c27fcf0dc7fd215bcdc7a Mon Sep 17 00:00:00 2001 From: Lars Vogel Date: Mon, 20 Apr 2026 20:32:29 +0200 Subject: [PATCH 5/6] docs: rewrite performance.md after tracer improvements - Add a Methodology section (environment, workload, cold start vs steady state) so future sessions are reproducible. - Reference hotspots by method name instead of line numbers; line numbers rot. - Remove the speculative H07 root-cause recommendation: the two slowest recorded H07 calls have ctxCreated=0, so per-item context allocation is not the dominant cost on the tail path. Mark root cause as needing attribution. - Reword the H10 status: the candidate fix lives on a separate branch and has not been measured here. Drop the 'pending validation' phrase. - Move the menu-batching workaround (W2 / bug 467000) out of the platform-specific section into a cross-platform batching-gaps section, since it is not platform-specific. - Add H04 as a row in the ranking table (count-only; absent from the earlier sessions' summaries) so it appears in the next session. - Flag the H09 inconsistency as likely a summarization artifact and note that the next session should use the tracer's built-in summary output instead of an external summary step. - Strip em/en dashes throughout. --- docs/performance.md | 292 ++++++++++++++++++++++++++------------------ 1 file changed, 171 insertions(+), 121 deletions(-) diff --git a/docs/performance.md b/docs/performance.md index 2e82cc4e7fc..ca195e696f5 100644 --- a/docs/performance.md +++ b/docs/performance.md @@ -1,135 +1,180 @@ # Renderer Performance Improvements This document catalogs performance opportunities discovered in the -`org.eclipse.e4.ui.workbench.renderers.swt` bundle, ranked by **measured impact** -from real IDE sessions. +`org.eclipse.e4.ui.workbench.renderers.swt` bundle, ranked by measured impact +from IDE sessions recorded with the `RendererPerfTracer` instrumentation on the +branch `wip-adding-performance-trace-code`. + +## Methodology + +Trace data is collected by the always-on `RendererPerfTracer` utility compiled +into the bundle on this branch. Each instrumented hotspot writes a CSV row +(`timestamp_ms, hotspot_id, duration_ns, detail`) to +`$HOME/renderer-perf-trace.csv`. On JVM shutdown the tracer also writes a +companion `renderer-perf-trace.csv.summary.txt` file with count, total ms, +max ms, p95 ms, and calls/min per hotspot, aggregated deterministically at the +source (no external script, no LLM summarization step). + +All numbers in this document should be produced from that summary file or +directly from the CSV. + +### Environment + +Fill these in when publishing new trace results so sessions are comparable: + +- Eclipse build (commit sha of `eclipse.platform.ui` branch) +- JDK version and vendor +- Operating system and version +- Display resolution and scale factor +- Workload description (what was opened, switched, resized, saved) +- Whether the first N seconds after startup are included or excluded + +### Cold start vs steady state + +The first few seconds after startup dominate several hotspots (H10, H13, +initial H12) and should be reported separately from steady-state interaction +so that the ranking does not conflate one-off startup cost with recurring +overhead. A simple convention: split the trace at the first user-initiated +event, or at a fixed boundary such as 10 s. ## Trace Sessions -| Property | Session 1 (baseline) | Session 2 (2026-04-16) | -|----------|---------------------:|------------------------:| -| Duration | ~120 s | 92.23 s | -| Trace events | 1,286 | 1,808 | -| Total hotspot time | ~2,170 ms | 2,824 ms | -| H10 fix applied? | No | No | +| Property | Session 1 (baseline) | Session 2 (2026-04-16) | +|-----------------------|---------------------:|------------------------:| +| Duration | ~120 s | 92.23 s | +| Trace events | 1,286 | 1,808 | +| Total hotspot time | ~2,170 ms | 2,824 ms | +| Cold start separated? | No | No | -Session 2 was recorded on the same machine with a similar workload. The H10 -`setRedraw` batching fix (commit `dcb99d75` on branch -`wip-fix-showTab-setRedraw-batching`) was **not yet merged** into the trace -branch, so H10 numbers represent the unfixed baseline. +Session 1 and Session 2 pre-date the new summary pipeline. Both were +summarized by an external language model, and at least one anomaly +(an inflated H09 total relative to per-call averages) is visible in the raw +numbers. Future sessions should use the tracer's own `summary.txt` output. ## Measurement Summary -Sorted by Session 2 total time descending. Session 1 values in parentheses for -comparison. - -| Rank | Hotspot | Count | Total ms | Max ms | P95 ms | Verdict | -|------|---------|------:|---------:|---------:|-------:|---------| -| 1 | H10 — `showTab` without `setRedraw` batching | 8 (9) | **1,817** (1,566) | 1,209 (977) | 901 | **Fix landed** (pending validation) | -| 2 | H07 — Per-item context creation in menu show | 403 (227) | **954** (568) | 373 (262) | 1.2 | **Fix next** | -| 3 | H03 — Unbatched `updateWidget`/`requestLayout` | 155 (115) | **25** (16) | 6.3 (3.1) | 0.76 | Low per call; new 6 ms spike | -| 4 | H05 — `ToolItemUpdater` linear scan | 309 (238) | **22** (17) | 3.3 (3.4) | 0.09 | Low — 12 items | -| 5 | H09 — `findElements` on window activate/deactivate | 9 (54) | **1.9**\* (1.1) | 0.15 (0.14) | 0.04 | Negligible | -| 6 | H01 — `findElements` for active CSS class | 19 (30) | **1.5** (2.4) | 0.23 (0.24) | 0.19 | Negligible | -| 7 | H02 — `findElements` for placeholders | 22+2 (21) | **1.5+0.3** (1.9) | 0.24 (0.23) | 0.09 | Negligible | -| 8 | H06 — Unbatched `scheduleManagerUpdate` | 753 (537) | 0 (count only) | — | — | **Fix** (753 fires in 92 s = 490/min) | -| 9 | H14 — Uncoalesced `RunAndTrack` callbacks | 39 (51) | 0 (count only) | — | — | Low volume | -| 10 | H13 — Full model scan at startup | 1 (1) | 0.15 (0.2) | 0.15 (0.17) | — | One-time | -| 11 | H12 — `synchCTFState` recursive walk | 2 (2) | 0.11 (0.1) | 0.10 (0.1) | — | Negligible | -| 12 | H11 — Limbo reparenting | 1 (1) | 0.09 (0.2) | 0.09 (0.2) | — | Single event | - -\* H09 total appears high relative to per-call averages in the raw trace; -likely a Gemini summarization artifact. Individual calls remain sub-ms. - -**Key finding (unchanged):** H10 and H07 still account for **98%** of all -measured renderer overhead (64% and 34% respectively). The `findElements` tree -scans (H01, H02, H09) remain fast in practice. +Sorted by Session 2 total time descending. Session 1 values in parentheses +for comparison. + +| Rank | Hotspot | Count | Total ms | Max ms | P95 ms | Verdict | +|------|---------------------------------------------|----------:|--------------:|--------------:|-------:|------------------------------------| +| 1 | H10 `showTab` without `setRedraw` batching | 8 (9) | **1,817** (1,566) | 1,209 (977) | 901 | Candidate fix on a separate branch, not yet measured here | +| 2 | H07 per-item context creation in menu show | 403 (227)| **954** (568) | 373 (262) | 1.2 | Top unfixed priority (see caveat) | +| 3 | H03 unbatched `updateWidget` / `requestLayout` | 155 (115)| **25** (16) | 6.3 (3.1) | 0.76 | Low per call; new 6 ms spike | +| 4 | H05 `ToolItemUpdater` linear scan | 309 (238)| **22** (17) | 3.3 (3.4) | 0.09 | Low; 12 items | +| 5 | H09 `findElements` on window activate/deactivate | 9 (54) | **1.9**\* (1.1) | 0.15 (0.14) | 0.04 | Negligible | +| 6 | H01 `findElements` for active CSS class | 19 (30) | **1.5** (2.4) | 0.23 (0.24) | 0.19 | Negligible | +| 7 | H02 `findElements` for placeholders | 22+2 (21) | **1.5+0.3** (1.9) | 0.24 (0.23) | 0.09 | Negligible | +| 8 | H06 unbatched `scheduleManagerUpdate` | 753 (537)| 0 (count only)| n/a | n/a | Fix (753 fires in 92 s, 490/min) | +| 9 | H14 uncoalesced `RunAndTrack` callbacks | 39 (51) | 0 (count only)| n/a | n/a | Low volume | +| 10 | H04 dirty-flag `ALL_SELECTOR` update | not reported in sessions 1/2 | count only | n/a | n/a | Add to next session's summary | +| 11 | H13 full model scan at startup | 1 (1) | 0.15 (0.2) | 0.15 (0.17) | n/a | One-time | +| 12 | H12 `synchCTFState` recursive walk | 2 (2) | 0.11 (0.1) | 0.10 (0.1) | n/a | Negligible | +| 13 | H11 limbo reparenting | 1 (1) | 0.09 (0.2) | 0.09 (0.2) | n/a | Single event | + +\* H09 total is inconsistent with per-call averages in the raw CSV. Suspected +language-model summarization artifact; individual calls remain sub-ms. This +is one of the reasons the next session should use the tracer's built-in +summary. + +**Key finding (unchanged):** H10 and H07 account for roughly 98 percent of all +measured renderer overhead in these two sessions (64 percent and 34 percent +respectively). The `findElements` tree scans (H01, H02, H09) remain fast in +practice at current model sizes. ### Notable ranking changes from Session 1 -- **H07 moved closer to H10.** Total time rose 68% (568 ms -> 954 ms) with - call count nearly doubling (227 -> 403). This is consistent with heavier - menu interaction in Session 2. H07 is now clearly the **dominant unfixed - hotspot** once the H10 `setRedraw` fix is validated. -- **H03 showed a new 6 ms spike** (vs 3.1 ms max previously). Still a low - total (25 ms) but worth monitoring — could indicate layout thrashing under - specific tab arrangements. -- **H06 frequency increased** from 537 calls in ~120 s (268/min) to 753 calls - in 92 s (490/min). The zero-duration measurement confirms these are - trigger-only probes, but the rising cadence reinforces the case for enabling - batching. -- **H09 count dropped sharply** (54 -> 9) because Session 2 involved fewer - window activate/deactivate cycles. Time remains negligible. - -### High-frequency hotspots (> 100 calls/min) - -| Hotspot | Calls/min (Session 2) | Calls/min (Session 1) | -|---------|----------------------:|----------------------:| -| H06 `scheduleManagerUpdate` | 490 | 268 | -| H07 menu `contextPerItem` | 262 | 113 | -| H05 `toolItemUpdater` | 201 | 119 | -| H03 `updateWidget` | 101 | 58 | - -H06 is zero-duration (trigger probe) but its 490 calls/min means 490 +- H07 moved closer to H10. Total time rose 68 percent (568 ms to 954 ms) with + call count nearly doubling (227 to 403), consistent with heavier menu + interaction in Session 2. H07 is the dominant unfixed hotspot once the H10 + candidate fix is validated. +- H03 showed a new 6 ms spike (up from 3.1 ms max). Still a low total (25 ms) + but worth monitoring; could indicate layout cascading when multiple toolbar + items update in quick succession. +- H06 frequency rose from 537 calls in ~120 s (268/min) to 753 calls in 92 s + (490/min). The zero-duration measurement confirms these are trigger-only + probes, but the rising cadence reinforces the case for enabling batching. +- H09 count dropped sharply (54 to 9) because Session 2 involved fewer window + activate and deactivate cycles. Time remains negligible. + +### High-frequency hotspots (more than 100 calls per minute) + +| Hotspot | Calls/min (Session 2) | Calls/min (Session 1) | +|-------------------------------|----------------------:|----------------------:| +| H06 `scheduleManagerUpdate` | 490 | 268 | +| H07 menu `contextPerItem` | 262 | 113 | +| H05 `toolItemUpdater` | 201 | 119 | +| H03 `updateWidget` | 101 | 58 | + +H06 is zero-duration (trigger probe) but its 490 calls per minute means 490 synchronous `mgr.update(false)` walks per minute when batching is off. --- ## Fix Status -### DONE: `showTab` `setRedraw` Batching (H10) - -**Branch:** `wip-fix-showTab-setRedraw-batching` (commit `dcb99d75`) +### H10: `showTab` `setRedraw` batching -The `showTab` body is now wrapped in `setRedraw(false)` / `setRedraw(true)` on -the target `CTabFolder`. This was the #1 hotspot in both sessions -(1,566 ms / 1,817 ms total, worst single call 977 ms / 1,209 ms). +**Candidate fix:** `wip-fix-showTab-setRedraw-batching` (commit `dcb99d75`) -**Validation needed:** Re-run the trace with the fix merged to confirm H10 -drops below the 16 ms frame budget. Expected outcome: H10 total should drop -by 80-95%, making H07 the new #1. +The candidate wraps the `showTab` body in `setRedraw(false)` and +`setRedraw(true)` on the target `CTabFolder`. H10 was the number one hotspot +in both sessions (1,566 ms and 1,817 ms total, worst single call 977 ms and +1,209 ms). The fix is not part of this trace branch and has not been measured +here. A third trace session with the fix merged is required to confirm the +expected 80 to 95 percent reduction. --- -## Priority 1 (next fix): Per-Item Eclipse Context Creation During Menu Show (H07) +## Priority 1: per-item Eclipse context creation during menu show (H07) -**954 ms total — 34% of all hotspot time (up from 26%)** +**954 ms total; 34 percent of all hotspot time (up from 26 percent).** -**File:** `MenuManagerRendererFilter.java` (lines ~190-230) +**Location:** `MenuManagerRendererFilter.updateVisibility(...)` (private +helper invoked from the public `updateElementVisibility` entry point). -`updateElementVisibility()` fired 403 times in 92 seconds. Inside the loop -over `menuModel.getChildren()`, a new `IEclipseContext` is allocated and -disposed for every `MHandledMenuItem` / `MDirectMenuItem`. The worst single -call in Session 2 took **373 ms** (children=18, ctxCreated=0), up from 262 ms. -Menus with 30-48 children dominate the tail latency. +`updateVisibility` fired 403 times in 92 seconds. The method iterates over +`menuModel.getChildren()` and for each `MHandledMenuItem` or `MDirectMenuItem` +performs work that includes creating and disposing an `IEclipseContext`. **Top 5 slowest H07 events (Session 2):** -| Timestamp (ms) | Duration (ms) | Detail | -|----------------:|--------------:|--------| -| 2,803 | 373 | children=18, ctxCreated=0 | -| 2,800 | 367 | children=18, ctxCreated=0 | -| 19,057 | 63 | children=30, ctxCreated=2 | -| 19,049 | 49 | children=48, ctxCreated=0 | -| 19,043 | 24 | children=12, ctxCreated=0 | - -**Recommendation:** Create one scratch `IEclipseContext` before the loop and -reset/reuse it per item, avoiding per-item allocation and disposal overhead. +| Elapsed (ms) | Duration (ms) | Detail | +|-------------:|--------------:|-------------------------------| +| 2,803 | 373 | children=18, ctxCreated=0 | +| 2,800 | 367 | children=18, ctxCreated=0 | +| 19,057 | 63 | children=30, ctxCreated=2 | +| 19,049 | 49 | children=48, ctxCreated=0 | +| 19,043 | 24 | children=12, ctxCreated=0 | + +**Caveat on the previously recommended fix.** An earlier draft of this +document proposed a shared scratch `IEclipseContext` reused across iterations, +based on the assumption that per-item context allocation dominated. The two +worst recorded calls (373 ms and 367 ms) both show `ctxCreated=0`, meaning +no contexts were created on that path. Allocation is therefore **not** the +dominant cost for the tail, and the scratch-context refactor is not +guaranteed to help. The actual hot code on the `ctxCreated=0` path is the +enablement evaluation loop around `ContributionsAnalyzer.populateModelInterfaces`, +the expression evaluation via `ExpressionContext`, and the handler service +lookups for each child. Root cause needs attribution (e.g., JFR or async +profiler) before committing to a fix shape. --- -## Priority 2: Unbatched `scheduleManagerUpdate` (H06) +## Priority 2: unbatched `scheduleManagerUpdate` (H06) -**753 calls in 92 seconds (490/min) — not time-measured but very high frequency** +**753 calls in 92 s (490/min); count-only, not time-measured.** -**File:** `MenuManagerRenderer.java` (lines ~1144-1179) +**Location:** `MenuManagerRenderer.scheduleManagerUpdate(IContributionManager)`. -The batching mechanism (bug 467000 workaround) is disabled by default. Each -call performs an immediate `mgr.update(false)`, walking and reconciling the -widget tree synchronously. Frequency increased 83% between sessions. +The batching mechanism introduced for bug 467000 is disabled by default via +the system property `eclipse.workaround.bug467000`. Without it, each call +performs an immediate `mgr.update(false)`, walking and reconciling the widget +tree synchronously. Frequency rose 83 percent between sessions. -**Recommendation:** Enable batching unconditionally and remove the -system-property guard. +**Recommendation:** enable batching unconditionally and remove the system +property guard. This is both a local fix (H06 in this doc) and a workaround +for an unresolved SWT gap (see "Cross-platform batching gaps" below). --- @@ -137,45 +182,50 @@ system-property guard. ### `updateWidget` / `requestLayout` (H03) -25 ms total across 155 calls. Average remains low (~162 us) but max jumped -from 3.1 ms to **6.3 ms**. The new spike warrants investigation — likely a +25 ms total across 155 calls. Average remains low (about 162 us) but max +jumped from 3.1 ms to 6.3 ms. The new spike warrants investigation; likely a layout cascade when multiple toolbar items update in quick succession. -### `ToolItemUpdater` Linear Scan (H05) +### `ToolItemUpdater` linear scan (H05) -22 ms total across 309 calls. Still only 12 toolbar items registered. Numbers -track linearly with session activity. Switch to `LinkedHashSet` as a -preventive measure. +22 ms total across 309 calls. Still only 12 toolbar items registered. +Numbers track linearly with session activity. Switching the backing +collection to `LinkedHashSet` is a preventive measure, not a current +bottleneck. -### `findElements` Tree Scans (H01, H02, H09) +### `findElements` tree scans (H01, H02, H09) -All three combined: **5.2 ms** across 52 calls (down from 5.4 ms / 103 calls). -Remain negligible at current model sizes. +All three combined: 5.2 ms across 52 calls (down from 5.4 ms across 103 +calls). Remain negligible at current model sizes. --- -## Platform-Specific SWT Workarounds +## Cross-platform batching gaps -### W-1. Synchronous `layout(true, true)` on Windows in SashRenderer +### Menu manager batching guarded by bug-467000 system property (H06 / W2) -**File:** `SashRenderer.java` (lines ~77-90) -**SWT Bug:** [558392](https://bugs.eclipse.org/bugs/show_bug.cgi?id=558392) +**Location:** `MenuManagerRenderer.scheduleManagerUpdate(IContributionManager)`. +**Eclipse Bug:** [467000](https://bugs.eclipse.org/bugs/show_bug.cgi?id=467000) -On Windows, `forceLayout()` uses `sashForm.layout(true, true)` — a synchronous, -recursive full layout — instead of the asynchronous `requestLayout()` used on -other platforms. Not triggered in either Linux trace session, but expected to -be the **dominant hotspot on Windows** based on code analysis. +SWT `Menu` does not support deferred or coalesced updates, forcing each +`MenuManager.update(false)` to immediately reconcile the widget tree (753 +times in Session 2, up from 537). -**What SWT should fix:** `Composite.requestLayout()` must correctly schedule a -deferred layout on Windows. +**What SWT should fix:** provide a built-in batching mechanism for menu +updates. -### W-2. Menu Manager Batching Guarded by Bug-467000 System Property +## Platform-Specific SWT Workarounds -**File:** `MenuManagerRenderer.java` (lines ~1144-1179) -**Eclipse Bug:** [467000](https://bugs.eclipse.org/bugs/show_bug.cgi?id=467000) +### W-1. Synchronous `layout(true, true)` on Windows in `SashRenderer` + +**Location:** `SashRenderer.forceLayout(...)`. +**SWT Bug:** [558392](https://bugs.eclipse.org/bugs/show_bug.cgi?id=558392) -SWT `Menu` does not support deferred/coalesced updates, forcing each -`MenuManager.update(false)` to immediately reconcile the widget tree (753 times -in Session 2, up from 537). +On Windows, `forceLayout()` uses `sashForm.layout(true, true)`, a +synchronous, recursive full layout, instead of the asynchronous +`requestLayout()` used on other platforms. Not triggered in either Linux +trace session, but expected to be the dominant hotspot on Windows based on +code analysis. -**What SWT should fix:** Provide a built-in batching mechanism for menu updates. +**What SWT should fix:** `Composite.requestLayout()` must correctly schedule +a deferred layout on Windows. From 1e73ce06c49d646b26746836b3ee1ec4aead4f3f Mon Sep 17 00:00:00 2001 From: Lars Vogel Date: Mon, 20 Apr 2026 23:35:01 +0200 Subject: [PATCH 6/6] Split H10 showTab probe into lazy-create vs reparent paths The setRedraw batching candidate (wip-fix-showTab-setRedraw-batching) was tried in an aggregator build and measured on Linux/GTK. It did not reduce H10 cost; it reattributed paint work into the method and inflated the tail. Measurement showed the tail in every session is one or two very expensive calls on the lazy-create path where renderer.createGui(element) runs for the first time. The reparent path stays in the single-digit ms range regardless of batching. The two paths have different causes and different ranges, so reporting them as one hotspot hides the signal. Split H10_showTab_noBatch into H10a_showTab_lazyCreate (first-show, widget was null) and H10b_showTab_reparent (widget existed, being reparented or reselected). Future sessions can now see per-path P95 and max. Performance doc updated with the measurement table from the abandoned setRedraw fix and the rationale for the split. --- .../renderers/swt/RendererPerfTracer.java | 3 +- .../renderers/swt/StackRenderer.java | 7 ++- docs/performance.md | 55 ++++++++++++++----- 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java index 1ccda477beb..8abd9ea3b45 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/RendererPerfTracer.java @@ -61,7 +61,8 @@ public final class RendererPerfTracer { public static final String H06_MENU_SCHEDULE_UPDATE = "H06_menu_scheduleUpdate"; //$NON-NLS-1$ public static final String H07_MENU_CONTEXT_PER_ITEM = "H07_menu_contextPerItem"; //$NON-NLS-1$ public static final String H09_WBW_FIND_STACKS = "H09_wbw_findStacks"; //$NON-NLS-1$ - public static final String H10_SHOW_TAB_NO_BATCH = "H10_showTab_noBatch"; //$NON-NLS-1$ + public static final String H10A_SHOWTAB_LAZY = "H10a_showTab_lazyCreate"; //$NON-NLS-1$ + public static final String H10B_SHOWTAB_REPARENT = "H10b_showTab_reparent"; //$NON-NLS-1$ public static final String H11_LIMBO_REPARENT = "H11_limbo_reparent"; //$NON-NLS-1$ public static final String H12_AREA_SYNCH_CTF = "H12_area_synchCTF"; //$NON-NLS-1$ public static final String H13_TOOLCTRL_STARTUP_SCAN = "H13_toolCtrl_startupScan"; //$NON-NLS-1$ diff --git a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java index 5d888a057ed..2d865fdcae6 100644 --- a/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java +++ b/bundles/org.eclipse.e4.ui.workbench.renderers.swt/src/org/eclipse/e4/ui/workbench/renderers/swt/StackRenderer.java @@ -1501,10 +1501,11 @@ protected void showTab(MUIElement element) { tabItem = findItemForPart(element, element.getParent()); } Control ctrl = (Control) element.getWidget(); + boolean lazyCreate = (ctrl == null); if (ctrl != null && ctrl.getParent() != tabFolder) { ctrl.setParent(tabFolder); tabItem.setControl(ctrl); - } else if (element.getWidget() == null) { + } else if (lazyCreate) { Control tabCtrl = (Control) renderer.createGui(element); tabItem.setControl(tabCtrl); } @@ -1523,8 +1524,8 @@ protected void showTab(MUIElement element) { // Show the new state adjustTopRight(tabFolder); if (RendererPerfTracer.ENABLED) { - RendererPerfTracer.trace(RendererPerfTracer.H10_SHOW_TAB_NO_BATCH, _t0, - element.getClass().getSimpleName()); + String id = lazyCreate ? RendererPerfTracer.H10A_SHOWTAB_LAZY : RendererPerfTracer.H10B_SHOWTAB_REPARENT; + RendererPerfTracer.trace(id, _t0, element.getClass().getSimpleName()); } } diff --git a/docs/performance.md b/docs/performance.md index ca195e696f5..5d2aa313d6a 100644 --- a/docs/performance.md +++ b/docs/performance.md @@ -58,7 +58,7 @@ for comparison. | Rank | Hotspot | Count | Total ms | Max ms | P95 ms | Verdict | |------|---------------------------------------------|----------:|--------------:|--------------:|-------:|------------------------------------| -| 1 | H10 `showTab` without `setRedraw` batching | 8 (9) | **1,817** (1,566) | 1,209 (977) | 901 | Candidate fix on a separate branch, not yet measured here | +| 1 | H10 `showTab` (now split into H10a lazy / H10b reparent) | 8 (9) | **1,817** (1,566) | 1,209 (977) | 901 | Tail dominated by H10a cold first-show; `setRedraw` fix tried and reverted | | 2 | H07 per-item context creation in menu show | 403 (227)| **954** (568) | 373 (262) | 1.2 | Top unfixed priority (see caveat) | | 3 | H03 unbatched `updateWidget` / `requestLayout` | 155 (115)| **25** (16) | 6.3 (3.1) | 0.76 | Low per call; new 6 ms spike | | 4 | H05 `ToolItemUpdater` linear scan | 309 (238)| **22** (17) | 3.3 (3.4) | 0.09 | Low; 12 items | @@ -86,8 +86,8 @@ practice at current model sizes. - H07 moved closer to H10. Total time rose 68 percent (568 ms to 954 ms) with call count nearly doubling (227 to 403), consistent with heavier menu - interaction in Session 2. H07 is the dominant unfixed hotspot once the H10 - candidate fix is validated. + interaction in Session 2. With the H10 `setRedraw` fix ruled out, H07 is + now the top actionable hotspot. - H03 showed a new 6 ms spike (up from 3.1 ms max). Still a low total (25 ms) but worth monitoring; could indicate layout cascading when multiple toolbar items update in quick succession. @@ -113,16 +113,45 @@ synchronous `mgr.update(false)` walks per minute when batching is off. ## Fix Status -### H10: `showTab` `setRedraw` batching - -**Candidate fix:** `wip-fix-showTab-setRedraw-batching` (commit `dcb99d75`) - -The candidate wraps the `showTab` body in `setRedraw(false)` and -`setRedraw(true)` on the target `CTabFolder`. H10 was the number one hotspot -in both sessions (1,566 ms and 1,817 ms total, worst single call 977 ms and -1,209 ms). The fix is not part of this trace branch and has not been measured -here. A third trace session with the fix merged is required to confirm the -expected 80 to 95 percent reduction. +### H10: `showTab` `setRedraw` batching (abandoned) + +A candidate fix on `wip-fix-showTab-setRedraw-batching` wrapped the `showTab` +body in `setRedraw(false)` and `setRedraw(true)` on the target `CTabFolder`. +It was merged into an aggregator build and measured. Result: **the fix did +not help and was reverted**. + +| Metric | No fix | Always batch | Reparent-only batch | +|-------------------|-------:|-------------:|--------------------:| +| Avg ms per call | 24.6 | 34.0 | 54.2 | +| P95 ms | 124 | 99 | 137 | +| Max ms | 695 | 1,087 | 1,462 | + +Why it did not work: the tail in every recorded session (max 695 ms to +1,462 ms, P95 98 ms to 137 ms) is dominated by one or two very expensive +calls per session. Those calls happen on the **lazy-create path** inside +`showTab`, where `renderer.createGui(element)` constructs a new editor or +view and its initial layout. `setRedraw(true)` on exit forces that paint to +happen synchronously inside the method, which inflates worst-case time +rather than reducing it. On the reparent path (switching to an already +rendered tab) the common case is already in the single-digit ms range, so +there is no meaningful time for batching to eliminate. + +### Revised instrumentation (this branch) + +Because the lazy-create path and the reparent path have different cost +profiles and different causes, H10 has been split into two probes: + +- `H10a_showTab_lazyCreate`: `showTab` calls where `element.getWidget()` + is null and `renderer.createGui` is invoked. Expect a long right-tail + governed by editor/view construction cost. +- `H10b_showTab_reparent`: `showTab` calls where the widget already + exists and is being reparented or reselected. Expect low single-digit ms + and stable P95. + +Next session should report the two probes separately. `H10a` is not +actionable via `setRedraw` batching. `H10b` is the one to watch if we want +evidence that batching might help steady-state switching (though the +measurements so far suggest it does not). ---