From db483a6f60f9c002fe25c347454a6f646d993f29 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 11:53:04 -0600 Subject: [PATCH 01/13] Add UiAutomation language support and options Added a new LanguageKind.UiAutomation to HistoryInfo and implemented the UiAutomationLang class for "UI Automation Text" language support. Introduced UiAutomationOptions record to configure UI Automation traversal and filtering behavior. --- Text-Grab/Models/HistoryInfo.cs | 1 + Text-Grab/Models/UiAutomationLang.cs | 25 +++++++++++++++++++++++++ Text-Grab/Models/UiAutomationOptions.cs | 9 +++++++++ 3 files changed, 35 insertions(+) create mode 100644 Text-Grab/Models/UiAutomationLang.cs create mode 100644 Text-Grab/Models/UiAutomationOptions.cs diff --git a/Text-Grab/Models/HistoryInfo.cs b/Text-Grab/Models/HistoryInfo.cs index b335e397..5c422553 100644 --- a/Text-Grab/Models/HistoryInfo.cs +++ b/Text-Grab/Models/HistoryInfo.cs @@ -58,6 +58,7 @@ public ILanguage OcrLanguage LanguageKind.Global => new GlobalLang(new Language(LanguageTag)), LanguageKind.Tesseract => new TessLang(LanguageTag), LanguageKind.WindowsAi => new WindowsAiLang(), + LanguageKind.UiAutomation => new UiAutomationLang(), _ => new GlobalLang(LanguageUtilities.GetCurrentInputLanguage().AsLanguage() ?? 
new Language("en-US")), }; } diff --git a/Text-Grab/Models/UiAutomationLang.cs b/Text-Grab/Models/UiAutomationLang.cs new file mode 100644 index 00000000..fb993a60 --- /dev/null +++ b/Text-Grab/Models/UiAutomationLang.cs @@ -0,0 +1,25 @@ +using Text_Grab.Interfaces; +using Windows.Globalization; + +namespace Text_Grab.Models; + +public class UiAutomationLang : ILanguage +{ + public const string Tag = "UIAutomation"; + + public string AbbreviatedName => "UIA"; + + public string DisplayName => "UI Automation Text"; + + public string CurrentInputMethodLanguageTag => string.Empty; + + public string CultureDisplayName => "UI Automation Text"; + + public string LanguageTag => Tag; + + public LanguageLayoutDirection LayoutDirection => LanguageLayoutDirection.Ltr; + + public string NativeName => "UI Automation Text"; + + public string Script => string.Empty; +} diff --git a/Text-Grab/Models/UiAutomationOptions.cs b/Text-Grab/Models/UiAutomationOptions.cs new file mode 100644 index 00000000..fdf5a722 --- /dev/null +++ b/Text-Grab/Models/UiAutomationOptions.cs @@ -0,0 +1,9 @@ +using System.Windows; + +namespace Text_Grab.Models; + +public record UiAutomationOptions( + UiAutomationTraversalMode TraversalMode, + bool IncludeOffscreen, + bool PreferFocusedElement, + Rect? FilterBounds = null); From 2a4d5d5a2a11de23556d732e5a4131f9e2dad6cb Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 11:54:39 -0600 Subject: [PATCH 02/13] Add UI Automation as OCR language option and utilities Added support for UI Automation as a selectable OCR language. Integrated UiAutomationLang into language selection, caching, and kind/type checks. Introduced UIAutomationUtilities for extracting text from screen regions, points, and windows using Windows UI Automation APIs. Updated OcrUtilities to route requests to UIAutomationUtilities when appropriate, with fallback logic to traditional OCR. Added CaptureLanguageUtilities for language enumeration and compatibility checks. 
Improved settings import/export robustness to handle property-based settings. These changes enable text extraction from UI elements as an alternative to image-based OCR. --- Text-Grab/Services/LanguageService.cs | 13 + .../Utilities/CaptureLanguageUtilities.cs | 82 ++ Text-Grab/Utilities/OcrUtilities.cs | 69 +- .../SettingsImportExportUtilities.cs | 43 +- Text-Grab/Utilities/UIAutomationUtilities.cs | 731 ++++++++++++++++++ 5 files changed, 921 insertions(+), 17 deletions(-) create mode 100644 Text-Grab/Utilities/CaptureLanguageUtilities.cs create mode 100644 Text-Grab/Utilities/UIAutomationUtilities.cs diff --git a/Text-Grab/Services/LanguageService.cs b/Text-Grab/Services/LanguageService.cs index 3b4cb88f..b1d8e1ce 100644 --- a/Text-Grab/Services/LanguageService.cs +++ b/Text-Grab/Services/LanguageService.cs @@ -30,6 +30,8 @@ public class LanguageService // Static instance of WindowsAiLang to avoid allocations private static readonly WindowsAiLang _windowsAiLangInstance = new(); private static readonly string _windowsAiLangTag = _windowsAiLangInstance.LanguageTag; + private static readonly UiAutomationLang _uiAutomationLangInstance = new(); + private static readonly string _uiAutomationLangTag = _uiAutomationLangInstance.LanguageTag; #endregion Fields @@ -71,6 +73,9 @@ public IList GetAllLanguages() List languages = []; + if (AppUtilities.TextGrabSettings.UiAutomationEnabled) + languages.Add(_uiAutomationLangInstance); + if (WindowsAiUtilities.CanDeviceUseWinAI()) { // Add Windows AI languages - use static instance @@ -97,6 +102,7 @@ public static string GetLanguageTag(object language) { Language lang => lang.LanguageTag, WindowsAiLang => _windowsAiLangTag, + UiAutomationLang => _uiAutomationLangTag, TessLang tessLang => tessLang.RawTag, GlobalLang gLang => gLang.LanguageTag, _ => throw new ArgumentException("Unsupported language type", nameof(language)), @@ -112,6 +118,7 @@ public static LanguageKind GetLanguageKind(object language) { Language => 
LanguageKind.Global, WindowsAiLang => LanguageKind.WindowsAi, + UiAutomationLang => LanguageKind.UiAutomation, TessLang => LanguageKind.Tesseract, _ => LanguageKind.Global, // Default fallback }; @@ -145,6 +152,12 @@ public ILanguage GetOCRLanguage() return _cachedOcrLanguage; } + if (lastUsedLang == _uiAutomationLangTag && AppUtilities.TextGrabSettings.UiAutomationEnabled) + { + _cachedOcrLanguage = _uiAutomationLangInstance; + return _cachedOcrLanguage; + } + try { selectedLanguage = new GlobalLang(lastUsedLang); diff --git a/Text-Grab/Utilities/CaptureLanguageUtilities.cs b/Text-Grab/Utilities/CaptureLanguageUtilities.cs new file mode 100644 index 00000000..f1762764 --- /dev/null +++ b/Text-Grab/Utilities/CaptureLanguageUtilities.cs @@ -0,0 +1,82 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Text_Grab.Interfaces; +using Text_Grab.Models; +using Windows.Media.Ocr; + +namespace Text_Grab.Utilities; + +internal static class CaptureLanguageUtilities +{ + public static async Task> GetCaptureLanguagesAsync(bool includeTesseract) + { + List languages = []; + + if (AppUtilities.TextGrabSettings.UiAutomationEnabled) + languages.Add(new UiAutomationLang()); + + if (WindowsAiUtilities.CanDeviceUseWinAI()) + languages.Add(new WindowsAiLang()); + + if (includeTesseract + && AppUtilities.TextGrabSettings.UseTesseract + && TesseractHelper.CanLocateTesseractExe()) + { + languages.AddRange(await TesseractHelper.TesseractLanguages()); + } + + foreach (Windows.Globalization.Language language in OcrEngine.AvailableRecognizerLanguages) + languages.Add(new GlobalLang(language)); + + return languages; + } + + public static bool MatchesPersistedLanguage(ILanguage language, string persistedLanguage) + { + if (string.IsNullOrWhiteSpace(persistedLanguage)) + return false; + + return string.Equals(language.LanguageTag, persistedLanguage, StringComparison.CurrentCultureIgnoreCase) + || 
string.Equals(language.CultureDisplayName, persistedLanguage, StringComparison.CurrentCultureIgnoreCase) + || string.Equals(language.DisplayName, persistedLanguage, StringComparison.CurrentCultureIgnoreCase); + } + + public static int FindPreferredLanguageIndex(IReadOnlyList languages, string persistedLanguage, ILanguage fallbackLanguage) + { + for (int i = 0; i < languages.Count; i++) + { + if (MatchesPersistedLanguage(languages[i], persistedLanguage)) + return i; + } + + for (int i = 0; i < languages.Count; i++) + { + if (string.Equals(languages[i].LanguageTag, fallbackLanguage.LanguageTag, StringComparison.CurrentCultureIgnoreCase)) + return i; + } + + return languages.Count > 0 ? 0 : -1; + } + + public static void PersistSelectedLanguage(ILanguage language) + { + AppUtilities.TextGrabSettings.LastUsedLang = language.LanguageTag; + AppUtilities.TextGrabSettings.Save(); + LanguageUtilities.InvalidateOcrLanguageCache(); + } + + public static ILanguage GetUiAutomationFallbackLanguage() + { + ILanguage currentInputLanguage = LanguageUtilities.GetCurrentInputLanguage(); + + return currentInputLanguage as GlobalLang ?? 
new GlobalLang(currentInputLanguage.LanguageTag); + } + + public static bool SupportsTableOutput(ILanguage language) + => language is not TessLang && language is not UiAutomationLang; + + public static bool IsStaticImageCompatible(ILanguage language) + => language is not UiAutomationLang; +} diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index 74bb8859..4a1439d4 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -32,6 +32,16 @@ public static partial class OcrUtilities // Cache the SpaceJoiningWordRegex to avoid creating it on every method call private static readonly Regex _cachedSpaceJoiningWordRegex = SpaceJoiningWordRegex(); + private static bool IsUiAutomationLanguage(ILanguage language) => language is UiAutomationLang; + + private static ILanguage GetCompatibleOcrLanguage(ILanguage language) + { + if (language is UiAutomationLang) + return CaptureLanguageUtilities.GetUiAutomationFallbackLanguage(); + + return language; + } + public static void GetTextFromOcrLine(this IOcrLine ocrLine, bool isSpaceJoiningOCRLang, StringBuilder text) { // (when OCR language is zh or ja) @@ -79,6 +89,15 @@ public static void GetTextFromOcrLine(this IOcrLine ocrLine, bool isSpaceJoining public static async Task GetTextFromAbsoluteRectAsync(Rect rect, ILanguage language) { + if (IsUiAutomationLanguage(language)) + { + string uiAutomationText = await UIAutomationUtilities.GetTextFromRegionAsync(rect); + if (!string.IsNullOrWhiteSpace(uiAutomationText) || !DefaultSettings.UiAutomationFallbackToOcr) + return uiAutomationText; + + language = GetCompatibleOcrLanguage(language); + } + Rectangle selectedRegion = rect.AsRectangle(); Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(selectedRegion); @@ -93,13 +112,12 @@ public static async Task GetRegionsTextAsync(Window passedWindow, Rectan int thisCorrectedTop = (int)absPosPoint.Y + selectedRegion.Top; Rectangle correctedRegion = new(thisCorrectedLeft, 
thisCorrectedTop, selectedRegion.Width, selectedRegion.Height); - Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(correctedRegion); - - return GetStringFromOcrOutputs(await GetTextFromImageAsync(bmp, language)); + return await GetTextFromAbsoluteRectAsync(correctedRegion.AsRect(), language); } public static async Task GetRegionsTextAsTableAsync(Window passedWindow, Rectangle selectedRegion, ILanguage objLang) { + ILanguage compatibleLanguage = GetCompatibleOcrLanguage(objLang); Point absPosPoint = passedWindow.GetAbsolutePosition(); int thisCorrectedLeft = (int)absPosPoint.X + selectedRegion.Left; @@ -107,10 +125,10 @@ public static async Task GetRegionsTextAsTableAsync(Window passedWindow, Rectangle correctedRegion = new(thisCorrectedLeft, thisCorrectedTop, selectedRegion.Width, selectedRegion.Height); Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(correctedRegion); - double scale = await GetIdealScaleFactorForOcrAsync(bmp, objLang); + double scale = await GetIdealScaleFactorForOcrAsync(bmp, compatibleLanguage); using Bitmap scaledBitmap = ImageMethods.ScaleBitmapUniform(bmp, scale); DpiScale dpiScale = VisualTreeHelper.GetDpi(passedWindow); - IOcrLinesWords ocrResult = await GetOcrResultFromImageAsync(scaledBitmap, objLang); + IOcrLinesWords ocrResult = await GetOcrResultFromImageAsync(scaledBitmap, compatibleLanguage); // New model-only flow List wordBorderInfos = ResultTable.ParseOcrResultIntoWordBorderInfos(ocrResult, dpiScale); @@ -127,12 +145,20 @@ public static async Task GetRegionsTextAsTableAsync(Window passedWindow, table.AnalyzeAsTable(wordBorderInfos, rectCanvasSize); StringBuilder sb = new(); - ResultTable.GetTextFromTabledWordBorders(sb, wordBorderInfos, objLang.IsSpaceJoining()); + ResultTable.GetTextFromTabledWordBorders(sb, wordBorderInfos, compatibleLanguage.IsSpaceJoining()); return sb.ToString(); } public static async Task GetTextFromBitmapAsync(Bitmap bitmap, ILanguage language) { + if (IsUiAutomationLanguage(language)) + { + if 
(!DefaultSettings.UiAutomationFallbackToOcr) + return string.Empty; + + language = GetCompatibleOcrLanguage(language); + } + return GetStringFromOcrOutputs(await GetTextFromImageAsync(bitmap, language)); } @@ -144,9 +170,10 @@ public static async Task GetTextFromBitmapSourceAsync(BitmapSource bitma public static async Task GetTextFromBitmapAsTableAsync(Bitmap bitmap, ILanguage language) { - double scale = await GetIdealScaleFactorForOcrAsync(bitmap, language); + ILanguage compatibleLanguage = GetCompatibleOcrLanguage(language); + double scale = await GetIdealScaleFactorForOcrAsync(bitmap, compatibleLanguage); using Bitmap scaledBitmap = ImageMethods.ScaleBitmapUniform(bitmap, scale); - IOcrLinesWords ocrResult = await GetOcrResultFromImageAsync(scaledBitmap, language); + IOcrLinesWords ocrResult = await GetOcrResultFromImageAsync(scaledBitmap, compatibleLanguage); DpiScale bitmapDpiScale = new(1.0, 1.0); List wordBorderInfos = ResultTable.ParseOcrResultIntoWordBorderInfos(ocrResult, bitmapDpiScale); @@ -163,7 +190,7 @@ public static async Task GetTextFromBitmapAsTableAsync(Bitmap bitmap, IL table.AnalyzeAsTable(wordBorderInfos, rectCanvasSize); StringBuilder textBuilder = new(); - ResultTable.GetTextFromTabledWordBorders(textBuilder, wordBorderInfos, language.IsSpaceJoining()); + ResultTable.GetTextFromTabledWordBorders(textBuilder, wordBorderInfos, compatibleLanguage.IsSpaceJoining()); return textBuilder.ToString(); } @@ -175,6 +202,7 @@ public static async Task GetTextFromBitmapSourceAsTableAsync(BitmapSourc public static async Task<(IOcrLinesWords?, double)> GetOcrResultFromRegionAsync(Rectangle region, ILanguage language) { + language = GetCompatibleOcrLanguage(language); Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(region); if (language is WindowsAiLang) @@ -196,6 +224,8 @@ public static async Task GetTextFromBitmapSourceAsTableAsync(BitmapSourc public static async Task GetOcrResultFromImageAsync(SoftwareBitmap scaledBitmap, ILanguage language) { + 
language = GetCompatibleOcrLanguage(language); + if (language is WindowsAiLang winAiLang) { return new WinAiOcrLinesWords(await WindowsAiUtilities.GetOcrResultAsync(scaledBitmap)); @@ -213,6 +243,7 @@ public static async Task GetOcrResultFromImageAsync(SoftwareBitm public static async Task GetOcrResultFromImageAsync(Bitmap scaledBitmap, ILanguage language) { + language = GetCompatibleOcrLanguage(language); await using MemoryStream memory = new(); using WrappingStream wrapper = new(memory); @@ -331,6 +362,14 @@ public static async Task> GetTextFromImageAsync(Bitmap bitmap, I { List outputs = []; + if (IsUiAutomationLanguage(language)) + { + if (!DefaultSettings.UiAutomationFallbackToOcr) + return outputs; + + language = GetCompatibleOcrLanguage(language); + } + if (language is TessLang tessLang) { OcrOutput tesseractOutput = await TesseractHelper.GetOcrOutputFromBitmap(bitmap, tessLang); @@ -417,6 +456,17 @@ public static async Task OcrAbsoluteFilePathAsync(string absolutePath, I public static async Task GetClickedWordAsync(Window passedWindow, Point clickedPoint, ILanguage OcrLang) { + if (IsUiAutomationLanguage(OcrLang)) + { + Point absoluteWindowPosition = passedWindow.GetAbsolutePosition(); + Point absoluteClickedPoint = new(absoluteWindowPosition.X + clickedPoint.X, absoluteWindowPosition.Y + clickedPoint.Y); + string uiAutomationText = await UIAutomationUtilities.GetTextFromPointAsync(absoluteClickedPoint); + if (!string.IsNullOrWhiteSpace(uiAutomationText) || !DefaultSettings.UiAutomationFallbackToOcr) + return uiAutomationText.Trim(); + + OcrLang = GetCompatibleOcrLanguage(OcrLang); + } + using Bitmap bmp = ImageMethods.GetWindowsBoundsBitmap(passedWindow); string ocrText = await GetTextFromClickedWordAsync(clickedPoint, bmp, OcrLang); return ocrText.Trim(); @@ -441,6 +491,7 @@ private static string GetTextFromClickedWord(Point singlePoint, IOcrLinesWords o public static async Task GetIdealScaleFactorForOcrAsync(Bitmap bitmap, ILanguage selectedLanguage) { + 
selectedLanguage = GetCompatibleOcrLanguage(selectedLanguage); IOcrLinesWords ocrResult = await OcrUtilities.GetOcrResultFromImageAsync(bitmap, selectedLanguage); return GetIdealScaleFactorForOcrResult(ocrResult, bitmap.Height, bitmap.Width); } diff --git a/Text-Grab/Utilities/SettingsImportExportUtilities.cs b/Text-Grab/Utilities/SettingsImportExportUtilities.cs index b49743ef..e87b2c5d 100644 --- a/Text-Grab/Utilities/SettingsImportExportUtilities.cs +++ b/Text-Grab/Utilities/SettingsImportExportUtilities.cs @@ -4,6 +4,7 @@ using System.IO; using System.IO.Compression; using System.Linq; +using System.Reflection; using System.Text.Json; using System.Threading.Tasks; using Text_Grab.Properties; @@ -116,6 +117,12 @@ private static async Task ExportSettingsToJsonAsync(string filePath) settingsDict[propertyName] = value; } + if (settingsDict.Count == 0) + { + foreach (PropertyInfo propertyInfo in GetSerializableSettingProperties(settings.GetType())) + settingsDict[propertyInfo.Name] = propertyInfo.GetValue(settings); + } + JsonSerializerOptions options = new() { WriteIndented = true, @@ -141,6 +148,8 @@ private static async Task ImportSettingsFromJsonAsync(string filePath) return; Settings settings = AppUtilities.TextGrabSettings; + Dictionary reflectedSettings = GetSerializableSettingProperties(settings.GetType()) + .ToDictionary(property => property.Name, property => property, StringComparer.Ordinal); // Apply each setting foreach (var kvp in settingsDict) @@ -151,14 +160,23 @@ private static async Task ImportSettingsFromJsonAsync(string filePath) try { SettingsProperty? property = settings.Properties[propertyName]; - if (property is null) - continue; - - object? value = ConvertJsonElementToSettingValue(kvp.Value, property); - if (value is not null) + if (property is not null) { - settings[propertyName] = value; + object? 
value = ConvertJsonElementToSettingValue(kvp.Value, property.PropertyType); + if (value is not null) + { + settings[propertyName] = value; + } + + continue; } + + if (!reflectedSettings.TryGetValue(propertyName, out PropertyInfo? propertyInfo)) + continue; + + object? reflectedValue = ConvertJsonElementToSettingValue(kvp.Value, propertyInfo.PropertyType); + if (reflectedValue is not null) + propertyInfo.SetValue(settings, reflectedValue); } catch (Exception ex) { @@ -252,10 +270,19 @@ private static string ConvertToPascalCase(string camelCase) return char.ToUpper(camelCase[0]) + camelCase.Substring(1); } - private static object? ConvertJsonElementToSettingValue(JsonElement jsonElement, SettingsProperty property) + private static IEnumerable GetSerializableSettingProperties(Type settingsType) { - Type propertyType = property.PropertyType; + return settingsType + .GetProperties(BindingFlags.Instance | BindingFlags.Public) + .Where(property => + property.CanRead + && property.CanWrite + && property.GetIndexParameters().Length == 0 + && property.GetCustomAttribute() is not null); + } + private static object? 
ConvertJsonElementToSettingValue(JsonElement jsonElement, Type propertyType) + { try { if (propertyType == typeof(string)) diff --git a/Text-Grab/Utilities/UIAutomationUtilities.cs b/Text-Grab/Utilities/UIAutomationUtilities.cs new file mode 100644 index 00000000..ec9b30b5 --- /dev/null +++ b/Text-Grab/Utilities/UIAutomationUtilities.cs @@ -0,0 +1,731 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using System.Windows; +using System.Windows.Automation; +using Text_Grab.Models; +using TextPatternRange = System.Windows.Automation.Text.TextPatternRange; +using TextUnit = System.Windows.Automation.Text.TextUnit; + +namespace Text_Grab.Utilities; + +public static class UIAutomationUtilities +{ + private const int FastMaxDepth = 2; + private const int BalancedMaxDepth = 6; + private const int ThoroughMaxDepth = 12; + private const int MaxPointAncestorDepth = 5; + + private enum AutomationTextSource + { + None = 0, + NameFallback = 1, + TextPattern = 2, + ValuePattern = 3, + PointTextPattern = 4, + } + + private readonly record struct TextExtractionCandidate(string Text, AutomationTextSource Source, int Depth); + + public static Task GetTextFromPointAsync(Point screenPoint) + { + UiAutomationOptions options = GetOptionsFromSettings(); + return Task.Run(() => GetTextFromPoint(screenPoint, options)); + } + + public static Task GetTextFromRegionAsync(Rect screenRect) + { + UiAutomationOptions options = GetOptionsFromSettings(screenRect); + return Task.Run(() => GetTextFromRegion(screenRect, options)); + } + + public static Task GetTextFromWindowAsync(IntPtr windowHandle, Rect? filterBounds = null) + { + UiAutomationOptions options = GetOptionsFromSettings(filterBounds); + return Task.Run(() => GetTextFromWindow(windowHandle, options)); + } + + internal static UiAutomationOptions GetOptionsFromSettings(Rect? 
filterBounds = null) + { + UiAutomationTraversalMode traversalMode = UiAutomationTraversalMode.Balanced; + Enum.TryParse(AppUtilities.TextGrabSettings.UiAutomationTraversalMode, true, out traversalMode); + + return new UiAutomationOptions( + traversalMode, + AppUtilities.TextGrabSettings.UiAutomationIncludeOffscreen, + AppUtilities.TextGrabSettings.UiAutomationPreferFocusedElement, + filterBounds); + } + + internal static WindowSelectionCandidate? FindTargetWindowCandidate(Rect selectionRect, IEnumerable candidates) + { + Point centerPoint = new(selectionRect.X + (selectionRect.Width / 2), selectionRect.Y + (selectionRect.Height / 2)); + WindowSelectionCandidate? centerCandidate = WindowSelectionUtilities.FindWindowAtPoint(candidates, centerPoint); + if (centerCandidate is not null) + return centerCandidate; + + return candidates + .Select(candidate => new + { + Candidate = candidate, + Area = GetIntersectionArea(selectionRect, candidate.Bounds) + }) + .Where(entry => entry.Area > 0) + .OrderByDescending(entry => entry.Area) + .Select(entry => entry.Candidate) + .FirstOrDefault(); + } + + internal static string NormalizeText(string? text) + { + if (string.IsNullOrWhiteSpace(text)) + return string.Empty; + + return string.Join( + Environment.NewLine, + text.Split([Environment.NewLine, "\r", "\n"], StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries) + .Select(line => string.Join(' ', line.Split([' ', '\t'], StringSplitOptions.RemoveEmptyEntries)))); + } + + internal static bool TryAddUniqueText(string? 
text, ISet seenText, List output) + { + string normalizedText = NormalizeText(text); + if (string.IsNullOrWhiteSpace(normalizedText)) + return false; + + if (!seenText.Add(normalizedText)) + return false; + + output.Add(normalizedText); + return true; + } + + internal static bool ShouldUseNameFallback(ControlType controlType) + { + return controlType != ControlType.Window + && controlType != ControlType.Pane + && controlType != ControlType.Group + && controlType != ControlType.Custom + && controlType != ControlType.Table + && controlType != ControlType.List + && controlType != ControlType.Tree + && controlType != ControlType.Menu + && controlType != ControlType.MenuBar + && controlType != ControlType.ToolBar + && controlType != ControlType.TitleBar + && controlType != ControlType.StatusBar + && controlType != ControlType.ScrollBar + && controlType != ControlType.Separator + && controlType != ControlType.ProgressBar + && controlType != ControlType.Slider + && controlType != ControlType.Spinner + && controlType != ControlType.Calendar + && controlType != ControlType.DataGrid + && controlType != ControlType.Header + && controlType != ControlType.Tab; + } + + internal static IReadOnlyList GetSamplePoints(Rect selectionRect) + { + if (selectionRect == Rect.Empty || selectionRect.Width <= 0 || selectionRect.Height <= 0) + return []; + + double[] xRatios = selectionRect.Width < 80 ? [0.5] : [0.2, 0.5, 0.8]; + double[] yRatios = selectionRect.Height < 80 ? 
[0.5] : [0.2, 0.5, 0.8]; + + HashSet seen = new(StringComparer.Ordinal); + List samplePoints = []; + + foreach (double yRatio in yRatios) + { + foreach (double xRatio in xRatios) + { + Point point = new( + selectionRect.Left + (selectionRect.Width * xRatio), + selectionRect.Top + (selectionRect.Height * yRatio)); + + string key = $"{Math.Round(point.X, 2)}|{Math.Round(point.Y, 2)}"; + if (seen.Add(key)) + samplePoints.Add(point); + } + } + + return samplePoints; + } + + internal static IReadOnlyList GetPointProbePoints(Point screenPoint) + { + const double probeOffset = 2.0; + + return + [ + screenPoint, + new Point(screenPoint.X - probeOffset, screenPoint.Y), + new Point(screenPoint.X + probeOffset, screenPoint.Y), + new Point(screenPoint.X, screenPoint.Y - probeOffset), + new Point(screenPoint.X, screenPoint.Y + probeOffset), + ]; + } + + private static string GetTextFromPoint(Point screenPoint, UiAutomationOptions options) + { + TextExtractionCandidate? bestCandidate = null; + + foreach (Point probePoint in GetPointProbePoints(screenPoint)) + { + AutomationElement? element = GetElementAtPoint(probePoint); + if (element is null) + continue; + + TextExtractionCandidate? probeCandidate = GetBestPointTextCandidate(element, probePoint, options, TextUnit.Line); + if (probeCandidate is not null && IsBetterCandidate(probeCandidate.Value, bestCandidate)) + { + bestCandidate = probeCandidate; + + if (probePoint == screenPoint + && probeCandidate.Value.Source == AutomationTextSource.PointTextPattern + && probeCandidate.Value.Depth == 0) + { + break; + } + } + } + + return bestCandidate?.Text ?? string.Empty; + } + + private static string GetTextFromRegion(Rect screenRect, UiAutomationOptions options) + { + List candidates = WindowSelectionUtilities.GetCapturableWindows(); + WindowSelectionCandidate? 
targetWindow = FindTargetWindowCandidate(screenRect, candidates); + if (targetWindow is null) + return string.Empty; + + if (targetWindow.Handle == IntPtr.Zero) + return string.Empty; + + try + { + AutomationElement root = AutomationElement.FromHandle(targetWindow.Handle); + HashSet seenText = new(StringComparer.CurrentCulture); + List extractedText = []; + + AppendTextFromSamplePoints(root, screenRect, options, seenText, extractedText); + AppendTextFromElementTree(root, options, seenText, extractedText); + + return string.Join(Environment.NewLine, extractedText); + } + catch (ElementNotAvailableException) + { + return string.Empty; + } + catch (ArgumentException) + { + return string.Empty; + } + } + + private static string GetTextFromWindow(IntPtr windowHandle, UiAutomationOptions options) + { + if (windowHandle == IntPtr.Zero) + return string.Empty; + + try + { + AutomationElement root = AutomationElement.FromHandle(windowHandle); + return ExtractTextFromElementTree(root, options); + } + catch (ElementNotAvailableException) + { + return string.Empty; + } + catch (ArgumentException) + { + return string.Empty; + } + } + + private static string ExtractTextFromElementTree(AutomationElement root, UiAutomationOptions options) + { + HashSet seenText = new(StringComparer.CurrentCulture); + List extractedText = []; + AppendTextFromElementTree(root, options, seenText, extractedText); + return string.Join(Environment.NewLine, extractedText); + } + + private static void AppendTextFromElementTree( + AutomationElement root, + UiAutomationOptions options, + ISet seenText, + List extractedText) + { + if (options.PreferFocusedElement) + TryExtractFocusedElementText(root, options, seenText, extractedText); + + foreach (AutomationElement element in EnumerateElements(root, options)) + { + if (ShouldSkipElementText(element, options)) + continue; + + TryAddUniqueText(ExtractTextFromElement(element, options.FilterBounds), seenText, extractedText); + } + } + + private static void 
AppendTextFromSamplePoints( + AutomationElement root, + Rect selectionRect, + UiAutomationOptions options, + ISet seenText, + List extractedText) + { + foreach (Point samplePoint in GetSamplePoints(selectionRect)) + { + AutomationElement? element = GetElementAtPoint(samplePoint); + if (element is null || !IsDescendantOrSelf(root, element)) + continue; + + TryAddUniqueText( + GetBestPointText(element, samplePoint, options, TextUnit.Line), + seenText, + extractedText); + } + } + + private static string GetBestPointText( + AutomationElement element, + Point screenPoint, + UiAutomationOptions options, + TextUnit pointTextUnit) + { + return GetBestPointTextCandidate(element, screenPoint, options, pointTextUnit)?.Text ?? string.Empty; + } + + private static TextExtractionCandidate? GetBestPointTextCandidate( + AutomationElement element, + Point screenPoint, + UiAutomationOptions options, + TextUnit pointTextUnit) + { + TextExtractionCandidate? bestCandidate = null; + AutomationElement? current = element; + + for (int depth = 0; current is not null && depth <= MaxPointAncestorDepth; depth++) + { + if (!ShouldSkipElementText(current, options) + && TryCreatePointTextCandidate(current, screenPoint, depth, pointTextUnit, out TextExtractionCandidate candidate) + && IsBetterCandidate(candidate, bestCandidate)) + { + bestCandidate = candidate; + + if (candidate.Source == AutomationTextSource.PointTextPattern && candidate.Depth == 0) + break; + } + + current = GetParentElement(current); + } + + return bestCandidate; + } + + private static bool TryCreatePointTextCandidate( + AutomationElement element, + Point screenPoint, + int depth, + TextUnit pointTextUnit, + out TextExtractionCandidate candidate) + { + candidate = default; + + if (TryExtractTextPatternTextAtPoint(element, screenPoint, pointTextUnit, out string pointText)) + { + candidate = new(NormalizeText(pointText), AutomationTextSource.PointTextPattern, depth); + return true; + } + + if (TryExtractValuePatternText(element, 
out string valuePatternText)) + { + candidate = new(NormalizeText(valuePatternText), AutomationTextSource.ValuePattern, depth); + return true; + } + + if (TryExtractTextPatternText(element, null, out string textPatternText)) + { + candidate = new(NormalizeText(textPatternText), AutomationTextSource.TextPattern, depth); + return true; + } + + if (TryExtractNameText(element, out string nameText)) + { + candidate = new(NormalizeText(nameText), AutomationTextSource.NameFallback, depth); + return true; + } + + return false; + } + + private static bool IsBetterCandidate(TextExtractionCandidate candidate, TextExtractionCandidate? currentBest) + { + if (currentBest is null) + return true; + + if (candidate.Source != currentBest.Value.Source) + return candidate.Source > currentBest.Value.Source; + + return candidate.Depth < currentBest.Value.Depth; + } + + private static void TryExtractFocusedElementText( + AutomationElement root, + UiAutomationOptions options, + ISet seenText, + List extractedText) + { + try + { + AutomationElement? focusedElement = AutomationElement.FocusedElement; + if (focusedElement is null || !IsDescendantOrSelf(root, focusedElement)) + return; + + if (!ShouldSkipElementText(focusedElement, options)) + TryAddUniqueText(ExtractTextFromElement(focusedElement, options.FilterBounds), seenText, extractedText); + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + } + + private static IEnumerable EnumerateElements(AutomationElement root, UiAutomationOptions options) + { + Queue<(AutomationElement Element, int Depth)> queue = new(); + queue.Enqueue((root, 0)); + TreeWalker walker = options.TraversalMode == UiAutomationTraversalMode.Thorough + ? 
TreeWalker.RawViewWalker + : TreeWalker.ControlViewWalker; + int maxDepth = GetMaxDepth(options.TraversalMode); + + while (queue.Count > 0) + { + (AutomationElement element, int depth) = queue.Dequeue(); + yield return element; + + if (depth >= maxDepth) + continue; + + AutomationElement? child = null; + try + { + child = walker.GetFirstChild(element); + } + catch (ElementNotAvailableException) + { + } + + while (child is not null) + { + queue.Enqueue((child, depth + 1)); + + try + { + child = walker.GetNextSibling(child); + } + catch (ElementNotAvailableException) + { + child = null; + } + } + } + } + + private static bool ShouldSkipElementText(AutomationElement element, UiAutomationOptions options) + { + try + { + AutomationElement.AutomationElementInformation current = element.Current; + + if (!options.IncludeOffscreen && current.IsOffscreen) + return true; + + Rect bounds = current.BoundingRectangle; + if (bounds == Rect.Empty || bounds.Width < 1 || bounds.Height < 1) + return true; + + if (!current.IsContentElement && !IsTextBearingControlType(current.ControlType)) + return true; + + if (options.FilterBounds is Rect filterBounds && !bounds.IntersectsWith(filterBounds)) + return true; + + return false; + } + catch (ElementNotAvailableException) + { + return true; + } + catch (InvalidOperationException) + { + return true; + } + } + + private static string ExtractTextFromElement(AutomationElement element, Rect? 
filterBounds = null) + { + if (TryExtractTextPatternText(element, filterBounds, out string textPatternText)) + return textPatternText; + + if (TryExtractValuePatternText(element, out string valuePatternText)) + return valuePatternText; + + if (TryExtractNameText(element, out string nameText)) + return nameText; + + return string.Empty; + } + + private static bool TryExtractTextPatternTextAtPoint( + AutomationElement element, + Point screenPoint, + TextUnit preferredUnit, + out string text) + { + text = string.Empty; + + try + { + if (element.TryGetCurrentPattern(TextPattern.Pattern, out object pattern) + && pattern is TextPattern textPattern) + { + TextPatternRange range = textPattern.RangeFromPoint(screenPoint); + range.ExpandToEnclosingUnit(preferredUnit); + text = range.GetText(-1); + + if (!string.IsNullOrWhiteSpace(text)) + return true; + + if (preferredUnit != TextUnit.Line) + { + range = textPattern.RangeFromPoint(screenPoint); + range.ExpandToEnclosingUnit(TextUnit.Line); + text = range.GetText(-1); + return !string.IsNullOrWhiteSpace(text); + } + } + } + catch (ArgumentException) + { + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + + return false; + } + + private static bool TryExtractTextPatternText(AutomationElement element, Rect? 
filterBounds, out string text) + { + text = string.Empty; + + try + { + if (element.TryGetCurrentPattern(TextPattern.Pattern, out object pattern) + && pattern is TextPattern textPattern) + { + if (filterBounds is Rect bounds) + return TryExtractVisibleTextPatternText(textPattern, bounds, out text); + + text = textPattern.DocumentRange.GetText(-1); + return !string.IsNullOrWhiteSpace(text); + } + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + + return false; + } + + private static bool TryExtractVisibleTextPatternText(TextPattern textPattern, Rect filterBounds, out string text) + { + text = string.Empty; + + try + { + TextPatternRange[] visibleRanges = textPattern.GetVisibleRanges(); + if (visibleRanges.Length == 0) + return false; + + HashSet seenText = new(StringComparer.CurrentCulture); + List extractedText = []; + + foreach (TextPatternRange range in visibleRanges) + { + if (!RangeIntersectsBounds(range, filterBounds)) + continue; + + TryAddUniqueText(range.GetText(-1), seenText, extractedText); + } + + text = string.Join(Environment.NewLine, extractedText); + return !string.IsNullOrWhiteSpace(text); + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + + return false; + } + + private static bool RangeIntersectsBounds(TextPatternRange range, Rect filterBounds) + { + try + { + return range.GetBoundingRectangles().Any(textBounds => textBounds != Rect.Empty && textBounds.IntersectsWith(filterBounds)); + } + catch (InvalidOperationException) + { + return false; + } + } + + private static bool TryExtractValuePatternText(AutomationElement element, out string text) + { + text = string.Empty; + + try + { + if (element.TryGetCurrentPattern(ValuePattern.Pattern, out object pattern) + && pattern is ValuePattern valuePattern) + { + text = valuePattern.Current.Value; + return !string.IsNullOrWhiteSpace(text); + } + } + catch (ElementNotAvailableException) + { + } + catch 
(InvalidOperationException) + { + } + + return false; + } + + private static bool TryExtractNameText(AutomationElement element, out string text) + { + text = string.Empty; + + try + { + AutomationElement.AutomationElementInformation current = element.Current; + if (!ShouldUseNameFallback(current.ControlType)) + return false; + + text = current.Name; + return !string.IsNullOrWhiteSpace(text); + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + + return false; + } + + private static bool IsTextBearingControlType(ControlType controlType) + { + return controlType == ControlType.Text + || controlType == ControlType.Edit + || controlType == ControlType.Document + || controlType == ControlType.Button + || controlType == ControlType.CheckBox + || controlType == ControlType.RadioButton + || controlType == ControlType.Hyperlink + || controlType == ControlType.ListItem + || controlType == ControlType.DataItem + || controlType == ControlType.TreeItem + || controlType == ControlType.MenuItem + || controlType == ControlType.TabItem + || controlType == ControlType.HeaderItem + || controlType == ControlType.ComboBox + || controlType == ControlType.SplitButton; + } + + private static AutomationElement? GetParentElement(AutomationElement element) + { + try + { + return TreeWalker.RawViewWalker.GetParent(element); + } + catch (ElementNotAvailableException) + { + return null; + } + catch (InvalidOperationException) + { + return null; + } + } + + private static AutomationElement? GetElementAtPoint(Point screenPoint) + { + try + { + return AutomationElement.FromPoint(screenPoint); + } + catch (ElementNotAvailableException) + { + return null; + } + catch (ArgumentException) + { + return null; + } + } + + private static bool IsDescendantOrSelf(AutomationElement root, AutomationElement candidate) + { + AutomationElement? 
current = candidate; + while (current is not null) + { + if (current.Equals(root)) + return true; + + current = GetParentElement(current); + } + + return false; + } + + private static int GetMaxDepth(UiAutomationTraversalMode traversalMode) + { + return traversalMode switch + { + UiAutomationTraversalMode.Fast => FastMaxDepth, + UiAutomationTraversalMode.Thorough => ThoroughMaxDepth, + _ => BalancedMaxDepth, + }; + } + + private static double GetIntersectionArea(Rect first, Rect second) + { + Rect intersection = Rect.Intersect(first, second); + if (intersection == Rect.Empty) + return 0; + + return intersection.Width * intersection.Height; + } +} From 582a76b0a344e2f0db251d68e470481aef3a5a29 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 11:55:40 -0600 Subject: [PATCH 03/13] Add UI Automation text extraction settings and UI Added user-configurable settings and UI controls for UI Automation text extraction, including toggles for enabling UI Automation, fallback to OCR, traversal mode, offscreen element inclusion, and focus preference. Updated language picker to use OCR language by default and persist selection. Improved language selection experience and settings persistence. 
--- Text-Grab/App.config | 18 ++++++ Text-Grab/Controls/LanguagePicker.xaml | 2 +- Text-Grab/Controls/LanguagePicker.xaml.cs | 5 +- Text-Grab/Pages/LanguageSettings.xaml | 64 ++++++++++++++++++++++ Text-Grab/Pages/LanguageSettings.xaml.cs | 67 +++++++++++++++++++++++ Text-Grab/Properties/Settings.Designer.cs | 60 ++++++++++++++++++++ Text-Grab/Properties/Settings.settings | 17 +++++- 7 files changed, 229 insertions(+), 4 deletions(-) diff --git a/Text-Grab/App.config b/Text-Grab/App.config index 4675d28e..230fa998 100644 --- a/Text-Grab/App.config +++ b/Text-Grab/App.config @@ -163,6 +163,9 @@ Default + + Region + True @@ -196,6 +199,21 @@ False + + True + + + True + + + Balanced + + + False + + + True + False diff --git a/Text-Grab/Controls/LanguagePicker.xaml b/Text-Grab/Controls/LanguagePicker.xaml index c7d89f89..bbbb54da 100644 --- a/Text-Grab/Controls/LanguagePicker.xaml +++ b/Text-Grab/Controls/LanguagePicker.xaml @@ -19,7 +19,7 @@ Mode=TwoWay}" SelectionChanged="MainComboBox_SelectionChanged"> - + diff --git a/Text-Grab/Controls/LanguagePicker.xaml.cs b/Text-Grab/Controls/LanguagePicker.xaml.cs index 75ca5ba8..0ee7c2e7 100644 --- a/Text-Grab/Controls/LanguagePicker.xaml.cs +++ b/Text-Grab/Controls/LanguagePicker.xaml.cs @@ -31,14 +31,14 @@ private void UserControl_Loaded(object sender, RoutedEventArgs e) { Languages.Clear(); - ILanguage currentInputGlobalLang = LanguageUtilities.GetCurrentInputLanguage(); + ILanguage currentSelectedLanguage = LanguageUtilities.GetOCRLanguage(); int selectedIndex = 0; int i = 0; foreach (ILanguage langFromUtil in LanguageUtilities.GetAllLanguages()) { Languages.Add(langFromUtil); - if (langFromUtil.LanguageTag == currentInputGlobalLang.LanguageTag) + if (langFromUtil.LanguageTag == currentSelectedLanguage.LanguageTag) selectedIndex = i; i++; } @@ -54,6 +54,7 @@ private void MainComboBox_SelectionChanged(object sender, SelectionChangedEventA if (MainComboBox.SelectedItem is ILanguage selectedILanguage) { SelectedLanguage = 
selectedILanguage; + CaptureLanguageUtilities.PersistSelectedLanguage(selectedILanguage); LanguageChanged?.Invoke(this, new RoutedEventArgs()); } } diff --git a/Text-Grab/Pages/LanguageSettings.xaml b/Text-Grab/Pages/LanguageSettings.xaml index 4add9857..0d240b19 100644 --- a/Text-Grab/Pages/LanguageSettings.xaml +++ b/Text-Grab/Pages/LanguageSettings.xaml @@ -57,6 +57,70 @@ Content="Learn more about Windows AI Foundry" NavigateUri="https://learn.microsoft.com/en-us/windows/ai/apis/" /> + + + When the UI Automation language is selected, Text Grab will try to read native accessibility text from live application controls before falling back to OCR. + + + + Show UI Automation as a language option + + + + + Fall back to OCR when UI Automation returns no text + + + + + Prefer the focused UI element before scanning the rest of the window + + + + + Include offscreen UI Automation elements + + + + + + + + (); + if (Enum.TryParse(DefaultSettings.UiAutomationTraversalMode, true, out UiAutomationTraversalMode traversalMode)) + UiAutomationTraversalModeComboBox.SelectedItem = traversalMode; + else + UiAutomationTraversalModeComboBox.SelectedItem = UiAutomationTraversalMode.Balanced; + } + private async void InstallButton_Click(object sender, RoutedEventArgs e) { if (string.IsNullOrEmpty(AllLanguagesComboBox.Text)) @@ -141,6 +161,53 @@ private void HyperlinkButton_Click(object sender, RoutedEventArgs e) } + private void UiAutomationEnabledToggle_Checked(object sender, RoutedEventArgs e) + { + if (loadingUiAutomationSettings) + return; + + DefaultSettings.UiAutomationEnabled = UiAutomationEnabledToggle.IsChecked is true; + DefaultSettings.Save(); + LanguageUtilities.InvalidateAllCaches(); + } + + private void UiAutomationFallbackToggle_Checked(object sender, RoutedEventArgs e) + { + if (loadingUiAutomationSettings) + return; + + DefaultSettings.UiAutomationFallbackToOcr = UiAutomationFallbackToggle.IsChecked is true; + DefaultSettings.Save(); + } + + private void 
UiAutomationPreferFocusedToggle_Checked(object sender, RoutedEventArgs e) + { + if (loadingUiAutomationSettings) + return; + + DefaultSettings.UiAutomationPreferFocusedElement = UiAutomationPreferFocusedToggle.IsChecked is true; + DefaultSettings.Save(); + } + + private void UiAutomationIncludeOffscreenToggle_Checked(object sender, RoutedEventArgs e) + { + if (loadingUiAutomationSettings) + return; + + DefaultSettings.UiAutomationIncludeOffscreen = UiAutomationIncludeOffscreenToggle.IsChecked is true; + DefaultSettings.Save(); + } + + private void UiAutomationTraversalModeComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) + { + if (loadingUiAutomationSettings + || UiAutomationTraversalModeComboBox.SelectedItem is not UiAutomationTraversalMode traversalMode) + return; + + DefaultSettings.UiAutomationTraversalMode = traversalMode.ToString(); + DefaultSettings.Save(); + } + public async Task CopyFileWithElevatedPermissions(string sourcePath, string destinationPath) { string arguments = $"/c copy \"{sourcePath}\" \"{destinationPath}\""; diff --git a/Text-Grab/Properties/Settings.Designer.cs b/Text-Grab/Properties/Settings.Designer.cs index 05bf5d8e..6d174672 100644 --- a/Text-Grab/Properties/Settings.Designer.cs +++ b/Text-Grab/Properties/Settings.Designer.cs @@ -791,6 +791,66 @@ public bool OverrideAiArchCheck { } } + [global::System.Configuration.UserScopedSettingAttribute()] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Configuration.DefaultSettingValueAttribute("True")] + public bool UiAutomationEnabled { + get { + return ((bool)(this["UiAutomationEnabled"])); + } + set { + this["UiAutomationEnabled"] = value; + } + } + + [global::System.Configuration.UserScopedSettingAttribute()] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Configuration.DefaultSettingValueAttribute("True")] + public bool UiAutomationFallbackToOcr { + get { + return ((bool)(this["UiAutomationFallbackToOcr"])); 
+ } + set { + this["UiAutomationFallbackToOcr"] = value; + } + } + + [global::System.Configuration.UserScopedSettingAttribute()] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Configuration.DefaultSettingValueAttribute("Balanced")] + public string UiAutomationTraversalMode { + get { + return ((string)(this["UiAutomationTraversalMode"])); + } + set { + this["UiAutomationTraversalMode"] = value; + } + } + + [global::System.Configuration.UserScopedSettingAttribute()] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Configuration.DefaultSettingValueAttribute("False")] + public bool UiAutomationIncludeOffscreen { + get { + return ((bool)(this["UiAutomationIncludeOffscreen"])); + } + set { + this["UiAutomationIncludeOffscreen"] = value; + } + } + + [global::System.Configuration.UserScopedSettingAttribute()] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Configuration.DefaultSettingValueAttribute("True")] + public bool UiAutomationPreferFocusedElement { + get { + return ((bool)(this["UiAutomationPreferFocusedElement"])); + } + set { + this["UiAutomationPreferFocusedElement"] = value; + } + } + [global::System.Configuration.UserScopedSettingAttribute()] [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] [global::System.Configuration.DefaultSettingValueAttribute("False")] diff --git a/Text-Grab/Properties/Settings.settings b/Text-Grab/Properties/Settings.settings index ba7b7aa3..ec6032ba 100644 --- a/Text-Grab/Properties/Settings.settings +++ b/Text-Grab/Properties/Settings.settings @@ -194,6 +194,21 @@ False + + True + + + True + + + Balanced + + + False + + + True + False @@ -219,4 +234,4 @@ False - + \ No newline at end of file From b06e33316491dd623f717f2b957d3f587c1c1ca8 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 11:56:05 -0600 Subject: [PATCH 04/13] Add UI Automation OCR mode and refactor language handling - Introduce UI Automation as a new 
OCR language mode, including traversal options. - Centralize language loading and selection logic using CaptureLanguageUtilities. - Unify language dropdown population for all OCR modes (Tesseract, Windows AI, UI Automation). - Update UI to reflect table output support based on selected language. - Invalidate OCR language cache on language reset for accurate UI updates. - Track static vs. live image sources in GrabFrame; notify user if UI Automation is selected with a static image. - Update OCR logic to use UI Automation APIs when appropriate; skip image-based corrections for UI Automation. - Refactor and simplify code for better maintainability and clarity. --- Text-Grab/Enums.cs | 8 ++ Text-Grab/Views/EditTextWindow.xaml.cs | 98 ++++----------- .../Views/FullscreenGrab.SelectionStyles.cs | 13 +- Text-Grab/Views/FullscreenGrab.xaml.cs | 119 +++++------------- Text-Grab/Views/GrabFrame.xaml.cs | 81 ++++++------ 5 files changed, 117 insertions(+), 202 deletions(-) diff --git a/Text-Grab/Enums.cs b/Text-Grab/Enums.cs index 272e1dcb..14cf7084 100644 --- a/Text-Grab/Enums.cs +++ b/Text-Grab/Enums.cs @@ -90,6 +90,14 @@ public enum LanguageKind Global = 0, Tesseract = 1, WindowsAi = 2, + UiAutomation = 3, +} + +public enum UiAutomationTraversalMode +{ + Fast = 0, + Balanced = 1, + Thorough = 2, } public enum FsgDefaultMode diff --git a/Text-Grab/Views/EditTextWindow.xaml.cs b/Text-Grab/Views/EditTextWindow.xaml.cs index 83015ca0..ff8ab171 100644 --- a/Text-Grab/Views/EditTextWindow.xaml.cs +++ b/Text-Grab/Views/EditTextWindow.xaml.cs @@ -371,7 +371,7 @@ private void LanguagePicker_LanguageChanged(object sender, RoutedEventArgs e) item.IsChecked = false; } - if (selectedILanguage is WindowsAiLang) + if (selectedILanguage is not GlobalLang) { SetCultureAndLanguageToDefault(); return; @@ -1160,17 +1160,25 @@ private void LanguageMenuItem_Click(object sender, RoutedEventArgs e) return; selectedILanguage = ILang; + 
CaptureLanguageUtilities.PersistSelectedLanguage(selectedILanguage); - try + if (selectedILanguage is not GlobalLang) { - CultureInfo cultureInfo = new(selectedILanguage.LanguageTag); - selectedCultureInfo = cultureInfo; - XmlLanguage xmlLang = XmlLanguage.GetLanguage(cultureInfo.IetfLanguageTag); - Language = xmlLang; + SetCultureAndLanguageToDefault(); } - catch (CultureNotFoundException) + else { - SetCultureAndLanguageToDefault(); + try + { + CultureInfo cultureInfo = new(selectedILanguage.LanguageTag); + selectedCultureInfo = cultureInfo; + XmlLanguage xmlLang = XmlLanguage.GetLanguage(cultureInfo.IetfLanguageTag); + Language = xmlLang; + } + catch (CultureNotFoundException) + { + SetCultureAndLanguageToDefault(); + } } foreach (object? child in BottomBarButtons.Children) @@ -1249,81 +1257,25 @@ private async void LoadLanguageMenuItems(MenuItem captureMenuItem) if (captureMenuItem.Items.Count > 0) return; - bool haveSetLastLang = false; - string lastTextLang = DefaultSettings.LastUsedLang; bool usingTesseract = DefaultSettings.UseTesseract && TesseractHelper.CanLocateTesseractExe(); + List availableLanguages = await CaptureLanguageUtilities.GetCaptureLanguagesAsync(usingTesseract); + int selectedIndex = CaptureLanguageUtilities.FindPreferredLanguageIndex( + availableLanguages, + DefaultSettings.LastUsedLang, + LanguageUtilities.GetOCRLanguage()); - if (WindowsAiUtilities.CanDeviceUseWinAI()) - { - WindowsAiLang windowsAiLang = new(); - - MenuItem languageMenuItem = new() - { - Header = windowsAiLang.DisplayName, - Tag = windowsAiLang, - IsCheckable = true, - }; - - languageMenuItem.Click += LanguageMenuItem_Click; - captureMenuItem.Items.Add(languageMenuItem); - if (!haveSetLastLang && windowsAiLang.CultureDisplayName == lastTextLang) - { - languageMenuItem.IsChecked = true; - haveSetLastLang = true; - } - } - - if (usingTesseract) - { - List tesseractLanguages = await TesseractHelper.TesseractLanguages(); - - foreach (TessLang language in 
tesseractLanguages.Cast()) - { - MenuItem languageMenuItem = new() - { - Header = language.DisplayName, - Tag = language, - IsCheckable = true, - }; - languageMenuItem.Click += LanguageMenuItem_Click; - - captureMenuItem.Items.Add(languageMenuItem); - - if (!haveSetLastLang && language.CultureDisplayName == lastTextLang) - { - languageMenuItem.IsChecked = true; - haveSetLastLang = true; - } - } - } - - IReadOnlyList possibleOCRLanguages = OcrEngine.AvailableRecognizerLanguages; - - ILanguage firstLang = LanguageUtilities.GetOCRLanguage(); - - foreach (Language language in possibleOCRLanguages) + for (int i = 0; i < availableLanguages.Count; i++) { + ILanguage language = availableLanguages[i]; MenuItem languageMenuItem = new() { Header = language.DisplayName, - Tag = new GlobalLang(language), + Tag = language, IsCheckable = true, + IsChecked = i == selectedIndex, }; languageMenuItem.Click += LanguageMenuItem_Click; - captureMenuItem.Items.Add(languageMenuItem); - - if (!haveSetLastLang && - (language.AbbreviatedName.Equals(firstLang?.AbbreviatedName.ToLower(), StringComparison.CurrentCultureIgnoreCase) - || language.LanguageTag.Equals(firstLang?.LanguageTag.ToLower(), StringComparison.CurrentCultureIgnoreCase))) - { - languageMenuItem.IsChecked = true; - haveSetLastLang = true; - } - } - if (!haveSetLastLang && captureMenuItem.Items[0] is MenuItem firstMenuItem) - { - firstMenuItem.IsChecked = true; } } diff --git a/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs b/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs index ef17d018..33fdaa40 100644 --- a/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs +++ b/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs @@ -1036,12 +1036,19 @@ private async Task CommitSelectionAsync(FullscreenCaptureResult selection, bool PresentationSource? presentationSource = PresentationSource.FromVisual(this); Matrix transformToDevice = presentationSource?.CompositionTarget?.TransformToDevice ?? 
Matrix.Identity; Rect selectionRect = GetCurrentSelectionRect(); - Point clickedPointForOcr = new( - Math.Round(selectionRect.Left * transformToDevice.M11), - Math.Round(selectionRect.Top * transformToDevice.M22)); + Point clickedPointForOcr = transformToDevice.Transform(new Point( + selectionRect.Left + (selectionRect.Width / 2.0), + selectionRect.Top + (selectionRect.Height / 2.0))); + clickedPointForOcr = new Point( + Math.Round(clickedPointForOcr.X), + Math.Round(clickedPointForOcr.Y)); TextFromOCR = await OcrUtilities.GetClickedWordAsync(this, clickedPointForOcr, selectedOcrLang); } + else if (selectedOcrLang is UiAutomationLang) + { + TextFromOCR = await OcrUtilities.GetTextFromAbsoluteRectAsync(selection.CaptureRegion, selectedOcrLang); + } else if (selection.CapturedImage is not null) { TextFromOCR = isTable diff --git a/Text-Grab/Views/FullscreenGrab.xaml.cs b/Text-Grab/Views/FullscreenGrab.xaml.cs index 78ffa652..37d87831 100644 --- a/Text-Grab/Views/FullscreenGrab.xaml.cs +++ b/Text-Grab/Views/FullscreenGrab.xaml.cs @@ -688,37 +688,33 @@ private void LanguagesComboBox_PreviewMouseDown(object sender, MouseButtonEventA { DefaultSettings.LastUsedLang = String.Empty; DefaultSettings.Save(); + LanguageUtilities.InvalidateOcrLanguageCache(); } } - private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) + private void ApplySelectedLanguageState(ILanguage selectedLanguage) { - if (sender is not ComboBox languageCmbBox || !isComboBoxReady) - return; + bool supportsTableOutput = CaptureLanguageUtilities.SupportsTableOutput(selectedLanguage); + TableMenuItem.Visibility = supportsTableOutput ? Visibility.Visible : Visibility.Collapsed; + TableToggleButton.Visibility = supportsTableOutput ? 
Visibility.Visible : Visibility.Collapsed; - if (languageCmbBox.SelectedItem is TessLang tessLang) + if (!supportsTableOutput) { - DefaultSettings.LastUsedLang = tessLang.CultureDisplayName; - DefaultSettings.Save(); - - TableMenuItem.Visibility = Visibility.Collapsed; - TableToggleButton.Visibility = Visibility.Collapsed; + TableMenuItem.IsChecked = false; + TableToggleButton.IsChecked = false; + SelectSingleToggleButton(StandardModeToggleButton); } - else if (languageCmbBox.SelectedItem is Language pickedLang) - { - DefaultSettings.LastUsedLang = pickedLang.LanguageTag; - DefaultSettings.Save(); + } - TableMenuItem.Visibility = Visibility.Visible; - TableToggleButton.Visibility = Visibility.Visible; - } - else if (languageCmbBox.SelectedItem is WindowsAiLang winAiLang) - { - DefaultSettings.LastUsedLang = winAiLang.LanguageTag; - DefaultSettings.Save(); - TableMenuItem.Visibility = Visibility.Visible; - TableToggleButton.Visibility = Visibility.Visible; - } + private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) + { + if (sender is not ComboBox languageCmbBox + || languageCmbBox.SelectedItem is not ILanguage selectedLanguage + || !isComboBoxReady) + return; + + CaptureLanguageUtilities.PersistSelectedLanguage(selectedLanguage); + ApplySelectedLanguageState(selectedLanguage); int selection = languageCmbBox.SelectedIndex; @@ -756,73 +752,22 @@ private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedE } } - private static async Task LoadOcrLanguages(ComboBox languagesComboBox, bool usingTesseract, List? 
tesseractIncompatibleElements = null) + private static async Task LoadOcrLanguages(ComboBox languagesComboBox, bool usingTesseract) { if (languagesComboBox.Items.Count > 0) return; - int count = 0; - // TODO Find a way to combine with the ETW language drop down - // or just put this logic into Language Utilities - - bool haveSetLastLang = false; - string lastTextLang = DefaultSettings.LastUsedLang; - - if (WindowsAiUtilities.CanDeviceUseWinAI()) - { - WindowsAiLang winAiLang = new(); - languagesComboBox.Items.Add(winAiLang); - - if (lastTextLang == winAiLang.LanguageTag) - { - languagesComboBox.SelectedIndex = 0; - } - } - - if (usingTesseract) - { - List tesseractLanguages = await TesseractHelper.TesseractLanguages(); - - foreach (ILanguage language in tesseractLanguages) - { - languagesComboBox.Items.Add(language); - - if (!haveSetLastLang && language.CultureDisplayName == lastTextLang) - { - languagesComboBox.SelectedIndex = count; - haveSetLastLang = true; - - if (tesseractIncompatibleElements is not null) - foreach (FrameworkElement element in tesseractIncompatibleElements) - element.Visibility = Visibility.Collapsed; - } - - count++; - } - } - - IReadOnlyList possibleOCRLanguages = OcrEngine.AvailableRecognizerLanguages; - - ILanguage firstLang = LanguageUtilities.GetOCRLanguage(); - - foreach (Language language in possibleOCRLanguages) - { + List availableLanguages = await CaptureLanguageUtilities.GetCaptureLanguagesAsync(usingTesseract); + foreach (ILanguage language in availableLanguages) languagesComboBox.Items.Add(language); - if (!haveSetLastLang && - (language.AbbreviatedName.Equals(firstLang?.AbbreviatedName.ToLower(), StringComparison.CurrentCultureIgnoreCase) - || language.LanguageTag.Equals(firstLang?.LanguageTag.ToLower(), StringComparison.CurrentCultureIgnoreCase))) - { - languagesComboBox.SelectedIndex = count; - haveSetLastLang = true; - } - - count++; - } + int selectedIndex = CaptureLanguageUtilities.FindPreferredLanguageIndex( + 
availableLanguages, + DefaultSettings.LastUsedLang, + LanguageUtilities.GetOCRLanguage()); - // if no lang is set, select the first one - if (languagesComboBox.SelectedIndex == -1) - languagesComboBox.SelectedIndex = 0; + if (selectedIndex >= 0) + languagesComboBox.SelectedIndex = selectedIndex; } private void NewEditTextMenuItem_Click(object sender, RoutedEventArgs e) @@ -1021,12 +966,10 @@ private async void Window_Loaded(object sender, RoutedEventArgs e) Topmost = false; #endif - List tesseractIncompatibleFrameworkElements = - [ - TableMenuItem, TableToggleButton - ]; - await LoadOcrLanguages(LanguagesComboBox, usingTesseract, tesseractIncompatibleFrameworkElements); + await LoadOcrLanguages(LanguagesComboBox, usingTesseract); isComboBoxReady = true; + if (LanguagesComboBox.SelectedItem is ILanguage selectedLanguage) + ApplySelectedLanguageState(selectedLanguage); // Load dynamic post-grab actions LoadDynamicPostGrabActions(); diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index 9bb01ab4..cfaf042d 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -65,6 +65,7 @@ public partial class GrabFrame : Window private bool isSearchSelectionOverridden = false; private bool isSelecting; private bool isSpaceJoining = true; + private bool isStaticImageSource = false; private readonly Dictionary movingWordBordersDictionary = []; private IOcrLinesWords? 
ocrResultOfWindow; private readonly DispatcherTimer reDrawTimer = new(); @@ -150,6 +151,7 @@ public GrabFrame(BitmapSource frozenImage) ShouldSaveOnClose = true; frameContentImageSource = frozenImage; hasLoadedImageSource = true; + isStaticImageSource = true; Loaded += (s, e) => { @@ -183,6 +185,7 @@ private async Task LoadTemplateForEditing(GrabTemplate template) if (!string.IsNullOrEmpty(template.SourceImagePath) && File.Exists(template.SourceImagePath)) { + isStaticImageSource = true; await TryLoadImageFromPath(template.SourceImagePath); reDrawTimer.Stop(); } @@ -266,6 +269,7 @@ private async Task LoadContentFromHistory(HistoryInfo history) { FrameText = history.TextContent; currentLanguage = history.OcrLanguage; + isStaticImageSource = true; string imageName = Path.GetFileName(history.ImagePath); @@ -285,6 +289,7 @@ private async Task LoadContentFromHistory(HistoryInfo history) hasLoadedImageSource = true; GrabFrameImage.Source = frameContentImageSource; FreezeGrabFrame(); + NotifyIfUiAutomationNeedsLiveSource(currentLanguage); if (history.PositionRect != Rect.Empty) { @@ -468,7 +473,7 @@ private void StandardInitialize() InitializeComponent(); App.SetTheme(); - LoadOcrLanguages(); + _ = LoadOcrLanguagesAsync(); SetRestoreState(); @@ -964,10 +969,10 @@ private async void AddNewWordBorder(Border selectBorder) ILanguage language = CurrentLanguage ?? 
LanguageUtilities.GetCurrentInputLanguage(); string ocrText = await OcrUtilities.GetTextFromAbsoluteRectAsync(rect.GetScaleSizeByFraction(viewBoxZoomFactor), language); - if (DefaultSettings.CorrectErrors) + if (language is not UiAutomationLang && DefaultSettings.CorrectErrors) ocrText = ocrText.TryFixEveryWordLetterNumberErrors(); - if (DefaultSettings.CorrectToLatin) + if (language is not UiAutomationLang && DefaultSettings.CorrectToLatin) ocrText = ocrText.ReplaceGreekOrCyrillicWithLatin(); if (frameContentImageSource is BitmapImage bmpImg) @@ -1465,6 +1470,7 @@ private void FreezeGrabFrame() GrabFrameImage.Source = frameContentImageSource; else { + isStaticImageSource = false; frameContentImageSource = ImageMethods.GetWindowBoundsImage(this); GrabFrameImage.Source = frameContentImageSource; } @@ -1710,6 +1716,7 @@ private async void GrabFrameWindow_Drop(object sender, DragEventArgs e) Activate(); frameContentImageSource = null; + isStaticImageSource = true; await TryLoadImageFromPath(fileName); @@ -1866,25 +1873,32 @@ private void LanguagesComboBox_MouseDown(object sender, MouseButtonEventArgs e) { DefaultSettings.LastUsedLang = string.Empty; DefaultSettings.Save(); + LanguageUtilities.InvalidateOcrLanguageCache(); } } - private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) + private void NotifyIfUiAutomationNeedsLiveSource(ILanguage language) { - if (!isLanguageBoxLoaded || sender is not ComboBox langComboBox) + if (language is not UiAutomationLang || !isStaticImageSource) return; - ILanguage? pickedLang = langComboBox.SelectedItem as ILanguage; + string message = DefaultSettings.UiAutomationFallbackToOcr + ? "UI Automation reads live application controls. This Grab Frame currently contains a static image, so Text Grab will fall back to OCR for image-only operations." + : "UI Automation reads live application controls. 
This Grab Frame currently contains a static image, so image-only operations will not return UI Automation text."; - if (langComboBox.SelectedItem is WindowsAiLang winAiLang) - pickedLang = winAiLang; + MessageBox.Show(message, "Text Grab", MessageBoxButton.OK, MessageBoxImage.Information); + } - if (pickedLang != null) - { - currentLanguage = pickedLang; - DefaultSettings.LastUsedLang = pickedLang.LanguageTag; - DefaultSettings.Save(); - } + private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) + { + if (!isLanguageBoxLoaded + || sender is not ComboBox langComboBox + || langComboBox.SelectedItem is not ILanguage pickedLang) + return; + + currentLanguage = pickedLang; + CaptureLanguageUtilities.PersistSelectedLanguage(pickedLang); + NotifyIfUiAutomationNeedsLiveSource(pickedLang); ResetGrabFrame(); @@ -1892,39 +1906,26 @@ private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedE reDrawTimer.Start(); } - private void LoadOcrLanguages() + private async Task LoadOcrLanguagesAsync() { if (LanguagesComboBox.Items.Count > 0) return; - IReadOnlyList possibleOCRLangs = OcrEngine.AvailableRecognizerLanguages; - ILanguage firstLang = LanguageUtilities.GetOCRLanguage(); - - foreach (Language language in possibleOCRLangs) - { - GlobalLang globalLang = new(language); - LanguagesComboBox.Items.Add(globalLang); - } + List availableLanguages = await CaptureLanguageUtilities.GetCaptureLanguagesAsync(includeTesseract: false); + foreach (ILanguage language in availableLanguages) + LanguagesComboBox.Items.Add(language); - if (WindowsAiUtilities.CanDeviceUseWinAI()) - { - WindowsAiLang winAiLang = new(); - LanguagesComboBox.Items.Insert(0, winAiLang); - } + int selectedIndex = CaptureLanguageUtilities.FindPreferredLanguageIndex( + availableLanguages, + DefaultSettings.LastUsedLang, + LanguageUtilities.GetOCRLanguage()); - for (int i = 0; i < LanguagesComboBox.Items.Count; i++) + if (selectedIndex >= 0) { - if 
(LanguagesComboBox.Items[i] is not ILanguage item) - continue; - - if (item.LanguageTag == firstLang.LanguageTag) - { - LanguagesComboBox.SelectedIndex = i; - break; - } + LanguagesComboBox.SelectedIndex = selectedIndex; + currentLanguage = availableLanguages[selectedIndex]; } - isLanguageBoxLoaded = true; } @@ -2064,9 +2065,11 @@ private async void PasteExecuted(object sender, ExecutedRoutedEventArgs? e = nul } hasLoadedImageSource = true; + isStaticImageSource = true; FreezeToggleButton.IsChecked = true; FreezeGrabFrame(); FreezeToggleButton.Visibility = Visibility.Collapsed; + NotifyIfUiAutomationNeedsLiveSource(CurrentLanguage); reDrawTimer.Start(); } @@ -2813,10 +2816,12 @@ private async Task TryLoadImageFromPath(string path) droppedImage.EndInit(); frameContentImageSource = droppedImage; hasLoadedImageSource = true; + isStaticImageSource = true; _currentImagePath = path; FreezeToggleButton.IsChecked = true; FreezeGrabFrame(); FreezeToggleButton.Visibility = Visibility.Collapsed; + NotifyIfUiAutomationNeedsLiveSource(CurrentLanguage); reDrawTimer.Start(); } From e07062f8ccff2a19e26a5ad45e9c8cbf09ed2bfc Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 11:56:16 -0600 Subject: [PATCH 05/13] Add unit tests for language and UI automation utilities Expanded test coverage for CaptureLanguageUtilities and UIAutomationUtilities, including language matching, selection, table output support, text normalization, deduplication, window selection logic, control type handling, and point sampling. Also added tests for UiAutomationLang handling in LanguageService and HistoryInfo. 
--- Tests/CaptureLanguageUtilitiesTests.cs | 51 ++++++++++++ Tests/LanguageServiceTests.cs | 32 ++++++++ Tests/UiAutomationUtilitiesTests.cs | 107 +++++++++++++++++++++++++ 3 files changed, 190 insertions(+) create mode 100644 Tests/CaptureLanguageUtilitiesTests.cs create mode 100644 Tests/UiAutomationUtilitiesTests.cs diff --git a/Tests/CaptureLanguageUtilitiesTests.cs b/Tests/CaptureLanguageUtilitiesTests.cs new file mode 100644 index 00000000..ccb5ba82 --- /dev/null +++ b/Tests/CaptureLanguageUtilitiesTests.cs @@ -0,0 +1,51 @@ +using Text_Grab.Models; +using Text_Grab.Utilities; + +namespace Tests; + +public class CaptureLanguageUtilitiesTests +{ + [Fact] + public void MatchesPersistedLanguage_MatchesByLanguageTag() + { + UiAutomationLang language = new(); + + bool matches = CaptureLanguageUtilities.MatchesPersistedLanguage(language, UiAutomationLang.Tag); + + Assert.True(matches); + } + + [Fact] + public void MatchesPersistedLanguage_MatchesLegacyTesseractDisplayName() + { + TessLang language = new("eng"); + + bool matches = CaptureLanguageUtilities.MatchesPersistedLanguage(language, language.CultureDisplayName); + + Assert.True(matches); + } + + [Fact] + public void FindPreferredLanguageIndex_PrefersPersistedMatchBeforeFallbackLanguage() + { + List languages = + [ + new UiAutomationLang(), + new WindowsAiLang(), + new GlobalLang("en-US") + ]; + + int index = CaptureLanguageUtilities.FindPreferredLanguageIndex( + languages, + UiAutomationLang.Tag, + new GlobalLang("en-US")); + + Assert.Equal(0, index); + } + + [Fact] + public void SupportsTableOutput_ReturnsFalseForUiAutomation() + { + Assert.False(CaptureLanguageUtilities.SupportsTableOutput(new UiAutomationLang())); + } +} diff --git a/Tests/LanguageServiceTests.cs b/Tests/LanguageServiceTests.cs index 649331f3..aa1164ed 100644 --- a/Tests/LanguageServiceTests.cs +++ b/Tests/LanguageServiceTests.cs @@ -34,6 +34,16 @@ public void GetLanguageTag_WithWindowsAiLang_ReturnsWinAI() Assert.Equal("WinAI", tag); } + 
[Fact] + public void GetLanguageTag_WithUiAutomationLang_ReturnsUiAutomationTag() + { + UiAutomationLang uiAutomationLang = new(); + + string tag = LanguageService.GetLanguageTag(uiAutomationLang); + + Assert.Equal(UiAutomationLang.Tag, tag); + } + [Fact] public void GetLanguageTag_WithTessLang_ReturnsRawTag() { @@ -86,6 +96,16 @@ public void GetLanguageKind_WithWindowsAiLang_ReturnsWindowsAi() Assert.Equal(LanguageKind.WindowsAi, kind); } + [Fact] + public void GetLanguageKind_WithUiAutomationLang_ReturnsUiAutomation() + { + UiAutomationLang uiAutomationLang = new(); + + LanguageKind kind = LanguageService.GetLanguageKind(uiAutomationLang); + + Assert.Equal(LanguageKind.UiAutomation, kind); + } + [Fact] public void GetLanguageKind_WithTessLang_ReturnsTesseract() { @@ -149,4 +169,16 @@ public void LanguageUtilities_DelegatesTo_LanguageService() Assert.Equal("en-US", tag); Assert.Equal(LanguageKind.Global, kind); } + + [Fact] + public void HistoryInfo_OcrLanguage_RehydratesUiAutomationLanguage() + { + HistoryInfo historyInfo = new() + { + LanguageTag = UiAutomationLang.Tag, + LanguageKind = LanguageKind.UiAutomation, + }; + + Assert.IsType(historyInfo.OcrLanguage); + } } diff --git a/Tests/UiAutomationUtilitiesTests.cs b/Tests/UiAutomationUtilitiesTests.cs new file mode 100644 index 00000000..601b2539 --- /dev/null +++ b/Tests/UiAutomationUtilitiesTests.cs @@ -0,0 +1,107 @@ +using System.Windows; +using System.Windows.Automation; +using Text_Grab.Models; +using Text_Grab.Utilities; + +namespace Tests; + +public class UiAutomationUtilitiesTests +{ + [Fact] + public void NormalizeText_TrimsWhitespaceAndCollapsesEmptyLines() + { + string normalized = UIAutomationUtilities.NormalizeText(" Hello world \r\n\r\n Second\tline "); + + Assert.Equal($"Hello world{Environment.NewLine}Second line", normalized); + } + + [Fact] + public void TryAddUniqueText_DeduplicatesNormalizedValues() + { + HashSet seen = []; + List output = []; + + bool addedFirst = 
UIAutomationUtilities.TryAddUniqueText(" Hello world ", seen, output); + bool addedSecond = UIAutomationUtilities.TryAddUniqueText("Hello world", seen, output); + + Assert.True(addedFirst); + Assert.False(addedSecond); + Assert.Single(output); + } + + [Fact] + public void FindTargetWindowCandidate_PrefersCenterPointHit() + { + WindowSelectionCandidate first = new((nint)1, new Rect(0, 0, 80, 80), "First", 1); + WindowSelectionCandidate second = new((nint)2, new Rect(90, 0, 80, 80), "Second", 2); + + WindowSelectionCandidate? candidate = UIAutomationUtilities.FindTargetWindowCandidate( + new Rect(100, 10, 20, 20), + [first, second]); + + Assert.Same(second, candidate); + } + + [Fact] + public void FindTargetWindowCandidate_FallsBackToLargestIntersection() + { + WindowSelectionCandidate first = new((nint)1, new Rect(0, 0, 50, 50), "First", 1); + WindowSelectionCandidate second = new((nint)2, new Rect(60, 0, 80, 80), "Second", 2); + + WindowSelectionCandidate? candidate = UIAutomationUtilities.FindTargetWindowCandidate( + new Rect(40, 40, 30, 30), + [first, second]); + + Assert.Same(second, candidate); + } + + [Fact] + public void ShouldUseNameFallback_SkipsStructuralControls() + { + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Window)); + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Group)); + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Pane)); + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Custom)); + } + + [Fact] + public void ShouldUseNameFallback_AllowsLeafControls() + { + Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Text)); + Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Button)); + Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.ListItem)); + } + + [Fact] + public void GetSamplePoints_UsesCenterPointForSmallSelections() + { + IReadOnlyList samplePoints = UIAutomationUtilities.GetSamplePoints(new 
Rect(10, 20, 40, 30)); + + Point samplePoint = Assert.Single(samplePoints); + Assert.Equal(new Point(30, 35), samplePoint); + } + + [Fact] + public void GetSamplePoints_UsesGridForLargerSelections() + { + IReadOnlyList samplePoints = UIAutomationUtilities.GetSamplePoints(new Rect(0, 0, 100, 100)); + + Assert.Equal(9, samplePoints.Count); + Assert.Contains(new Point(50, 50), samplePoints); + Assert.Contains(new Point(20, 20), samplePoints); + Assert.Contains(new Point(80, 80), samplePoints); + } + + [Fact] + public void GetPointProbePoints_ReturnsCenterThenCrosshairNeighbors() + { + IReadOnlyList probePoints = UIAutomationUtilities.GetPointProbePoints(new Point(25, 40)); + + Assert.Equal(5, probePoints.Count); + Assert.Equal(new Point(25, 40), probePoints[0]); + Assert.Contains(new Point(23, 40), probePoints); + Assert.Contains(new Point(27, 40), probePoints); + Assert.Contains(new Point(25, 38), probePoints); + Assert.Contains(new Point(25, 42), probePoints); + } +} From 918474c574bf5cf114e3a47a04fbefb46d9b8805 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 15:56:31 -0600 Subject: [PATCH 06/13] Add UIA overlay models & snapshot extraction, improve logic - Introduce UiAutomationOverlayItem/Snapshot models and enum for overlay representation and metadata. - Add overlay extraction methods to UIAutomationUtilities, including deduplication, sorting, and metadata helpers. - Support overlay snapshot extraction for regions, with optional window exclusion. - Refine region/point text extraction to handle excluded windows and improve accuracy with overlays. - Improve element text extraction: restrict Name fallback to specific control types and skip if visible text descendants exist. - Add ImageSource-to-Bitmap conversion and utility for live UIA source requirement. - Refactor history service to better handle image paths and deduplication. 
--- Text-Grab/Models/UiAutomationOverlayItem.cs | 18 + .../Models/UiAutomationOverlaySnapshot.cs | 12 + Text-Grab/Services/HistoryService.cs | 25 +- .../Utilities/CaptureLanguageUtilities.cs | 3 + Text-Grab/Utilities/ImageMethods.cs | 9 + Text-Grab/Utilities/OcrUtilities.cs | 17 +- Text-Grab/Utilities/UIAutomationUtilities.cs | 656 +++++++++++++++++- 7 files changed, 697 insertions(+), 43 deletions(-) create mode 100644 Text-Grab/Models/UiAutomationOverlayItem.cs create mode 100644 Text-Grab/Models/UiAutomationOverlaySnapshot.cs diff --git a/Text-Grab/Models/UiAutomationOverlayItem.cs b/Text-Grab/Models/UiAutomationOverlayItem.cs new file mode 100644 index 00000000..9522d699 --- /dev/null +++ b/Text-Grab/Models/UiAutomationOverlayItem.cs @@ -0,0 +1,18 @@ +using System.Windows; + +namespace Text_Grab.Models; + +public enum UiAutomationOverlaySource +{ + PointTextRange = 0, + VisibleTextRange = 1, + ElementBounds = 2, +} + +public record UiAutomationOverlayItem( + string Text, + Rect ScreenBounds, + UiAutomationOverlaySource Source, + string ControlTypeProgrammaticName = "", + string AutomationId = "", + string RuntimeId = ""); diff --git a/Text-Grab/Models/UiAutomationOverlaySnapshot.cs b/Text-Grab/Models/UiAutomationOverlaySnapshot.cs new file mode 100644 index 00000000..2bb4df59 --- /dev/null +++ b/Text-Grab/Models/UiAutomationOverlaySnapshot.cs @@ -0,0 +1,12 @@ +using System.Collections.Generic; +using System.Windows; + +namespace Text_Grab.Models; + +public record UiAutomationOverlaySnapshot( + Rect CaptureBounds, + WindowSelectionCandidate TargetWindow, + IReadOnlyList Items) +{ + public bool HasItems => Items.Count > 0; +} diff --git a/Text-Grab/Services/HistoryService.cs b/Text-Grab/Services/HistoryService.cs index 41ab782a..a3f42fcf 100644 --- a/Text-Grab/Services/HistoryService.cs +++ b/Text-Grab/Services/HistoryService.cs @@ -179,28 +179,31 @@ public void SaveToHistory(GrabFrame grabFrameToSave) HistoryInfo historyInfo = grabFrameToSave.AsHistoryItem(); 
string imgRandomName = Guid.NewGuid().ToString(); + HistoryInfo? prevHistory = string.IsNullOrEmpty(historyInfo.ID) + ? null + : HistoryWithImage.FirstOrDefault(h => h.ID == historyInfo.ID); - if (string.IsNullOrEmpty(historyInfo.ID)) + if (prevHistory is null) { if (historyInfo.ImageContent is null) return; - historyInfo.ID = Guid.NewGuid().ToString(); - - FileUtilities.SaveImageFile(historyInfo.ImageContent, $"{imgRandomName}.bmp", FileStorageKind.WithHistory); historyInfo.ImagePath = $"{imgRandomName}.bmp"; } else { - HistoryInfo? prevHistory = HistoryWithImage.Where(h => h.ID == historyInfo.ID).FirstOrDefault(); - - if (prevHistory is not null) - { - historyInfo.ImagePath = prevHistory.ImagePath; - HistoryWithImage.Remove(prevHistory); - } + historyInfo.ImagePath = string.IsNullOrWhiteSpace(prevHistory.ImagePath) + ? $"{imgRandomName}.bmp" + : prevHistory.ImagePath; + HistoryWithImage.Remove(prevHistory); } + if (string.IsNullOrEmpty(historyInfo.ID)) + historyInfo.ID = Guid.NewGuid().ToString(); + + if (historyInfo.ImageContent is not null && !string.IsNullOrWhiteSpace(historyInfo.ImagePath)) + FileUtilities.SaveImageFile(historyInfo.ImageContent, historyInfo.ImagePath, FileStorageKind.WithHistory); + HistoryWithImage.Add(historyInfo); saveTimer.Stop(); diff --git a/Text-Grab/Utilities/CaptureLanguageUtilities.cs b/Text-Grab/Utilities/CaptureLanguageUtilities.cs index f1762764..564177c3 100644 --- a/Text-Grab/Utilities/CaptureLanguageUtilities.cs +++ b/Text-Grab/Utilities/CaptureLanguageUtilities.cs @@ -79,4 +79,7 @@ public static bool SupportsTableOutput(ILanguage language) public static bool IsStaticImageCompatible(ILanguage language) => language is not UiAutomationLang; + + public static bool RequiresLiveUiAutomationSource(ILanguage language, bool isStaticImageSource, bool hasFrozenUiAutomationSnapshot) + => language is UiAutomationLang && isStaticImageSource && !hasFrozenUiAutomationSnapshot; } diff --git a/Text-Grab/Utilities/ImageMethods.cs 
b/Text-Grab/Utilities/ImageMethods.cs index 831b5b21..d569cb20 100644 --- a/Text-Grab/Utilities/ImageMethods.cs +++ b/Text-Grab/Utilities/ImageMethods.cs @@ -218,6 +218,15 @@ public static Bitmap BitmapSourceToBitmap(BitmapSource source) return bmp; } + public static Bitmap? ImageSourceToBitmap(ImageSource? source) + { + return source switch + { + BitmapSource bitmapSource => BitmapSourceToBitmap(bitmapSource), + _ => null + }; + } + public static Bitmap GetBitmapFromIRandomAccessStream(IRandomAccessStream stream) { Stream managedStream = stream.AsStream(); diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index 4a1439d4..fcaa4c20 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -42,6 +42,12 @@ private static ILanguage GetCompatibleOcrLanguage(ILanguage language) return language; } + private static IReadOnlyCollection? GetExcludedWindowHandles(Window passedWindow) + { + IntPtr handle = new System.Windows.Interop.WindowInteropHelper(passedWindow).Handle; + return handle == IntPtr.Zero ? null : [handle]; + } + public static void GetTextFromOcrLine(this IOcrLine ocrLine, bool isSpaceJoiningOCRLang, StringBuilder text) { // (when OCR language is zh or ja) @@ -87,11 +93,14 @@ public static void GetTextFromOcrLine(this IOcrLine ocrLine, bool isSpaceJoining text.ReplaceGreekOrCyrillicWithLatin(); } - public static async Task GetTextFromAbsoluteRectAsync(Rect rect, ILanguage language) + public static async Task GetTextFromAbsoluteRectAsync( + Rect rect, + ILanguage language, + IReadOnlyCollection? 
excludedHandles = null) { if (IsUiAutomationLanguage(language)) { - string uiAutomationText = await UIAutomationUtilities.GetTextFromRegionAsync(rect); + string uiAutomationText = await UIAutomationUtilities.GetTextFromRegionAsync(rect, excludedHandles); if (!string.IsNullOrWhiteSpace(uiAutomationText) || !DefaultSettings.UiAutomationFallbackToOcr) return uiAutomationText; @@ -112,7 +121,7 @@ public static async Task GetRegionsTextAsync(Window passedWindow, Rectan int thisCorrectedTop = (int)absPosPoint.Y + selectedRegion.Top; Rectangle correctedRegion = new(thisCorrectedLeft, thisCorrectedTop, selectedRegion.Width, selectedRegion.Height); - return await GetTextFromAbsoluteRectAsync(correctedRegion.AsRect(), language); + return await GetTextFromAbsoluteRectAsync(correctedRegion.AsRect(), language, GetExcludedWindowHandles(passedWindow)); } public static async Task GetRegionsTextAsTableAsync(Window passedWindow, Rectangle selectedRegion, ILanguage objLang) @@ -460,7 +469,7 @@ public static async Task GetClickedWordAsync(Window passedWindow, Point { Point absoluteWindowPosition = passedWindow.GetAbsolutePosition(); Point absoluteClickedPoint = new(absoluteWindowPosition.X + clickedPoint.X, absoluteWindowPosition.Y + clickedPoint.Y); - string uiAutomationText = await UIAutomationUtilities.GetTextFromPointAsync(absoluteClickedPoint); + string uiAutomationText = await UIAutomationUtilities.GetTextFromPointAsync(absoluteClickedPoint, GetExcludedWindowHandles(passedWindow)); if (!string.IsNullOrWhiteSpace(uiAutomationText) || !DefaultSettings.UiAutomationFallbackToOcr) return uiAutomationText.Trim(); diff --git a/Text-Grab/Utilities/UIAutomationUtilities.cs b/Text-Grab/Utilities/UIAutomationUtilities.cs index ec9b30b5..571e5098 100644 --- a/Text-Grab/Utilities/UIAutomationUtilities.cs +++ b/Text-Grab/Utilities/UIAutomationUtilities.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; using 
System.Threading.Tasks; using System.Windows; @@ -27,17 +28,25 @@ private enum AutomationTextSource } private readonly record struct TextExtractionCandidate(string Text, AutomationTextSource Source, int Depth); + private readonly record struct WindowPointCandidate(TextExtractionCandidate Candidate, double Area); + private readonly record struct OverlayCandidate(UiAutomationOverlayItem Item, AutomationTextSource Source, int Depth); public static Task GetTextFromPointAsync(Point screenPoint) + => GetTextFromPointAsync(screenPoint, null); + + public static Task GetTextFromPointAsync(Point screenPoint, IReadOnlyCollection? excludedHandles) { UiAutomationOptions options = GetOptionsFromSettings(); - return Task.Run(() => GetTextFromPoint(screenPoint, options)); + return Task.Run(() => GetTextFromPoint(screenPoint, options, excludedHandles)); } public static Task GetTextFromRegionAsync(Rect screenRect) + => GetTextFromRegionAsync(screenRect, null); + + public static Task GetTextFromRegionAsync(Rect screenRect, IReadOnlyCollection? excludedHandles) { UiAutomationOptions options = GetOptionsFromSettings(screenRect); - return Task.Run(() => GetTextFromRegion(screenRect, options)); + return Task.Run(() => GetTextFromRegion(screenRect, options, excludedHandles)); } public static Task GetTextFromWindowAsync(IntPtr windowHandle, Rect? filterBounds = null) @@ -46,6 +55,15 @@ public static Task GetTextFromWindowAsync(IntPtr windowHandle, Rect? fil return Task.Run(() => GetTextFromWindow(windowHandle, options)); } + public static Task GetOverlaySnapshotFromRegionAsync(Rect screenRect) + => GetOverlaySnapshotFromRegionAsync(screenRect, null); + + public static Task GetOverlaySnapshotFromRegionAsync(Rect screenRect, IReadOnlyCollection? 
excludedHandles) + { + UiAutomationOptions options = GetOptionsFromSettings(screenRect); + return Task.Run(() => GetOverlaySnapshotFromRegion(screenRect, options, excludedHandles)); + } + internal static UiAutomationOptions GetOptionsFromSettings(Rect? filterBounds = null) { UiAutomationTraversalMode traversalMode = UiAutomationTraversalMode.Balanced; @@ -77,6 +95,17 @@ internal static UiAutomationOptions GetOptionsFromSettings(Rect? filterBounds = .FirstOrDefault(); } + internal static WindowSelectionCandidate? FindPointTargetWindowCandidate(Point screenPoint, IReadOnlyCollection? excludedHandles) + { + List candidates = WindowSelectionUtilities.GetCapturableWindows(excludedHandles); + WindowSelectionCandidate? directCandidate = WindowSelectionUtilities.FindWindowAtPoint(candidates, screenPoint); + if (directCandidate is not null) + return directCandidate; + + Rect searchRect = new(screenPoint.X - 1, screenPoint.Y - 1, 2, 2); + return FindTargetWindowCandidate(searchRect, candidates); + } + internal static string NormalizeText(string? text) { if (string.IsNullOrWhiteSpace(text)) @@ -103,27 +132,14 @@ internal static bool TryAddUniqueText(string? 
text, ISet seenText, List< internal static bool ShouldUseNameFallback(ControlType controlType) { - return controlType != ControlType.Window - && controlType != ControlType.Pane - && controlType != ControlType.Group - && controlType != ControlType.Custom - && controlType != ControlType.Table - && controlType != ControlType.List - && controlType != ControlType.Tree - && controlType != ControlType.Menu - && controlType != ControlType.MenuBar - && controlType != ControlType.ToolBar - && controlType != ControlType.TitleBar - && controlType != ControlType.StatusBar - && controlType != ControlType.ScrollBar - && controlType != ControlType.Separator - && controlType != ControlType.ProgressBar - && controlType != ControlType.Slider - && controlType != ControlType.Spinner - && controlType != ControlType.Calendar - && controlType != ControlType.DataGrid - && controlType != ControlType.Header - && controlType != ControlType.Tab; + return controlType == ControlType.Text + || controlType == ControlType.Hyperlink + || controlType == ControlType.ListItem + || controlType == ControlType.DataItem + || controlType == ControlType.TreeItem + || controlType == ControlType.MenuItem + || controlType == ControlType.TabItem + || controlType == ControlType.HeaderItem; } internal static IReadOnlyList GetSamplePoints(Rect selectionRect) @@ -168,8 +184,66 @@ internal static IReadOnlyList GetPointProbePoints(Point screenPoint) ]; } - private static string GetTextFromPoint(Point screenPoint, UiAutomationOptions options) + internal static bool TryClipBounds(Rect bounds, Rect? 
filterBounds, out Rect clippedBounds) { + clippedBounds = bounds; + + if (bounds == Rect.Empty || bounds.Width < 1 || bounds.Height < 1) + return false; + + if (filterBounds is Rect clipBounds) + { + clippedBounds = Rect.Intersect(bounds, clipBounds); + if (clippedBounds == Rect.Empty || clippedBounds.Width < 1 || clippedBounds.Height < 1) + return false; + } + + return true; + } + + internal static string BuildOverlayDedupKey(UiAutomationOverlayItem item) + { + return string.Join( + '|', + NormalizeText(item.Text), + Math.Round(item.ScreenBounds.X, 1).ToString(CultureInfo.InvariantCulture), + Math.Round(item.ScreenBounds.Y, 1).ToString(CultureInfo.InvariantCulture), + Math.Round(item.ScreenBounds.Width, 1).ToString(CultureInfo.InvariantCulture), + Math.Round(item.ScreenBounds.Height, 1).ToString(CultureInfo.InvariantCulture)); + } + + internal static bool TryAddUniqueOverlayItem(UiAutomationOverlayItem item, ISet seenItems, List output) + { + if (string.IsNullOrWhiteSpace(NormalizeText(item.Text))) + return false; + + string dedupKey = BuildOverlayDedupKey(item); + if (!seenItems.Add(dedupKey)) + return false; + + output.Add(item); + return true; + } + + internal static IReadOnlyList SortOverlayItems(IEnumerable items) + { + return + [ + .. items.OrderBy(item => Math.Round(item.ScreenBounds.Top, 1)) + .ThenBy(item => Math.Round(item.ScreenBounds.Left, 1)) + .ThenBy(item => item.Text, StringComparer.CurrentCulture) + ]; + } + + private static string GetTextFromPoint(Point screenPoint, UiAutomationOptions options, IReadOnlyCollection? excludedHandles) + { + if (excludedHandles is not null && excludedHandles.Count > 0) + { + string excludedWindowText = GetTextFromPointInUnderlyingWindow(screenPoint, options, excludedHandles); + if (!string.IsNullOrWhiteSpace(excludedWindowText)) + return excludedWindowText; + } + TextExtractionCandidate? 
bestCandidate = null; foreach (Point probePoint in GetPointProbePoints(screenPoint)) @@ -195,9 +269,51 @@ private static string GetTextFromPoint(Point screenPoint, UiAutomationOptions op return bestCandidate?.Text ?? string.Empty; } - private static string GetTextFromRegion(Rect screenRect, UiAutomationOptions options) + private static string GetTextFromPointInUnderlyingWindow( + Point screenPoint, + UiAutomationOptions options, + IReadOnlyCollection excludedHandles) + { + WindowSelectionCandidate? targetWindow = FindPointTargetWindowCandidate(screenPoint, excludedHandles); + if (targetWindow is null || targetWindow.Handle == IntPtr.Zero) + return string.Empty; + + try + { + AutomationElement root = AutomationElement.FromHandle(targetWindow.Handle); + WindowPointCandidate? bestCandidate = null; + + foreach ((AutomationElement element, _) in EnumerateElementsWithDepth(root, options)) + { + if (ShouldSkipElementText(element, options)) + continue; + + if (!TryGetElementBounds(element, options.FilterBounds, out Rect bounds) || !bounds.Contains(screenPoint)) + continue; + + if (!TryCreatePointTextCandidate(element, screenPoint, 0, TextUnit.Line, out TextExtractionCandidate candidate)) + continue; + + WindowPointCandidate windowPointCandidate = new(candidate, Math.Max(1, bounds.Width * bounds.Height)); + if (IsBetterWindowPointCandidate(windowPointCandidate, bestCandidate)) + bestCandidate = windowPointCandidate; + } + + return bestCandidate?.Candidate.Text ?? string.Empty; + } + catch (ElementNotAvailableException) + { + return string.Empty; + } + catch (ArgumentException) + { + return string.Empty; + } + } + + private static string GetTextFromRegion(Rect screenRect, UiAutomationOptions options, IReadOnlyCollection? excludedHandles) { - List candidates = WindowSelectionUtilities.GetCapturableWindows(); + List candidates = WindowSelectionUtilities.GetCapturableWindows(excludedHandles); WindowSelectionCandidate? 
targetWindow = FindTargetWindowCandidate(screenRect, candidates); if (targetWindow is null) return string.Empty; @@ -246,6 +362,40 @@ private static string GetTextFromWindow(IntPtr windowHandle, UiAutomationOptions } } + private static UiAutomationOverlaySnapshot? GetOverlaySnapshotFromRegion( + Rect screenRect, + UiAutomationOptions options, + IReadOnlyCollection? excludedHandles) + { + if (screenRect == Rect.Empty || screenRect.Width <= 0 || screenRect.Height <= 0) + return null; + + List candidates = WindowSelectionUtilities.GetCapturableWindows(excludedHandles); + WindowSelectionCandidate? targetWindow = FindTargetWindowCandidate(screenRect, candidates); + if (targetWindow is null || targetWindow.Handle == IntPtr.Zero) + return null; + + try + { + AutomationElement root = AutomationElement.FromHandle(targetWindow.Handle); + HashSet seenItems = new(StringComparer.CurrentCulture); + List items = []; + + AppendOverlayItemsFromSamplePoints(root, screenRect, options, seenItems, items); + AppendOverlayItemsFromElementTree(root, options, seenItems, items); + + return new UiAutomationOverlaySnapshot(screenRect, targetWindow, SortOverlayItems(items)); + } + catch (ElementNotAvailableException) + { + return null; + } + catch (ArgumentException) + { + return null; + } + } + private static string ExtractTextFromElementTree(AutomationElement root, UiAutomationOptions options) { HashSet seenText = new(StringComparer.CurrentCulture); @@ -272,6 +422,24 @@ private static void AppendTextFromElementTree( } } + private static void AppendOverlayItemsFromElementTree( + AutomationElement root, + UiAutomationOptions options, + ISet seenItems, + List overlayItems) + { + if (options.PreferFocusedElement) + TryExtractFocusedElementOverlayItems(root, options, seenItems, overlayItems); + + foreach (AutomationElement element in EnumerateElements(root, options)) + { + if (ShouldSkipElementText(element, options)) + continue; + + TryAddOverlayItemsFromElement(element, options, seenItems, 
overlayItems); + } + } + private static void AppendTextFromSamplePoints( AutomationElement root, Rect selectionRect, @@ -292,6 +460,25 @@ private static void AppendTextFromSamplePoints( } } + private static void AppendOverlayItemsFromSamplePoints( + AutomationElement root, + Rect selectionRect, + UiAutomationOptions options, + ISet seenItems, + List overlayItems) + { + foreach (Point samplePoint in GetSamplePoints(selectionRect)) + { + AutomationElement? element = GetElementAtPoint(samplePoint); + if (element is null || !IsDescendantOrSelf(root, element)) + continue; + + OverlayCandidate? candidate = GetBestPointOverlayCandidate(element, samplePoint, options, TextUnit.Line); + if (candidate is not null) + TryAddUniqueOverlayItem(candidate.Value.Item, seenItems, overlayItems); + } + } + private static string GetBestPointText( AutomationElement element, Point screenPoint, @@ -328,6 +515,33 @@ private static string GetBestPointText( return bestCandidate; } + private static OverlayCandidate? GetBestPointOverlayCandidate( + AutomationElement element, + Point screenPoint, + UiAutomationOptions options, + TextUnit pointTextUnit) + { + OverlayCandidate? bestCandidate = null; + AutomationElement? 
current = element; + + for (int depth = 0; current is not null && depth <= MaxPointAncestorDepth; depth++) + { + if (!ShouldSkipElementText(current, options) + && TryCreatePointOverlayCandidate(current, screenPoint, depth, pointTextUnit, options.FilterBounds, out OverlayCandidate candidate) + && IsBetterCandidate(candidate, bestCandidate)) + { + bestCandidate = candidate; + + if (candidate.Source == AutomationTextSource.PointTextPattern && candidate.Depth == 0) + break; + } + + current = GetParentElement(current); + } + + return bestCandidate; + } + private static bool TryCreatePointTextCandidate( AutomationElement element, Point screenPoint, @@ -364,6 +578,31 @@ private static bool TryCreatePointTextCandidate( return false; } + private static bool TryCreatePointOverlayCandidate( + AutomationElement element, + Point screenPoint, + int depth, + TextUnit pointTextUnit, + Rect? filterBounds, + out OverlayCandidate candidate) + { + candidate = default; + + if (TryCreatePointTextRangeOverlayItem(element, screenPoint, pointTextUnit, filterBounds, out UiAutomationOverlayItem pointTextItem)) + { + candidate = new(pointTextItem, AutomationTextSource.PointTextPattern, depth); + return true; + } + + if (TryCreateElementBoundsOverlayItem(element, filterBounds, out UiAutomationOverlayItem elementBoundsItem, out AutomationTextSource source)) + { + candidate = new(elementBoundsItem, source, depth); + return true; + } + + return false; + } + private static bool IsBetterCandidate(TextExtractionCandidate candidate, TextExtractionCandidate? currentBest) { if (currentBest is null) @@ -375,6 +614,28 @@ private static bool IsBetterCandidate(TextExtractionCandidate candidate, TextExt return candidate.Depth < currentBest.Value.Depth; } + private static bool IsBetterCandidate(OverlayCandidate candidate, OverlayCandidate? 
currentBest) + { + if (currentBest is null) + return true; + + if (candidate.Source != currentBest.Value.Source) + return candidate.Source > currentBest.Value.Source; + + return candidate.Depth < currentBest.Value.Depth; + } + + private static bool IsBetterWindowPointCandidate(WindowPointCandidate candidate, WindowPointCandidate? currentBest) + { + if (currentBest is null) + return true; + + if (candidate.Candidate.Source != currentBest.Value.Candidate.Source) + return candidate.Candidate.Source > currentBest.Value.Candidate.Source; + + return candidate.Area < currentBest.Value.Area; + } + private static void TryExtractFocusedElementText( AutomationElement root, UiAutomationOptions options, @@ -398,7 +659,30 @@ private static void TryExtractFocusedElementText( } } - private static IEnumerable EnumerateElements(AutomationElement root, UiAutomationOptions options) + private static void TryExtractFocusedElementOverlayItems( + AutomationElement root, + UiAutomationOptions options, + ISet seenItems, + List overlayItems) + { + try + { + AutomationElement? 
focusedElement = AutomationElement.FocusedElement; + if (focusedElement is null || !IsDescendantOrSelf(root, focusedElement)) + return; + + if (!ShouldSkipElementText(focusedElement, options)) + TryAddOverlayItemsFromElement(focusedElement, options, seenItems, overlayItems); + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + } + + private static IEnumerable<(AutomationElement Element, int Depth)> EnumerateElementsWithDepth(AutomationElement root, UiAutomationOptions options) { Queue<(AutomationElement Element, int Depth)> queue = new(); queue.Enqueue((root, 0)); @@ -410,7 +694,7 @@ private static IEnumerable EnumerateElements(AutomationElemen while (queue.Count > 0) { (AutomationElement element, int depth) = queue.Dequeue(); - yield return element; + yield return (element, depth); if (depth >= maxDepth) continue; @@ -440,6 +724,12 @@ private static IEnumerable EnumerateElements(AutomationElemen } } + private static IEnumerable EnumerateElements(AutomationElement root, UiAutomationOptions options) + { + foreach ((AutomationElement element, _) in EnumerateElementsWithDepth(root, options)) + yield return element; + } + private static bool ShouldSkipElementText(AutomationElement element, UiAutomationOptions options) { try @@ -622,6 +912,313 @@ private static bool TryExtractValuePatternText(AutomationElement element, out st return false; } + private static void TryAddOverlayItemsFromElement( + AutomationElement element, + UiAutomationOptions options, + ISet seenItems, + List overlayItems) + { + bool hasVisibleTextRanges = options.FilterBounds is Rect filterBounds + && TryAddVisibleTextRangeOverlayItems(element, filterBounds, seenItems, overlayItems); + + if (hasVisibleTextRanges) + return; + + if (TryCreateElementBoundsOverlayItem(element, options.FilterBounds, out UiAutomationOverlayItem overlayItem, out _)) + TryAddUniqueOverlayItem(overlayItem, seenItems, overlayItems); + } + + private static bool 
TryAddVisibleTextRangeOverlayItems( + AutomationElement element, + Rect filterBounds, + ISet seenItems, + List overlayItems) + { + try + { + if (!element.TryGetCurrentPattern(TextPattern.Pattern, out object pattern) + || pattern is not TextPattern textPattern) + { + return false; + } + + TextPatternRange[] visibleRanges = textPattern.GetVisibleRanges(); + bool createdAnyRange = false; + + foreach (TextPatternRange range in visibleRanges) + { + if (!TryCreateTextRangeOverlayItem(element, range, filterBounds, UiAutomationOverlaySource.VisibleTextRange, out UiAutomationOverlayItem overlayItem)) + continue; + + createdAnyRange = true; + TryAddUniqueOverlayItem(overlayItem, seenItems, overlayItems); + } + + return createdAnyRange; + } + catch (ElementNotAvailableException) + { + return false; + } + catch (InvalidOperationException) + { + return false; + } + } + + private static bool TryCreatePointTextRangeOverlayItem( + AutomationElement element, + Point screenPoint, + TextUnit preferredUnit, + Rect? 
filterBounds, + out UiAutomationOverlayItem overlayItem) + { + overlayItem = default!; + + try + { + if (!element.TryGetCurrentPattern(TextPattern.Pattern, out object pattern) + || pattern is not TextPattern textPattern) + { + return false; + } + + TextPatternRange range = textPattern.RangeFromPoint(screenPoint); + range.ExpandToEnclosingUnit(preferredUnit); + + if (TryCreateTextRangeOverlayItem(element, range, filterBounds, UiAutomationOverlaySource.PointTextRange, out overlayItem)) + return true; + + if (preferredUnit == TextUnit.Line) + return false; + + range = textPattern.RangeFromPoint(screenPoint); + range.ExpandToEnclosingUnit(TextUnit.Line); + return TryCreateTextRangeOverlayItem(element, range, filterBounds, UiAutomationOverlaySource.PointTextRange, out overlayItem); + } + catch (ArgumentException) + { + return false; + } + catch (ElementNotAvailableException) + { + return false; + } + catch (InvalidOperationException) + { + return false; + } + } + + private static bool TryCreateTextRangeOverlayItem( + AutomationElement element, + TextPatternRange range, + Rect? filterBounds, + UiAutomationOverlaySource source, + out UiAutomationOverlayItem overlayItem) + { + overlayItem = default!; + string text = NormalizeText(range.GetText(-1)); + if (string.IsNullOrWhiteSpace(text)) + return false; + + if (!TryGetRangeBounds(range, filterBounds, out Rect rangeBounds)) + return false; + + GetElementMetadata(element, out string controlTypeProgrammaticName, out string automationId, out string runtimeId); + overlayItem = new UiAutomationOverlayItem(text, rangeBounds, source, controlTypeProgrammaticName, automationId, runtimeId); + return true; + } + + private static bool TryGetRangeBounds(TextPatternRange range, Rect? 
filterBounds, out Rect bounds) + { + bounds = Rect.Empty; + + try + { + Rect aggregateBounds = Rect.Empty; + + foreach (Rect rectangle in range.GetBoundingRectangles()) + { + if (!TryClipBounds(rectangle, filterBounds, out Rect clippedBounds)) + continue; + + aggregateBounds = aggregateBounds == Rect.Empty ? clippedBounds : Rect.Union(aggregateBounds, clippedBounds); + } + + return TryClipBounds(aggregateBounds, filterBounds, out bounds); + } + catch (InvalidOperationException) + { + return false; + } + } + + private static bool TryCreateElementBoundsOverlayItem( + AutomationElement element, + Rect? filterBounds, + out UiAutomationOverlayItem overlayItem, + out AutomationTextSource source) + { + overlayItem = default!; + source = AutomationTextSource.None; + + if (!TryGetElementBounds(element, filterBounds, out Rect bounds)) + return false; + + string text; + if (TryExtractValuePatternText(element, out string valuePatternText)) + { + text = NormalizeText(valuePatternText); + source = AutomationTextSource.ValuePattern; + } + else if (TryExtractTextPatternText(element, filterBounds, out string textPatternText)) + { + text = NormalizeText(textPatternText); + source = AutomationTextSource.TextPattern; + } + else if (TryExtractNameText(element, out string nameText)) + { + text = NormalizeText(nameText); + source = AutomationTextSource.NameFallback; + } + else + { + return false; + } + + if (string.IsNullOrWhiteSpace(text)) + return false; + + GetElementMetadata(element, out string controlTypeProgrammaticName, out string automationId, out string runtimeId); + overlayItem = new UiAutomationOverlayItem(text, bounds, UiAutomationOverlaySource.ElementBounds, controlTypeProgrammaticName, automationId, runtimeId); + return true; + } + + private static bool TryGetElementBounds(AutomationElement element, Rect? 
filterBounds, out Rect bounds) + { + bounds = Rect.Empty; + + try + { + return TryClipBounds(element.Current.BoundingRectangle, filterBounds, out bounds); + } + catch (ElementNotAvailableException) + { + return false; + } + catch (InvalidOperationException) + { + return false; + } + } + + private static bool HasVisibleTextDescendant(AutomationElement element) + { + const int maxDepth = 2; + Queue<(AutomationElement Element, int Depth)> queue = new(); + + try + { + AutomationElement? child = TreeWalker.ControlViewWalker.GetFirstChild(element); + while (child is not null) + { + queue.Enqueue((child, 1)); + child = TreeWalker.ControlViewWalker.GetNextSibling(child); + } + } + catch (ElementNotAvailableException) + { + return false; + } + catch (InvalidOperationException) + { + return false; + } + + while (queue.Count > 0) + { + (AutomationElement currentElement, int depth) = queue.Dequeue(); + + try + { + ControlType controlType = currentElement.Current.ControlType; + if (controlType == ControlType.Text + || controlType == ControlType.Edit + || controlType == ControlType.Document) + { + return true; + } + } + catch (ElementNotAvailableException) + { + continue; + } + catch (InvalidOperationException) + { + continue; + } + + if (depth >= maxDepth) + continue; + + try + { + AutomationElement? 
child = TreeWalker.ControlViewWalker.GetFirstChild(currentElement); + while (child is not null) + { + queue.Enqueue((child, depth + 1)); + child = TreeWalker.ControlViewWalker.GetNextSibling(child); + } + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + } + + return false; + } + + private static void GetElementMetadata( + AutomationElement element, + out string controlTypeProgrammaticName, + out string automationId, + out string runtimeId) + { + controlTypeProgrammaticName = string.Empty; + automationId = string.Empty; + runtimeId = string.Empty; + + try + { + AutomationElement.AutomationElementInformation current = element.Current; + controlTypeProgrammaticName = current.ControlType?.ProgrammaticName ?? string.Empty; + automationId = current.AutomationId ?? string.Empty; + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + + try + { + int[]? rawRuntimeId = element.GetRuntimeId(); + if (rawRuntimeId is { Length: > 0 }) + runtimeId = string.Join('-', rawRuntimeId); + } + catch (ElementNotAvailableException) + { + } + catch (InvalidOperationException) + { + } + } + private static bool TryExtractNameText(AutomationElement element, out string text) { text = string.Empty; @@ -632,6 +1229,9 @@ private static bool TryExtractNameText(AutomationElement element, out string tex if (!ShouldUseNameFallback(current.ControlType)) return false; + if (current.ControlType != ControlType.Text && HasVisibleTextDescendant(element)) + return false; + text = current.Name; return !string.IsNullOrWhiteSpace(text); } From e81c3c52bfdb6a923b9bc25058bb807b5e481dc2 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 15:56:44 -0600 Subject: [PATCH 07/13] Add comprehensive unit tests for capture and UI automation Expanded test coverage for CaptureLanguageUtilities and UIAutomationUtilities, including new tests for RequiresLiveUiAutomationSource, TryClipBounds, TryAddUniqueOverlayItem, and 
SortOverlayItems. Added ImageMethodsTests to verify ImageSourceToBitmap behavior. Updated ShouldUseNameFallback tests and improved using directives. --- Tests/CaptureLanguageUtilitiesTests.cs | 33 ++++++++++++++ Tests/ImageMethodsTests.cs | 49 ++++++++++++++++++++ Tests/UiAutomationUtilitiesTests.cs | 62 +++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 2 deletions(-) create mode 100644 Tests/ImageMethodsTests.cs diff --git a/Tests/CaptureLanguageUtilitiesTests.cs b/Tests/CaptureLanguageUtilitiesTests.cs index ccb5ba82..f54a88eb 100644 --- a/Tests/CaptureLanguageUtilitiesTests.cs +++ b/Tests/CaptureLanguageUtilitiesTests.cs @@ -48,4 +48,37 @@ public void SupportsTableOutput_ReturnsFalseForUiAutomation() { Assert.False(CaptureLanguageUtilities.SupportsTableOutput(new UiAutomationLang())); } + + [Fact] + public void RequiresLiveUiAutomationSource_ReturnsTrueForStaticUiAutomationWithoutSnapshot() + { + bool requiresLiveSource = CaptureLanguageUtilities.RequiresLiveUiAutomationSource( + new UiAutomationLang(), + isStaticImageSource: true, + hasFrozenUiAutomationSnapshot: false); + + Assert.True(requiresLiveSource); + } + + [Fact] + public void RequiresLiveUiAutomationSource_ReturnsFalseWhenFrozenSnapshotExists() + { + bool requiresLiveSource = CaptureLanguageUtilities.RequiresLiveUiAutomationSource( + new UiAutomationLang(), + isStaticImageSource: true, + hasFrozenUiAutomationSnapshot: true); + + Assert.False(requiresLiveSource); + } + + [Fact] + public void RequiresLiveUiAutomationSource_ReturnsFalseForOcrLanguageOnStaticImage() + { + bool requiresLiveSource = CaptureLanguageUtilities.RequiresLiveUiAutomationSource( + new GlobalLang("en-US"), + isStaticImageSource: true, + hasFrozenUiAutomationSnapshot: false); + + Assert.False(requiresLiveSource); + } } diff --git a/Tests/ImageMethodsTests.cs b/Tests/ImageMethodsTests.cs new file mode 100644 index 00000000..4d166e39 --- /dev/null +++ b/Tests/ImageMethodsTests.cs @@ -0,0 +1,49 @@ +using System.Drawing; 
+using System.Windows; +using System.Windows.Media; +using System.Windows.Media.Imaging; +using Text_Grab; + +namespace Tests; + +public class ImageMethodsTests +{ + [WpfFact] + public void ImageSourceToBitmap_ConvertsBitmapSourceDerivedImages() + { + byte[] pixels = + [ + 0, 0, 255, 255, + 0, 255, 0, 255, + 255, 0, 0, 255, + 255, 255, 255, 255 + ]; + + BitmapSource source = BitmapSource.Create( + 2, + 2, + 96, + 96, + PixelFormats.Bgra32, + null, + pixels, + 8); + CroppedBitmap cropped = new(source, new Int32Rect(1, 0, 1, 2)); + + using Bitmap? bitmap = ImageMethods.ImageSourceToBitmap(cropped); + + Assert.NotNull(bitmap); + Assert.Equal(1, bitmap!.Width); + Assert.Equal(2, bitmap.Height); + } + + [WpfFact] + public void ImageSourceToBitmap_ReturnsNullForNonBitmapImageSources() + { + DrawingImage drawingImage = new(); + + Bitmap? bitmap = ImageMethods.ImageSourceToBitmap(drawingImage); + + Assert.Null(bitmap); + } +} diff --git a/Tests/UiAutomationUtilitiesTests.cs b/Tests/UiAutomationUtilitiesTests.cs index 601b2539..7d073e32 100644 --- a/Tests/UiAutomationUtilitiesTests.cs +++ b/Tests/UiAutomationUtilitiesTests.cs @@ -1,3 +1,4 @@ +using System.Linq; using System.Windows; using System.Windows.Automation; using Text_Grab.Models; @@ -62,14 +63,18 @@ public void ShouldUseNameFallback_SkipsStructuralControls() Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Group)); Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Pane)); Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Custom)); + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Button)); + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.SplitButton)); + Assert.False(UIAutomationUtilities.ShouldUseNameFallback(ControlType.ComboBox)); } [Fact] - public void ShouldUseNameFallback_AllowsLeafControls() + public void ShouldUseNameFallback_AllowsVisibleTextContainers() { 
Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Text)); - Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.Button)); Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.ListItem)); + Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.MenuItem)); + Assert.True(UIAutomationUtilities.ShouldUseNameFallback(ControlType.TabItem)); } [Fact] @@ -104,4 +109,57 @@ public void GetPointProbePoints_ReturnsCenterThenCrosshairNeighbors() Assert.Contains(new Point(25, 38), probePoints); Assert.Contains(new Point(25, 42), probePoints); } + + [Fact] + public void TryClipBounds_ReturnsIntersectionForOverlappingRects() + { + bool clipped = UIAutomationUtilities.TryClipBounds( + new Rect(10, 10, 50, 50), + new Rect(30, 25, 50, 50), + out Rect result); + + Assert.True(clipped); + Assert.Equal(new Rect(30, 25, 30, 35), result); + } + + [Fact] + public void TryClipBounds_ReturnsFalseWhenBoundsDoNotIntersect() + { + bool clipped = UIAutomationUtilities.TryClipBounds( + new Rect(10, 10, 20, 20), + new Rect(100, 100, 20, 20), + out Rect result); + + Assert.False(clipped); + Assert.Equal(Rect.Empty, result); + } + + [Fact] + public void TryAddUniqueOverlayItem_DeduplicatesNormalizedTextAndBounds() + { + HashSet seen = []; + List output = []; + UiAutomationOverlayItem first = new(" Hello world ", new Rect(10.01, 20.01, 30.01, 40.01), UiAutomationOverlaySource.ElementBounds); + UiAutomationOverlayItem second = new("Hello world", new Rect(10.04, 20.04, 30.04, 40.04), UiAutomationOverlaySource.VisibleTextRange); + + bool addedFirst = UIAutomationUtilities.TryAddUniqueOverlayItem(first, seen, output); + bool addedSecond = UIAutomationUtilities.TryAddUniqueOverlayItem(second, seen, output); + + Assert.True(addedFirst); + Assert.False(addedSecond); + Assert.Single(output); + } + + [Fact] + public void SortOverlayItems_OrdersTopThenLeft() + { + IReadOnlyList sorted = UIAutomationUtilities.SortOverlayItems( + [ + new 
UiAutomationOverlayItem("Bottom", new Rect(40, 30, 10, 10), UiAutomationOverlaySource.ElementBounds), + new UiAutomationOverlayItem("Right", new Rect(25, 10, 10, 10), UiAutomationOverlaySource.ElementBounds), + new UiAutomationOverlayItem("Left", new Rect(10, 10, 10, 10), UiAutomationOverlaySource.ElementBounds), + ]); + + Assert.Equal(["Left", "Right", "Bottom"], sorted.Select(item => item.Text)); + } } From ca7cefa3540a9d87c40efc97e71a919e1293c01f Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 15:56:59 -0600 Subject: [PATCH 08/13] Add UI Automation overlay support to GrabFrame window Enables rendering of UI Automation overlays in the GrabFrame window, allowing users to view and interact with detected UI elements when a UI Automation language is selected. Adds logic to capture overlays, render them as word borders, and fall back to OCR when overlays are unavailable. Introduces user feedback messaging for unsupported scenarios, improves language selection synchronization, and refactors word border management. Updates XAML to include a message border for user notifications. Also fixes bitmap handling and ensures robust state management when switching between live and static image modes. 
--- .../Views/FullscreenGrab.SelectionStyles.cs | 17 +- Text-Grab/Views/GrabFrame.xaml | 26 ++ Text-Grab/Views/GrabFrame.xaml.cs | 391 +++++++++++++++--- 3 files changed, 367 insertions(+), 67 deletions(-) diff --git a/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs b/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs index 33fdaa40..c518740e 100644 --- a/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs +++ b/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs @@ -978,10 +978,16 @@ private Rect GetHistoryPositionRect(FullscreenCaptureResult selection) return ImageMethods.BitmapToImageSource(capturedBitmap); } - private Task PlaceGrabFrameInSelectionRectAsync(FullscreenCaptureResult selection) + private async Task PlaceGrabFrameInSelectionRectAsync(FullscreenCaptureResult selection) { BitmapSource? frozenImage = GetBitmapSourceForGrabFrame(selection); - GrabFrame grabFrame = frozenImage is not null ? new GrabFrame(frozenImage) : new GrabFrame(); + ILanguage selectedLanguage = LanguagesComboBox.SelectedItem as ILanguage ?? LanguageUtilities.GetOCRLanguage(); + IntPtr fullscreenGrabHandle = new WindowInteropHelper(this).Handle; + IReadOnlyCollection? excludedHandles = fullscreenGrabHandle == IntPtr.Zero ? null : [fullscreenGrabHandle]; + UiAutomationOverlaySnapshot? uiAutomationSnapshot = selectedLanguage is UiAutomationLang + ? await UIAutomationUtilities.GetOverlaySnapshotFromRegionAsync(selection.CaptureRegion, excludedHandles) + : null; + GrabFrame grabFrame = frozenImage is not null ? 
new GrabFrame(frozenImage, uiAutomationSnapshot) : new GrabFrame(); DpiScale dpi = VisualTreeHelper.GetDpi(this); Rect selectionRect = new( @@ -1008,7 +1014,6 @@ private Task PlaceGrabFrameInSelectionRectAsync(FullscreenCaptureResult selectio DisposeBitmapSource(BackgroundImage); WindowUtilities.CloseAllFullscreenGrabs(); - return Task.CompletedTask; } private static bool IsTemplateAction(ButtonInfo action) => action.ClickEvent == "ApplyTemplate_Click"; @@ -1029,6 +1034,8 @@ private async Task CommitSelectionAsync(FullscreenCaptureResult selection, bool bool isSingleLine = SingleLineMenuItem is not null && SingleLineMenuItem.IsChecked; bool isTable = TableMenuItem is not null && TableMenuItem.IsChecked; TextFromOCR = string.Empty; + IntPtr fullscreenGrabHandle = new WindowInteropHelper(this).Handle; + IReadOnlyCollection? excludedHandles = fullscreenGrabHandle == IntPtr.Zero ? null : [fullscreenGrabHandle]; if (isSmallClick && selection.SelectionStyle == FsgSelectionStyle.Region) { @@ -1047,7 +1054,7 @@ private async Task CommitSelectionAsync(FullscreenCaptureResult selection, bool } else if (selectedOcrLang is UiAutomationLang) { - TextFromOCR = await OcrUtilities.GetTextFromAbsoluteRectAsync(selection.CaptureRegion, selectedOcrLang); + TextFromOCR = await OcrUtilities.GetTextFromAbsoluteRectAsync(selection.CaptureRegion, selectedOcrLang, excludedHandles); } else if (selection.CapturedImage is not null) { @@ -1062,7 +1069,7 @@ private async Task CommitSelectionAsync(FullscreenCaptureResult selection, bool } else { - TextFromOCR = await OcrUtilities.GetTextFromAbsoluteRectAsync(selection.CaptureRegion, selectedOcrLang); + TextFromOCR = await OcrUtilities.GetTextFromAbsoluteRectAsync(selection.CaptureRegion, selectedOcrLang, excludedHandles); } if (DefaultSettings.UseHistory && !isSmallClick) diff --git a/Text-Grab/Views/GrabFrame.xaml b/Text-Grab/Views/GrabFrame.xaml index a768d5ea..7e43a8f4 100644 --- a/Text-Grab/Views/GrabFrame.xaml +++ 
b/Text-Grab/Views/GrabFrame.xaml @@ -615,6 +615,32 @@ + + + + + + diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index cfaf042d..949000f6 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -68,6 +68,9 @@ public partial class GrabFrame : Window private bool isStaticImageSource = false; private readonly Dictionary movingWordBordersDictionary = []; private IOcrLinesWords? ocrResultOfWindow; + private UiAutomationOverlaySnapshot? frozenUiAutomationSnapshot; + private UiAutomationOverlaySnapshot? liveUiAutomationSnapshot; + private readonly DispatcherTimer frameMessageTimer = new(); private readonly DispatcherTimer reDrawTimer = new(); private readonly DispatcherTimer reSearchTimer = new(); private Side resizingSide = Side.None; @@ -75,6 +78,7 @@ public partial class GrabFrame : Window private Point startingMovingPoint; private readonly UndoRedo UndoRedo = new(); private bool wasAltHeld = false; + private bool isSyncingLanguageSelection = false; private double windowFrameImageScale = 1; private readonly ObservableCollection wordBorders = []; private static readonly Settings DefaultSettings = AppUtilities.TextGrabSettings; @@ -141,10 +145,11 @@ public GrabFrame(string imagePath) /// /// Creates a GrabFrame pre-loaded with a frozen image cropped from a Fullscreen Grab selection. - /// The frame opens in freeze mode showing the provided bitmap and immediately runs OCR. + /// The frame opens in freeze mode showing the provided bitmap and can render either OCR results + /// or a pre-captured UI Automation snapshot, depending on the selected language. /// /// The cropped bitmap to display as the initial frozen background. - public GrabFrame(BitmapSource frozenImage) + public GrabFrame(BitmapSource frozenImage, UiAutomationOverlaySnapshot? 
uiAutomationSnapshot = null) { StandardInitialize(); @@ -152,6 +157,7 @@ public GrabFrame(BitmapSource frozenImage) frameContentImageSource = frozenImage; hasLoadedImageSource = true; isStaticImageSource = true; + frozenUiAutomationSnapshot = uiAutomationSnapshot; Loaded += (s, e) => { @@ -269,7 +275,10 @@ private async Task LoadContentFromHistory(HistoryInfo history) { FrameText = history.TextContent; currentLanguage = history.OcrLanguage; + SyncLanguageComboBoxSelection(currentLanguage); isStaticImageSource = true; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; string imageName = Path.GetFileName(history.ImagePath); @@ -289,7 +298,14 @@ private async Task LoadContentFromHistory(HistoryInfo history) hasLoadedImageSource = true; GrabFrameImage.Source = frameContentImageSource; FreezeGrabFrame(); - NotifyIfUiAutomationNeedsLiveSource(currentLanguage); + + List? wbInfoList = null; + + if (!string.IsNullOrWhiteSpace(history.WordBorderInfoJson)) + wbInfoList = JsonSerializer.Deserialize>(history.WordBorderInfoJson); + + if (wbInfoList is not { Count: > 0 }) + NotifyIfUiAutomationNeedsLiveSource(currentLanguage); if (history.PositionRect != Rect.Empty) { @@ -309,11 +325,6 @@ private async Task LoadContentFromHistory(HistoryInfo history) } } - List? 
wbInfoList = null; - - if (!string.IsNullOrWhiteSpace(history.WordBorderInfoJson)) - wbInfoList = JsonSerializer.Deserialize>(history.WordBorderInfoJson); - if (wbInfoList is not null && wbInfoList.Count > 0) { ScaleHistoryWordBordersToCanvas(history, wbInfoList); @@ -487,6 +498,9 @@ private void StandardInitialize() translationTimer.Interval = new(0, 0, 0, 0, 1000); translationTimer.Tick += TranslationTimer_Tick; + frameMessageTimer.Interval = TimeSpan.FromSeconds(4); + frameMessageTimer.Tick += FrameMessageTimer_Tick; + _ = UndoRedo.HasUndoOperations(); _ = UndoRedo.HasRedoOperations(); @@ -496,6 +510,55 @@ private void StandardInitialize() DataContext = this; } + private void FrameMessageTimer_Tick(object? sender, EventArgs e) + { + frameMessageTimer.Stop(); + HideFrameMessage(); + } + + private void HideFrameMessage() + { + FrameMessageBorder.Visibility = Visibility.Collapsed; + FrameMessageTextBlock.Text = string.Empty; + } + + private void ShowFrameMessage(string message) + { + if (string.IsNullOrWhiteSpace(message)) + return; + + FrameMessageTextBlock.Text = message; + FrameMessageBorder.Visibility = Visibility.Visible; + frameMessageTimer.Stop(); + frameMessageTimer.Start(); + } + + private void SyncLanguageComboBoxSelection(ILanguage language) + { + if (LanguagesComboBox.Items.Count == 0) + return; + + List availableLanguages = [.. 
LanguagesComboBox.Items.OfType()]; + int selectedIndex = CaptureLanguageUtilities.FindPreferredLanguageIndex( + availableLanguages, + language.LanguageTag, + language); + + if (selectedIndex < 0 || LanguagesComboBox.SelectedIndex == selectedIndex) + return; + + isSyncingLanguageSelection = true; + try + { + LanguagesComboBox.SelectedIndex = selectedIndex; + currentLanguage = availableLanguages[selectedIndex]; + } + finally + { + isSyncingLanguageSelection = false; + } + } + #endregion Constructors #region Properties @@ -551,10 +614,7 @@ public static bool CheckKey(VirtualKeyCodes code) public HistoryInfo AsHistoryItem() { - System.Drawing.Bitmap? bitmap = null; - - if (frameContentImageSource is BitmapImage image) - bitmap = ImageMethods.BitmapImageToBitmap(image); + System.Drawing.Bitmap? bitmap = ImageMethods.ImageSourceToBitmap(frameContentImageSource); List wbInfoList = []; @@ -727,6 +787,9 @@ public void GrabFrame_Unloaded(object sender, RoutedEventArgs e) reDrawTimer.Stop(); reDrawTimer.Tick -= ReDrawTimer_Tick; + frameMessageTimer.Stop(); + frameMessageTimer.Tick -= FrameMessageTimer_Tick; + translationTimer.Stop(); translationTimer.Tick -= TranslationTimer_Tick; translationSemaphore.Dispose(); @@ -967,7 +1030,10 @@ private async void AddNewWordBorder(Border selectBorder) rect = new(rect.X + 4, rect.Y, (rect.Width * dpi.DpiScaleX) + 10, rect.Height * dpi.DpiScaleY); // Language language = CurrentLanguage.AsLanguage() ?? LanguageUtilities.GetCurrentInputLanguage().AsLanguage() ?? new Language("en-US"); ILanguage language = CurrentLanguage ?? 
LanguageUtilities.GetCurrentInputLanguage(); - string ocrText = await OcrUtilities.GetTextFromAbsoluteRectAsync(rect.GetScaleSizeByFraction(viewBoxZoomFactor), language); + string ocrText = await OcrUtilities.GetTextFromAbsoluteRectAsync( + rect.GetScaleSizeByFraction(viewBoxZoomFactor), + language, + GetUiAutomationExcludedHandles()); if (language is not UiAutomationLang && DefaultSettings.CorrectErrors) ocrText = ocrText.TryFixEveryWordLetterNumberErrors(); @@ -1221,7 +1287,78 @@ private void DeleteWordBordersExecuted(object sender, ExecutedRoutedEventArgs? e reSearchTimer.Start(); } - private async Task DrawRectanglesAroundWords(string searchWord = "") + private void ClearRenderedWordBorders() + { + RectanglesCanvas.Children.Clear(); + wordBorders.Clear(); + } + + private IReadOnlyCollection? GetUiAutomationExcludedHandles() + { + IntPtr handle = new System.Windows.Interop.WindowInteropHelper(this).Handle; + return handle == IntPtr.Zero ? null : [handle]; + } + + private (double ViewBoxZoomFactor, double BorderToCanvasX, double BorderToCanvasY) GetOverlayRenderMetrics() + { + double viewBoxZoomFactor = CanvasViewBox.GetHorizontalScaleFactor(); + if (!double.IsFinite(viewBoxZoomFactor) || viewBoxZoomFactor <= 0 || viewBoxZoomFactor > 4) + viewBoxZoomFactor = 1; + + Point canvasOriginInBorder = RectanglesCanvas.TranslatePoint(new Point(0, 0), RectanglesBorder); + return (viewBoxZoomFactor, -canvasOriginInBorder.X, -canvasOriginInBorder.Y); + } + + private WordBorder CreateWordBorderFromSourceRect( + Windows.Foundation.Rect sourceRect, + double sourceScale, + string text, + int lineNumber, + SolidColorBrush backgroundBrush, + DpiScale dpi, + double viewBoxZoomFactor, + double borderToCanvasX, + double borderToCanvasY) + { + return new() + { + Width = ((sourceRect.Width / (dpi.DpiScaleX * sourceScale)) + 2) / viewBoxZoomFactor, + Height = ((sourceRect.Height / (dpi.DpiScaleY * sourceScale)) + 2) / viewBoxZoomFactor, + Top = ((sourceRect.Y / (dpi.DpiScaleY * 
sourceScale) - 1) + borderToCanvasY) / viewBoxZoomFactor, + Left = ((sourceRect.X / (dpi.DpiScaleX * sourceScale) - 1) + borderToCanvasX) / viewBoxZoomFactor, + Word = text, + OwnerGrabFrame = this, + LineNumber = lineNumber, + IsFromEditWindow = IsFromEditWindow, + MatchingBackground = backgroundBrush, + }; + } + + private void AddRenderedWordBorder(WordBorder wordBorderBox) + { + if (!IsOcrValid) + return; + + wordBorders.Add(wordBorderBox); + _ = RectanglesCanvas.Children.Add(wordBorderBox); + + UndoRedo.InsertUndoRedoOperation(UndoRedoOperation.AddWordBorder, + new GrabFrameOperationArgs() + { + WordBorder = wordBorderBox, + WordBorders = wordBorders, + GrabFrameCanvas = RectanglesCanvas + }); + } + + private Task DrawRectanglesAroundWords(string searchWord = "") + { + return CurrentLanguage is UiAutomationLang + ? DrawUiAutomationRectanglesAsync(searchWord) + : DrawOcrRectanglesAsync(searchWord); + } + + private async Task DrawOcrRectanglesAsync(string searchWord = "") { if (isDrawing || IsDragOver) return; @@ -1232,8 +1369,7 @@ private async Task DrawRectanglesAroundWords(string searchWord = "") if (string.IsNullOrWhiteSpace(searchWord)) searchWord = SearchBox.Text; - RectanglesCanvas.Children.Clear(); - wordBorders.Clear(); + ClearRenderedWordBorders(); DpiScale dpi = VisualTreeHelper.GetDpi(this); System.Drawing.Rectangle rectCanvasSize = GetContentAreaScreenRect(); @@ -1255,22 +1391,22 @@ private async Task DrawRectanglesAroundWords(string searchWord = "") isSpaceJoining = CurrentLanguage!.IsSpaceJoining(); - System.Drawing.Bitmap? bmp = Singleton.Instance.CachedBitmap; + System.Drawing.Bitmap? 
bmp = null; bool shouldDisposeBmp = false; - if (bmp is null && frameContentImageSource is BitmapSource bmpImg) + if (isStaticImageSource && frameContentImageSource is BitmapSource bmpImg) { bmp = ImageMethods.BitmapSourceToBitmap(bmpImg); shouldDisposeBmp = true; } + else + { + bmp = ImageMethods.GetRegionOfScreenAsBitmap(rectCanvasSize, cacheResult: false); + shouldDisposeBmp = true; + } int lineNumber = 0; - double viewBoxZoomFactor = CanvasViewBox.GetHorizontalScaleFactor(); - if (!double.IsFinite(viewBoxZoomFactor) || viewBoxZoomFactor <= 0 || viewBoxZoomFactor > 4) - viewBoxZoomFactor = 1; - Point canvasOriginInBorder = RectanglesCanvas.TranslatePoint(new Point(0, 0), RectanglesBorder); - double borderToCanvasX = -canvasOriginInBorder.X; - double borderToCanvasY = -canvasOriginInBorder.Y; + (double viewBoxZoomFactor, double borderToCanvasX, double borderToCanvasY) = GetOverlayRenderMetrics(); foreach (IOcrLine ocrLine in ocrResultOfWindow.Lines) { @@ -1293,18 +1429,16 @@ private async Task DrawRectanglesAroundWords(string searchWord = "") if (DefaultSettings.CorrectToLatin) ocrText = ocrText.ReplaceGreekOrCyrillicWithLatin(); - WordBorder wordBorderBox = new() - { - Width = ((lineRect.Width / (dpi.DpiScaleX * windowFrameImageScale)) + 2) / viewBoxZoomFactor, - Height = ((lineRect.Height / (dpi.DpiScaleY * windowFrameImageScale)) + 2) / viewBoxZoomFactor, - Top = ((lineRect.Y / (dpi.DpiScaleY * windowFrameImageScale) - 1) + borderToCanvasY) / viewBoxZoomFactor, - Left = ((lineRect.X / (dpi.DpiScaleX * windowFrameImageScale) - 1) + borderToCanvasX) / viewBoxZoomFactor, - Word = ocrText, - OwnerGrabFrame = this, - LineNumber = lineNumber, - IsFromEditWindow = IsFromEditWindow, - MatchingBackground = backgroundBrush, - }; + WordBorder wordBorderBox = CreateWordBorderFromSourceRect( + lineRect, + windowFrameImageScale, + ocrText, + lineNumber, + backgroundBrush, + dpi, + viewBoxZoomFactor, + borderToCanvasX, + borderToCanvasY); if 
(CurrentLanguage!.IsRightToLeft()) { @@ -1314,19 +1448,7 @@ private async Task DrawRectanglesAroundWords(string searchWord = "") wordBorderBox.Word = sb.ToString(); } - if (IsOcrValid) - { - wordBorders.Add(wordBorderBox); - _ = RectanglesCanvas.Children.Add(wordBorderBox); - - UndoRedo.InsertUndoRedoOperation(UndoRedoOperation.AddWordBorder, - new GrabFrameOperationArgs() - { - WordBorder = wordBorderBox, - WordBorders = wordBorders, - GrabFrameCanvas = RectanglesCanvas - }); - } + AddRenderedWordBorder(wordBorderBox); lineNumber++; } @@ -1353,6 +1475,114 @@ private async Task DrawRectanglesAroundWords(string searchWord = "") } } + private async Task DrawUiAutomationRectanglesAsync(string searchWord = "") + { + if (isDrawing || IsDragOver) + return; + + isDrawing = true; + IsOcrValid = true; + + if (string.IsNullOrWhiteSpace(searchWord)) + searchWord = SearchBox.Text; + + ClearRenderedWordBorders(); + + DpiScale dpi = VisualTreeHelper.GetDpi(this); + System.Drawing.Rectangle rectCanvasSize = GetContentAreaScreenRect(); + if (rectCanvasSize.Width <= 0 || rectCanvasSize.Height <= 0) + { + isDrawing = false; + reDrawTimer.Start(); + return; + } + + UiAutomationOverlaySnapshot? overlaySnapshot = null; + if (isStaticImageSource && frozenUiAutomationSnapshot is not null) + { + overlaySnapshot = frozenUiAutomationSnapshot; + } + else + { + liveUiAutomationSnapshot = await UIAutomationUtilities.GetOverlaySnapshotFromRegionAsync( + new Rect(rectCanvasSize.X, rectCanvasSize.Y, rectCanvasSize.Width, rectCanvasSize.Height), + GetUiAutomationExcludedHandles()); + overlaySnapshot = liveUiAutomationSnapshot; + } + + if (overlaySnapshot is null || overlaySnapshot.Items.Count == 0) + { + isDrawing = false; + + if (DefaultSettings.UiAutomationFallbackToOcr) + { + await DrawOcrRectanglesAsync(searchWord); + return; + } + + reSearchTimer.Start(); + return; + } + + System.Drawing.Bitmap? 
bmp = Singleton.Instance.CachedBitmap; + bool shouldDisposeBmp = false; + + if (bmp is null && frameContentImageSource is BitmapSource bmpImg) + { + bmp = ImageMethods.BitmapSourceToBitmap(bmpImg); + shouldDisposeBmp = true; + } + + (double viewBoxZoomFactor, double borderToCanvasX, double borderToCanvasY) = GetOverlayRenderMetrics(); + Rect sourceBounds = overlaySnapshot.CaptureBounds; + int lineNumber = 0; + + foreach (UiAutomationOverlayItem overlayItem in overlaySnapshot.Items) + { + Rect relativeBounds = new( + overlayItem.ScreenBounds.X - sourceBounds.X, + overlayItem.ScreenBounds.Y - sourceBounds.Y, + overlayItem.ScreenBounds.Width, + overlayItem.ScreenBounds.Height); + + if (relativeBounds == Rect.Empty || relativeBounds.Width < 1 || relativeBounds.Height < 1) + continue; + + Windows.Foundation.Rect sourceRect = new(relativeBounds.X, relativeBounds.Y, relativeBounds.Width, relativeBounds.Height); + SolidColorBrush backgroundBrush = new(Colors.Black); + + if (bmp is not null) + backgroundBrush = GetBackgroundBrushFromBitmap(ref dpi, 1, bmp, ref sourceRect); + + WordBorder wordBorderBox = CreateWordBorderFromSourceRect( + sourceRect, + 1, + overlayItem.Text, + lineNumber, + backgroundBrush, + dpi, + viewBoxZoomFactor, + borderToCanvasX, + borderToCanvasY); + + AddRenderedWordBorder(wordBorderBox); + lineNumber++; + } + + isDrawing = false; + + if (shouldDisposeBmp) + bmp?.Dispose(); + + reSearchTimer.Start(); + + if (isTranslationEnabled && WindowsAiUtilities.CanDeviceUseWinAI()) + { + translationTimer.Stop(); + translationTimer.Start(); + } + } + private void EditMatchesMenuItem_Click(object sender, RoutedEventArgs e) { List selectedWords = [.. 
wordBorders.Where(m => m.IsSelected)]; @@ -1471,6 +1701,7 @@ private void FreezeGrabFrame() else { isStaticImageSource = false; + frozenUiAutomationSnapshot = null; frameContentImageSource = ImageMethods.GetWindowBoundsImage(this); GrabFrameImage.Source = frameContentImageSource; } @@ -1879,7 +2110,10 @@ private void LanguagesComboBox_MouseDown(object sender, MouseButtonEventArgs e) private void NotifyIfUiAutomationNeedsLiveSource(ILanguage language) { - if (language is not UiAutomationLang || !isStaticImageSource) + if (!CaptureLanguageUtilities.RequiresLiveUiAutomationSource( + language, + isStaticImageSource, + frozenUiAutomationSnapshot is not null)) return; string message = DefaultSettings.UiAutomationFallbackToOcr @@ -1891,11 +2125,20 @@ private void NotifyIfUiAutomationNeedsLiveSource(ILanguage language) private void LanguagesComboBox_SelectionChanged(object sender, SelectionChangedEventArgs e) { - if (!isLanguageBoxLoaded - || sender is not ComboBox langComboBox + if (sender is not ComboBox langComboBox || langComboBox.SelectedItem is not ILanguage pickedLang) return; + if (isSyncingLanguageSelection) + { + currentLanguage = pickedLang; + return; + } + + if (!isLanguageBoxLoaded) + return; + + HideFrameMessage(); currentLanguage = pickedLang; CaptureLanguageUtilities.PersistSelectedLanguage(pickedLang); NotifyIfUiAutomationNeedsLiveSource(pickedLang); @@ -1915,15 +2158,24 @@ private async Task LoadOcrLanguagesAsync() foreach (ILanguage language in availableLanguages) LanguagesComboBox.Items.Add(language); + ILanguage preferredLanguage = currentLanguage ?? LanguageUtilities.GetOCRLanguage(); int selectedIndex = CaptureLanguageUtilities.FindPreferredLanguageIndex( availableLanguages, - DefaultSettings.LastUsedLang, - LanguageUtilities.GetOCRLanguage()); + currentLanguage?.LanguageTag ?? 
DefaultSettings.LastUsedLang, + preferredLanguage); if (selectedIndex >= 0) { - LanguagesComboBox.SelectedIndex = selectedIndex; - currentLanguage = availableLanguages[selectedIndex]; + isSyncingLanguageSelection = true; + try + { + LanguagesComboBox.SelectedIndex = selectedIndex; + currentLanguage = availableLanguages[selectedIndex]; + } + finally + { + isSyncingLanguageSelection = false; + } } isLanguageBoxLoaded = true; @@ -2066,6 +2318,8 @@ private async void PasteExecuted(object sender, ExecutedRoutedEventArgs? e = nul hasLoadedImageSource = true; isStaticImageSource = true; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; FreezeToggleButton.IsChecked = true; FreezeGrabFrame(); FreezeToggleButton.Visibility = Visibility.Collapsed; @@ -2277,6 +2531,16 @@ private async void ReDrawTimer_Tick(object? sender, EventArgs? e) private async void RefreshBTN_Click(object? sender = null, RoutedEventArgs? e = null) { + if (CaptureLanguageUtilities.RequiresLiveUiAutomationSource( + CurrentLanguage, + isStaticImageSource, + frozenUiAutomationSnapshot is not null)) + { + ShowFrameMessage("Cannot use UI Automation on a saved image. Switch to an OCR language to refresh."); + return; + } + + HideFrameMessage(); reDrawTimer.Stop(); UndoRedo.StartTransaction(); @@ -2298,8 +2562,7 @@ private async void RefreshBTN_Click(object? sender = null, RoutedEventArgs? 
e = RectanglesCanvas.RenderTransform = Transform.Identity; IsOcrValid = false; ocrResultOfWindow = null; - RectanglesCanvas.Children.Clear(); - wordBorders.Clear(); + ClearRenderedWordBorders(); MatchesTXTBLK.Text = "- Matches"; UpdateFrameText(); @@ -2419,12 +2682,12 @@ private void ResetGrabFrame() GrabFrameImage.ClearValue(HeightProperty); IsOcrValid = false; ocrResultOfWindow = null; + liveUiAutomationSnapshot = null; if (!hasLoadedImageSource) frameContentImageSource = null; - RectanglesCanvas.Children.Clear(); - wordBorders.Clear(); + ClearRenderedWordBorders(); MatchesTXTBLK.Text = "- Matches"; UpdateFrameText(); } @@ -2817,6 +3080,8 @@ private async Task TryLoadImageFromPath(string path) frameContentImageSource = droppedImage; hasLoadedImageSource = true; isStaticImageSource = true; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; _currentImagePath = path; FreezeToggleButton.IsChecked = true; FreezeGrabFrame(); @@ -2957,6 +3222,9 @@ private void UnfreezeGrabFrame() { reDrawTimer.Stop(); hasLoadedImageSource = false; + isStaticImageSource = false; + frozenUiAutomationSnapshot = null; + liveUiAutomationSnapshot = null; ResetGrabFrame(); Topmost = true; GrabFrameImage.Opacity = 0; @@ -3211,8 +3479,7 @@ private void InvertColorsMI_Click(object sender, RoutedEventArgs e) }); reDrawTimer.Stop(); - RectanglesCanvas.Children.Clear(); - wordBorders.Clear(); + ClearRenderedWordBorders(); if (!IsFreezeMode) FreezeGrabFrame(); From eff76434027533ffbd6d26a5acd6da206039224d Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 19:03:25 -0600 Subject: [PATCH 09/13] Rename "UI Automation" language option to "Direct Text" Update all references from "UI Automation" to "Direct Text" in both code and UI. This includes changing the abbreviated name to "DT" and updating display, native, and culture names in UiAutomationLang. Adjust UI labels, descriptions, and toggle switches in LanguageSettings.xaml to reflect the new terminology. 
No functional changes, only terminology updates for clarity. --- Text-Grab/Models/UiAutomationLang.cs | 8 ++++---- Text-Grab/Pages/LanguageSettings.xaml | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/Text-Grab/Models/UiAutomationLang.cs b/Text-Grab/Models/UiAutomationLang.cs index fb993a60..713df09c 100644 --- a/Text-Grab/Models/UiAutomationLang.cs +++ b/Text-Grab/Models/UiAutomationLang.cs @@ -7,19 +7,19 @@ public class UiAutomationLang : ILanguage { public const string Tag = "UIAutomation"; - public string AbbreviatedName => "UIA"; + public string AbbreviatedName => "DT"; - public string DisplayName => "UI Automation Text"; + public string DisplayName => "Direct Text"; public string CurrentInputMethodLanguageTag => string.Empty; - public string CultureDisplayName => "UI Automation Text"; + public string CultureDisplayName => "Direct Text"; public string LanguageTag => Tag; public LanguageLayoutDirection LayoutDirection => LanguageLayoutDirection.Ltr; - public string NativeName => "UI Automation Text"; + public string NativeName => "Direct Text"; public string Script => string.Empty; } diff --git a/Text-Grab/Pages/LanguageSettings.xaml b/Text-Grab/Pages/LanguageSettings.xaml index 0d240b19..bd668561 100644 --- a/Text-Grab/Pages/LanguageSettings.xaml +++ b/Text-Grab/Pages/LanguageSettings.xaml @@ -62,19 +62,19 @@ FontSize="18" FontWeight="Bold" Style="{StaticResource TextBodyNormal}" - Text="UI Automation Text" /> + Text="Direct Text" /> - When the UI Automation language is selected, Text Grab will try to read native accessibility text from live application controls before falling back to OCR. + When the Direct Text language is selected, Text Grab will try to read native accessibility text from live application controls before falling back to OCR. 
- Show UI Automation as a language option + Show Direct Text as a language option - Include offscreen UI Automation elements + Include offscreen Direct Text elements From a1f9a28f96b2fe2fe7233ce680ccb3075991ec43 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 8 Mar 2026 01:16:29 +0000 Subject: [PATCH 10/13] Initial plan From 06c4f510c25275707fd90b29de18024a69136bde Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Sun, 8 Mar 2026 01:20:35 +0000 Subject: [PATCH 11/13] Fix bitmap disposal, isDrawing reset, and indentation per review comments Co-authored-by: TheJoeFin <7809853+TheJoeFin@users.noreply.github.com> --- Text-Grab/Utilities/OcrUtilities.cs | 4 ++-- Text-Grab/Views/GrabFrame.xaml.cs | 6 +++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/Text-Grab/Utilities/OcrUtilities.cs b/Text-Grab/Utilities/OcrUtilities.cs index fcaa4c20..cc794ebf 100644 --- a/Text-Grab/Utilities/OcrUtilities.cs +++ b/Text-Grab/Utilities/OcrUtilities.cs @@ -133,7 +133,7 @@ public static async Task GetRegionsTextAsTableAsync(Window passedWindow, int thisCorrectedTop = (int)absPosPoint.Y + selectedRegion.Top; Rectangle correctedRegion = new(thisCorrectedLeft, thisCorrectedTop, selectedRegion.Width, selectedRegion.Height); - Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(correctedRegion); + using Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(correctedRegion); double scale = await GetIdealScaleFactorForOcrAsync(bmp, compatibleLanguage); using Bitmap scaledBitmap = ImageMethods.ScaleBitmapUniform(bmp, scale); DpiScale dpiScale = VisualTreeHelper.GetDpi(passedWindow); @@ -212,7 +212,7 @@ public static async Task GetTextFromBitmapSourceAsTableAsync(BitmapSourc public static async Task<(IOcrLinesWords?, double)> GetOcrResultFromRegionAsync(Rectangle region, ILanguage language) { language = GetCompatibleOcrLanguage(language); - Bitmap bmp = 
ImageMethods.GetRegionOfScreenAsBitmap(region); + using Bitmap bmp = ImageMethods.GetRegionOfScreenAsBitmap(region); if (language is WindowsAiLang) { diff --git a/Text-Grab/Views/GrabFrame.xaml.cs b/Text-Grab/Views/GrabFrame.xaml.cs index 949000f6..44d9a1e3 100644 --- a/Text-Grab/Views/GrabFrame.xaml.cs +++ b/Text-Grab/Views/GrabFrame.xaml.cs @@ -1387,7 +1387,11 @@ private async Task DrawOcrRectanglesAsync(string searchWord = "") } if (ocrResultOfWindow is null) + { + isDrawing = false; + reDrawTimer.Start(); return; + } isSpaceJoining = CurrentLanguage!.IsSpaceJoining(); @@ -2687,7 +2691,7 @@ private void ResetGrabFrame() if (!hasLoadedImageSource) frameContentImageSource = null; - ClearRenderedWordBorders(); + ClearRenderedWordBorders(); MatchesTXTBLK.Text = "- Matches"; UpdateFrameText(); } From 1fcc08a4c9696bb523b6697fd80c6f1af0a987b2 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sat, 7 Mar 2026 19:41:32 -0600 Subject: [PATCH 12/13] Remove using statement for selectionBitmap in table OCR Disposing selectionBitmap with a using statement caused app crashes. Now, the bitmap is not disposed immediately, and a comment was added to highlight the issue and the need for further investigation. 
--- Text-Grab/Views/FullscreenGrab.SelectionStyles.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs b/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs index c518740e..777e916d 100644 --- a/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs +++ b/Text-Grab/Views/FullscreenGrab.SelectionStyles.cs @@ -1064,7 +1064,9 @@ private async Task CommitSelectionAsync(FullscreenCaptureResult selection, bool } else if (isTable) { - using Bitmap selectionBitmap = ImageMethods.GetRegionOfScreenAsBitmap(selection.CaptureRegion.AsRectangle()); + // TODO: Look into why this happens and find a better way to dispose the bitmap + // DO NOT add a using statement to this selected bitmap, it crashes the app + Bitmap selectionBitmap = ImageMethods.GetRegionOfScreenAsBitmap(selection.CaptureRegion.AsRectangle()); TextFromOCR = await OcrUtilities.GetTextFromBitmapAsTableAsync(selectionBitmap, selectedOcrLang); } else From 4036c0dfd35b28dcbc6cbd916b33e39b14aa5490 Mon Sep 17 00:00:00 2001 From: Joe Finney Date: Sun, 8 Mar 2026 22:59:52 -0500 Subject: [PATCH 13/13] Improve language picker to use real and keyboard languages Refined LanguagePicker to filter out internal OCR engine languages (UiAutomationLang, WindowsAiLang) and instead use the current keyboard input language for selection when needed. Updated imports and clarified parameter naming in GlobalLang. Changed UiAutomationLang tag and display values for clarity. This ensures the picker only shows real, user-facing languages and improves user experience. 
--- Text-Grab/Controls/LanguagePicker.xaml.cs | 17 +++++++++++++++++ Text-Grab/Models/GlobalLang.cs | 10 +++++----- Text-Grab/Models/UiAutomationLang.cs | 2 +- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/Text-Grab/Controls/LanguagePicker.xaml.cs b/Text-Grab/Controls/LanguagePicker.xaml.cs index 0ee7c2e7..1edc9075 100644 --- a/Text-Grab/Controls/LanguagePicker.xaml.cs +++ b/Text-Grab/Controls/LanguagePicker.xaml.cs @@ -1,7 +1,10 @@ using System.Collections.ObjectModel; +using System.Globalization; using System.Windows; using System.Windows.Controls; +using System.Windows.Input; using Text_Grab.Interfaces; +using Text_Grab.Models; using Text_Grab.Utilities; namespace Text_Grab.Controls; @@ -33,10 +36,24 @@ private void UserControl_Loaded(object sender, RoutedEventArgs e) ILanguage currentSelectedLanguage = LanguageUtilities.GetOCRLanguage(); + // get current keyboard language + CultureInfo keyboardLanguage = InputLanguageManager.Current.CurrentInputLanguage; + + // The challenge here is that UI Automation and Windows AI support any language + // since this picker will set the spell checker language and stuff like that + // it needs to represent real languages and not just OCR engine target languages + // As new models are supported they will need to be caught and filtered here too + + if (currentSelectedLanguage is UiAutomationLang or WindowsAiLang) + currentSelectedLanguage = new GlobalLang(keyboardLanguage.Name); + int selectedIndex = 0; int i = 0; foreach (ILanguage langFromUtil in LanguageUtilities.GetAllLanguages()) { + if (langFromUtil is UiAutomationLang or WindowsAiLang) + continue; + Languages.Add(langFromUtil); if (langFromUtil.LanguageTag == currentSelectedLanguage.LanguageTag) selectedIndex = i; diff --git a/Text-Grab/Models/GlobalLang.cs b/Text-Grab/Models/GlobalLang.cs index 09193fcf..7f4a1288 100644 --- a/Text-Grab/Models/GlobalLang.cs +++ b/Text-Grab/Models/GlobalLang.cs @@ -15,19 +15,19 @@ public
GlobalLang(Windows.Globalization.Language lang) OriginalLanguage = lang; } - public GlobalLang(string inputLang) + public GlobalLang(string inputLangTag) { - if (inputLang == "English") - inputLang = "en-US"; + if (inputLangTag == "English") + inputLangTag = "en-US"; Windows.Globalization.Language language = new(System.Globalization.CultureInfo.CurrentCulture.Name); try { - language = new(inputLang); + language = new(inputLangTag); } catch (System.ArgumentException ex) { - System.Diagnostics.Debug.WriteLine($"Failed to initialize language '{inputLang}': {ex.Message}"); + System.Diagnostics.Debug.WriteLine($"Failed to initialize language '{inputLangTag}': {ex.Message}"); } AbbreviatedName = language.AbbreviatedName; CultureDisplayName = language.DisplayName; diff --git a/Text-Grab/Models/UiAutomationLang.cs b/Text-Grab/Models/UiAutomationLang.cs index 713df09c..e7cc18d3 100644 --- a/Text-Grab/Models/UiAutomationLang.cs +++ b/Text-Grab/Models/UiAutomationLang.cs @@ -5,7 +5,7 @@ namespace Text_Grab.Models; public class UiAutomationLang : ILanguage { - public const string Tag = "UIAutomation"; + public const string Tag = "Direct-Txt"; public string AbbreviatedName => "DT";