From 1f0389571c669b336c70b1801be5a02de795c2d0 Mon Sep 17 00:00:00 2001 From: blindfs Date: Mon, 4 May 2026 07:48:50 +0800 Subject: [PATCH] feat: make workflow general for all elements, filtered by starting_role --- README.md | 3 +- src/app_executor.rs | 159 +++++++++++++++++++++++++++----------------- src/ax_element.rs | 52 +++++++-------- src/config.rs | 95 ++++++++++++++++++++------ src/key_listener.rs | 68 ++++++++++++------- 5 files changed, 244 insertions(+), 133 deletions(-) diff --git a/README.md b/README.md index 1d05065..a1977b6 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ you can ### Workflow -Users can define their own workflows (currently text only) in terms of sequences +Users can define their own workflows in terms of sequences of primitive actions. For instance, with the following configuration snippet, @@ -64,6 +64,7 @@ we can swiftly execute the utilities of the apple intelligence writing tool. [[text_workflows]] display = " Rewrite" key = "R" +starting_role = "TextField" actions = [ "Focus", "SelectAll", diff --git a/src/app_executor.rs b/src/app_executor.rs index 5f123bd..ba5ef0d 100644 --- a/src/app_executor.rs +++ b/src/app_executor.rs @@ -1,15 +1,14 @@ use crate::{ - AppSignal, DASH_BOARD_MENU_ITEMS, FilterMode, MenuItem, Mode, SCROLLBAR_MENU_ITEMS, - ScrollAction, TEXT_ACTION_MENU_ITEMS, TextAction, + AppSignal, DASH_BOARD_MENU_ITEMS, FilterMode, IMAGE_ACTION_MENU_ITEMS, MenuItem, Mode, + SCROLLBAR_MENU_ITEMS, ScrollAction, TEXT_ACTION_MENU_ITEMS, TextAction, action::{ OCRResult, WordPicker, get_dictionary_attributed_string, perform_ocr, screen_shot, text_from_clipboard, text_to_clipboard, }, ax_element::{ - ElementCache, ElementOfInterest, GetAttribute, RoleOfInterest, SetAttribute, Target, - traverse_elements, + ElementCache, ElementOfInterest, GetAttribute, SetAttribute, Target, traverse_elements, }, - config::{GlyphlowConfig, VisibilityCheckingLevel, WorkFlowAction}, + config::{GlyphlowConfig, RoleOfInterest, WorkFlow, WorkFlowAction}, drawer::GlyphlowDrawingLayer, os_util::get_focused_pid, util::{Frame, HintBox, estimate_frame_for_text, hint_boxes_from_frames, select_range_helper}, @@ -206,6 +205,17 @@ impl AppExecutor { self.draw_selected_frame(); } + fn draw_image_action_menu(&self) { + let mut msg = "Pick an Action for Image".to_string(); + msg.push_str(&Self::menu_string(&IMAGE_ACTION_MENU_ITEMS)); + for workflow in self.config.workflows.iter() { + if self.is_workflow_valid(workflow) { + msg.push_str(&format!("\n({}) {}", workflow.key, workflow.display)); + } + } + self.draw_menu(&msg); + } + fn draw_text_action_menu(&self, text: &str) { // Truncate long text let text = if text.len() > MAX_TEXT_DISPLAY_LEN { @@ -222,8 +232,10 @@ impl AppExecutor { for action in self.config.text_actions.iter() { msg.push_str(&format!("\n({}) {}", action.key, action.display)); } - for workflow in self.config.text_workflows.iter() { - msg.push_str(&format!("\n({}) {}", workflow.key, workflow.display)); + for workflow in self.config.workflows.iter() { + if self.is_workflow_valid(workflow) { + msg.push_str(&format!("\n({}) {}", workflow.key, workflow.display)); + } } self.draw_menu(&msg); } @@ -240,6 +252,12 @@ impl AppExecutor { if let Some(editor) = self.config.editor.as_ref() { msg.push_str(&format!("\n({}) {}", editor.key, editor.display)); } + // Workflows for current selected element + for workflow in self.config.workflows.iter() { + if self.is_workflow_valid(workflow) { + msg.push_str(&format!("\n({}) {}", workflow.key, workflow.display)); + } + } self.clear_drawing(); self.draw_selected_frame(); self.draw_menu(&msg); @@ -330,7 +348,7 @@ impl AppExecutor { self.selected = Some(ElementOfInterest::new( Some(focused_window), None, - RoleOfInterest::GenericNode, + RoleOfInterest::Generic, window_frame, )); @@ -356,12 +374,6 @@ impl AppExecutor { .. }) = self.selected.as_ref() { - let vis_level = if target == Target::MenuItem { - VisibilityCheckingLevel::Loose - } else { - self.config.visibility_checking_level - }; - traverse_elements( element, // Very loose visibility constraint @@ -369,7 +381,7 @@ impl AppExecutor { frame, &mut self.element_cache, &target, - vis_level, + self.config.visibility_checking_level, ); } } @@ -619,7 +631,8 @@ impl AppExecutor { } Target::Image => { self.selected = Some(eoi.clone()); - self.right_click_menu_on_selected(); + self.set_mode(Mode::ImageActionMenu); + self.draw_image_action_menu(); } Target::Custom(_) => { self.selected = Some(eoi.clone()); @@ -786,15 +799,70 @@ impl AppExecutor { } } + /// Check if a workflow's starting_role matches current selected element + fn is_workflow_valid(&self, wf: &WorkFlow) -> bool { + match wf.starting_role { + RoleOfInterest::Empty => self.selected.is_none(), + RoleOfInterest::Generic => self.selected.is_some(), + _ => self + .selected + .as_ref() + .is_some_and(|s| s.role == wf.starting_role), + } + } + async fn execute_workflow(&mut self, idx: usize) { let workflow = self .config - .text_workflows + .workflows .get(idx) .cloned() .expect("Internal Error: text workflow index: {idx} out of bounds."); - for act in workflow.actions.iter() { + for (act_idx, act) in workflow.actions.iter().enumerate() { + // Check starting_role, nothing happens if not match + if act_idx == 0 && !self.is_workflow_valid(&workflow) { + return; + } + + // Actions don't need a selected element + match act { + WorkFlowAction::Sleep(ms) => { + std::thread::sleep(Duration::from_millis(*ms)); + continue; + } + WorkFlowAction::SearchFor(ct) => { + self.selected = None; + self.activate(Target::Custom(ct.clone())); + if self.element_cache.cache.len() == 1 { + self.quick_follow().await; + } else if self.element_cache.cache.len() > 1 { + self.notify_then_deactivate( + "Multiple elements found.\nOperation canceled.\nPlease run manually", + Level::Warn, + ); + return; + } else { + return; + } + continue; + } + WorkFlowAction::KeyCombo(kb) => { + self.set_simulating_key(true); + for k in kb.keys.iter() { + Self::simulate_event(&EventType::KeyPress(*k)); + std::thread::sleep(Duration::from_millis(20)); + } + for k in kb.keys.iter().rev() { + Self::simulate_event(&EventType::KeyRelease(*k)); + } + self.set_simulating_key(false); + continue; + } + _ => (), + } + + // Actions that require a selected element let Some(ElementOfInterest { element: Some(element), context, @@ -804,11 +872,12 @@ impl AppExecutor { }) = self.selected.as_ref() else { self.notify_then_deactivate( - "Running workflow without any selected element.", + &format!("Running a workflow action with no element selected. {act:?} at idx {act_idx}"), Level::Error, ); return; }; + match act { WorkFlowAction::Focus => { Self::focus_on_element(element); @@ -821,24 +890,6 @@ impl AppExecutor { let center = frame.center(); self.right_click_menu_on_element(element, center); } - WorkFlowAction::Sleep(ms) => { - std::thread::sleep(Duration::from_millis(*ms)); - } - WorkFlowAction::SearchFor(ct) => { - self.selected = None; - self.activate(Target::Custom(ct.clone())); - if self.element_cache.cache.len() == 1 { - self.quick_follow().await; - } else if self.element_cache.cache.len() > 1 { - self.notify_then_deactivate( - "Multiple elements found.\nOperation canceled.\nPlease run manually", - Level::Warn, - ); - return; - } else { - return; - } - } WorkFlowAction::SelectAll => { let len = context .clone() @@ -846,17 +897,7 @@ impl AppExecutor { .unwrap_or(0) as isize; element.set_selected_range(0, len); } - WorkFlowAction::ComboKey(kb) => { - self.set_simulating_key(true); - for k in kb.keys.iter() { - Self::simulate_event(&EventType::KeyPress(*k)); - std::thread::sleep(Duration::from_millis(20)); - } - for k in kb.keys.iter().rev() { - Self::simulate_event(&EventType::KeyRelease(*k)); - } - self.set_simulating_key(false); - } + _ => (), } } } @@ -902,6 +943,16 @@ impl AppExecutor { AppSignal::DeActivate => { self.deactivate(); } + AppSignal::Press => { + self.click_on_selected(); + self.deactivate(); + } + AppSignal::ShowMenu => { + self.right_click_menu_on_selected(); + } + AppSignal::RunWorkFlow(idx) => { + self.execute_workflow(idx).await; + } AppSignal::ToggleMultiSelection => match self.target { Target::Text | Target::ImageOCR => { self.multi_selection.toggle(); @@ -1039,14 +1090,6 @@ impl AppExecutor { // TODO: // 1. URL handling let keep_drawing = match ta { - TextAction::Press => { - self.click_on_selected(); - false - } - TextAction::ShowMenu => { - self.right_click_menu_on_selected(); - true - } TextAction::Copy => { text_to_clipboard(&text); self.notify_then_deactivate("Copied to clipboard.", Level::Info); @@ -1097,10 +1140,6 @@ impl AppExecutor { self.take_external_action(idx, &text); true } - TextAction::WorkFlow(idx) => { - self.execute_workflow(idx).await; - true - } }; if !keep_drawing { diff --git a/src/ax_element.rs b/src/ax_element.rs index 001dca1..6a64400 100644 --- a/src/ax_element.rs +++ b/src/ax_element.rs @@ -1,5 +1,7 @@ use crate::{ - config::{CustomTarget, GlyphlowConfig, GlyphlowTheme, VisibilityCheckingLevel}, + config::{ + CustomTarget, GlyphlowConfig, GlyphlowTheme, RoleOfInterest, VisibilityCheckingLevel, + }, util::{Frame, HintBox, hint_boxes_from_frames, select_range_helper}, }; use accessibility::{AXAttribute, AXUIElement, AXUIElementAttributes}; @@ -21,19 +23,6 @@ use core_foundation::{ use objc2_core_foundation::{CGPoint, CGSize}; use std::collections::HashMap; -#[derive(Debug, PartialEq, Clone)] -pub enum RoleOfInterest { - Button, - GenericNode, - Image, - MenuItem, - ScrollBar, - StaticText, - TextField, - Cell, - CustomTarget, -} - const BASIC_ATTRIBUTES: [&str; 4] = [ kAXRoleAttribute, kAXPositionAttribute, @@ -165,7 +154,7 @@ impl ElementOfInterest { Self { element: None, context, - role: RoleOfInterest::GenericNode, + role: RoleOfInterest::Generic, frame, } } @@ -231,15 +220,18 @@ impl ElementCache { let (w, h) = frame.size(); match role { // NOTE: some roles to keep - RoleOfInterest::GenericNode - | RoleOfInterest::ScrollBar - | RoleOfInterest::TextField - | RoleOfInterest::CustomTarget => {} + RoleOfInterest::Generic | RoleOfInterest::ScrollBar | RoleOfInterest::CustomTarget => {} RoleOfInterest::Image if w.min(h) < self.image_min_size => { return; } // Keep large enough images RoleOfInterest::Image => (), + // Keep large enough text fields even if the text can be empty + RoleOfInterest::TextField + if (w < self.element_min_width || h < self.element_min_height) => + { + return; + } // Check text before size, keep small texts _ if context.is_some() => { // Skip elements with empty/nonsense text @@ -265,7 +257,7 @@ impl ElementCache { // NOTE: de-duplication for DOM elements let new_ele = ElementOfInterest::new(Some(element.clone()), context, role.clone(), frame); // Keep all nodes with Target::ChildElement/GenericNode, as it's basically a debugging mode - if role != RoleOfInterest::GenericNode + if role != RoleOfInterest::Generic && let Some(idx) = self.seen_center.get(¢er) { self.cache[*idx] = new_ele; @@ -531,7 +523,7 @@ pub fn traverse_elements( }) && child_fp.visible_frame(parent_frame).is_some() { - cache.add(&child, None, RoleOfInterest::GenericNode, child_fp.frame); + cache.add(&child, None, RoleOfInterest::Generic, child_fp.frame); } } } @@ -563,14 +555,18 @@ pub fn traverse_elements( // HACK: exclude electron elements scrolled off y axis, // but some menu items' ancestors (Discord) are of zero height - if *target != Target::MenuItem - && ele_fp.frame.is_some_and(|f| { + let vis_level = match target { + // NOTE: loose visibility checking for specific targets + Target::MenuItem | Target::Custom(_) => VisibilityCheckingLevel::Loose, + _ if ele_fp.frame.is_some_and(|f| { let (_, h) = f.size(); h == 1.0 || h == 0.0 - }) - { - return; - } + }) => + { + return; + } + _ => vis_level, + }; match vis_level { VisibilityCheckingLevel::Medium => { @@ -683,7 +679,7 @@ pub fn traverse_elements( cache.add( element, Some(value), - RoleOfInterest::StaticText, + RoleOfInterest::TextField, ele_fp.frame, ); } diff --git a/src/config.rs b/src/config.rs index b10ed23..f74482b 100644 --- a/src/config.rs +++ b/src/config.rs @@ -8,6 +8,20 @@ use serde::{Deserialize, Serialize}; use std::fs; use std::path::PathBuf; +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize)] +pub enum RoleOfInterest { + Button, + Generic, + Empty, + Image, + MenuItem, + ScrollBar, + StaticText, + TextField, + Cell, + CustomTarget, +} + /// Custom target element to search for in a workflow #[derive(Serialize, Deserialize, Debug, Default, PartialEq, Clone)] pub struct CustomTarget { @@ -25,7 +39,7 @@ pub enum WorkFlowAction { Focus, Press, ShowMenu, - ComboKey(KeyBinding), + KeyCombo(KeyBinding), SearchFor(CustomTarget), Sleep(u64), } @@ -34,9 +48,15 @@ pub enum WorkFlowAction { pub struct WorkFlow { pub display: String, pub key: char, + #[serde(default = "default_starting_role")] + pub starting_role: RoleOfInterest, pub actions: Vec, } +fn default_starting_role() -> RoleOfInterest { + RoleOfInterest::Generic +} + #[derive(Serialize, Deserialize, Debug)] pub struct CommandAction { pub command: String, @@ -285,8 +305,8 @@ pub struct GlyphlowConfig { pub theme: GlyphlowTheme, #[serde(default = "default_text_actions")] pub text_actions: Vec, - #[serde(default = "default_text_workflows")] - pub text_workflows: Vec, + #[serde(default = "default_workflows")] + pub workflows: Vec, #[serde(default = "default_scroll_distance")] pub scroll_distance: f64, #[serde(default = "default_element_min_width")] @@ -315,23 +335,56 @@ fn default_global_keybinding() -> KeyBinding { fn default_text_actions() -> Vec { vec![] } -fn default_text_workflows() -> Vec { - vec![WorkFlow { - key: 'R', - display: " ProofRead".into(), - actions: vec![ - WorkFlowAction::Focus, - WorkFlowAction::SelectAll, - WorkFlowAction::ShowMenu, - WorkFlowAction::Sleep(150), - WorkFlowAction::SearchFor(CustomTarget { - role: "MenuItem".into(), - title: Some("Proofread".into()), - ..Default::default() - }), - WorkFlowAction::Press, - ], - }] +fn default_workflows() -> Vec { + vec![ + WorkFlow { + key: 'R', + display: " ProofRead".into(), + starting_role: RoleOfInterest::TextField, + actions: vec![ + WorkFlowAction::Focus, + WorkFlowAction::SelectAll, + WorkFlowAction::ShowMenu, + WorkFlowAction::Sleep(150), + WorkFlowAction::SearchFor(CustomTarget { + role: "MenuItem".into(), + title: Some("Proofread".into()), + ..Default::default() + }), + WorkFlowAction::Press, + ], + }, + WorkFlow { + key: 'C', + display: "⮺ Copy".into(), + starting_role: RoleOfInterest::Image, + actions: vec![ + WorkFlowAction::ShowMenu, + WorkFlowAction::Sleep(150), + WorkFlowAction::SearchFor(CustomTarget { + role: "MenuItem".into(), + title: Some("Copy Image".into()), + ..Default::default() + }), + WorkFlowAction::Press, + ], + }, + WorkFlow { + key: 'L', + display: " Copy Link".into(), + starting_role: RoleOfInterest::Image, + actions: vec![ + WorkFlowAction::ShowMenu, + WorkFlowAction::Sleep(150), + WorkFlowAction::SearchFor(CustomTarget { + role: "MenuItem".into(), + title: Some("Copy Image Address".into()), + ..Default::default() + }), + WorkFlowAction::Press, + ], + }, + ] } fn default_scroll_distance() -> f64 { 0.05 @@ -369,7 +422,7 @@ impl Default for GlyphlowConfig { editor: None, theme: GlyphlowTheme::default(), text_actions: default_text_actions(), - text_workflows: default_text_workflows(), + workflows: default_workflows(), scroll_distance: default_scroll_distance(), element_min_width: default_element_min_width(), element_min_height: default_element_min_height(), diff --git a/src/key_listener.rs b/src/key_listener.rs index 49b9bfb..7098f6a 100644 --- a/src/key_listener.rs +++ b/src/key_listener.rs @@ -18,12 +18,9 @@ pub enum TextAction { Copy, Dictionary, Split, - Press, - ShowMenu, Editor, /// index of the action in the config UserDefined(usize), - WorkFlow(usize), } #[derive(Debug, PartialEq, Clone)] @@ -43,18 +40,25 @@ pub enum FilterMode { #[derive(Debug, PartialEq, Clone)] pub enum AppSignal { + // State signals DashBoard, Activate(Target), DeActivate, Filter(char, FilterMode), + // Sub state signals + FileUpdate(PathBuf), + ClearNotification, ToggleMultiSelection, + // Menu specific TextAction(TextAction), ScrollAction(ScrollAction), + // Generic Actions + RunWorkFlow(usize), ReadClipboard, ScreenShot, FrameOCR, - FileUpdate(PathBuf), - ClearNotification, + Press, + ShowMenu, } #[derive(Debug, PartialEq)] @@ -126,16 +130,14 @@ pub const TEXT_ACTION_MENU_ITEMS: [MenuItem; 5] = [ AppSignal::TextAction(TextAction::Dictionary), ), MenuItem::new("󰃻 Split", 'S', AppSignal::TextAction(TextAction::Split)), - MenuItem::new( - "󰳽 Press [Left Click]", - 'P', - AppSignal::TextAction(TextAction::Press), - ), - MenuItem::new( - " Menu [Right Click]", - 'M', - AppSignal::TextAction(TextAction::ShowMenu), - ), + MenuItem::new("󰳽 Press [Left Click]", 'P', AppSignal::Press), + MenuItem::new(" Menu [Right Click]", 'M', AppSignal::ShowMenu), +]; + +pub const IMAGE_ACTION_MENU_ITEMS: [MenuItem; 3] = [ + MenuItem::new("󱄺 Image OCR", 'O', AppSignal::FrameOCR), + MenuItem::new("󰳽 Press [Left Click]", 'P', AppSignal::Press), + MenuItem::new(" Menu [Right Click]", 'M', AppSignal::ShowMenu), ]; #[derive(Debug, PartialEq)] @@ -145,6 +147,7 @@ pub enum Mode { Idle, Scrolling, TextActionMenu, + ImageActionMenu, Editing, WordPicking, OCRResultFiltering, @@ -154,6 +157,7 @@ pub enum Mode { #[derive(Debug)] pub struct KeyListener { pub text_actions: HashMap, + pub image_actions: HashMap, pub dashboard_actions: HashMap, pub scroll_actions: HashMap, sender: Sender, @@ -167,20 +171,36 @@ impl KeyListener { pub fn new(sender: Sender, config: &GlyphlowConfig) -> KeyListener { let mut text_actions = - Self::iter_from(TEXT_ACTION_MENU_ITEMS) + // Order matters! + config + .workflows + .iter() + .enumerate() + .map(|(idx, wf)| (wf.key, AppSignal::RunWorkFlow(idx))) .chain( config.text_actions.iter().enumerate().map(|(idx, ca)| { (ca.key, AppSignal::TextAction(TextAction::UserDefined(idx))) }), ) - .chain( - config.text_workflows.iter().enumerate().map(|(idx, wf)| { - (wf.key, AppSignal::TextAction(TextAction::WorkFlow(idx))) - }), - ) + .chain(Self::iter_from(TEXT_ACTION_MENU_ITEMS)) .collect::>(); - let mut dashboard_actions = - Self::iter_from(DASH_BOARD_MENU_ITEMS).collect::>(); + + let mut dashboard_actions = config + .workflows + .iter() + .enumerate() + .map(|(idx, wf)| (wf.key, AppSignal::RunWorkFlow(idx))) + .chain(Self::iter_from(DASH_BOARD_MENU_ITEMS)) + .collect::>(); + + let image_actions = config + .workflows + .iter() + .enumerate() + .map(|(idx, wf)| (wf.key, AppSignal::RunWorkFlow(idx))) + .chain(Self::iter_from(IMAGE_ACTION_MENU_ITEMS)) + .collect::>(); + let scroll_actions = Self::iter_from(SCROLLBAR_MENU_ITEMS).collect::>(); if let Some(editor_command) = config.editor.as_ref() { @@ -193,6 +213,7 @@ impl KeyListener { KeyListener { text_actions, + image_actions, dashboard_actions, scroll_actions, sender, @@ -271,6 +292,7 @@ impl KeyListener { Mode::Filtering => self.filter_helper(&key, state, FilterMode::Generic), Mode::OCRResultFiltering => self.filter_helper(&key, state, FilterMode::OCR), Mode::TextActionMenu => self.helper(&key, &self.text_actions, state), + Mode::ImageActionMenu => self.helper(&key, &self.image_actions, state), Mode::Scrolling => self.helper(&key, &self.scroll_actions, state), Mode::WaitAndDeactivate => { self.send(AppSignal::DeActivate);