-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscan.py
More file actions
117 lines (90 loc) · 3.57 KB
/
scan.py
File metadata and controls
117 lines (90 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
from queue import Queue
import speech_recognition as sr
import pygetwindow as gw
import pyautogui
import threading
import argparse
import logging
import time
logging.getLogger().setLevel(logging.INFO)
class VoiceRecognizer(threading.Thread):
WORDS = ['print', 'scan', 'scam', 'skin', 'plane', 'green', 'flynn', 'step', 'stem', 'yeah', 'ten', 'stan']
def __init__(self, recognizer, queue, window_offset, window_title):
threading.Thread.__init__(self, daemon=True)
self.recognizer = recognizer
self.queue = queue
self.window_offset = window_offset
self.window_title = window_title
self.next_print = 0
def recognize_command(self, recognizer, audio):
try:
logging.info('Recognizing...')
command = recognizer.recognize_vosk(audio, language='en-US').lower()
logging.info(f'Command: {command}')
return command
except sr.UnknownValueError:
logging.info("Could not understand audio")
except sr.RequestError as e:
logging.info(f"Error: {e}")
def click_button(self):
try:
windows = gw.getWindowsWithTitle(self.window_title)
if not windows:
return False
window = windows[0]
window.activate()
x, y = self.window_offset
button_position = window.left + x, window.top + y
for _ in range(3):
pyautogui.click(button_position)
time.sleep(0.2)
except gw.PyGetWindowException as e:
# Ignore correct Windows errors
if '0' in str(e):
return
raise e
def is_scan_command(self, command):
if not command:
return False
for word in self.WORDS:
if word in command:
return True
return False
def work(self, audio):
command = self.recognize_command(self.recognizer, audio)
if not self.is_scan_command(command):
return
logging.info("Scanning...")
if self.next_print > time.time():
logging.info('Skipping due to next print')
return
self.next_print = time.time() + 6.0
self.click_button()
def run(self):
while True:
added_time, item = self.queue.get()
if time.time() - added_time > 5.0:
logging.info('Skipping old audio...')
else:
self.work(item)
self.queue.task_done()
def main(window_offset, window_title):
recognizer = sr.Recognizer()
microphone = sr.Microphone()
queue = Queue()
voice_recognizer = VoiceRecognizer(recognizer, queue, window_offset, window_title)
voice_recognizer.start()
with microphone as source:
logging.info('Adjusting for ambient noise...')
recognizer.adjust_for_ambient_noise(source)
while True:
logging.info('Listening for command...')
audio = recognizer.listen(source, phrase_time_limit=2)
queue.put((time.time(), audio))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Voice Recognition Script')
parser.add_argument('--x', type=int, help='X offset for button click', default=40)
parser.add_argument('--y', type=int, help='Y offset for button click', default=70)
parser.add_argument('--window-title', type=str, help='Window title of scanner', default='Not Another PDF Scanner 2')
args = parser.parse_args()
main(window_offset=(args.x, args.y), window_title=args.window_title)