import math
import time

import cv2
import mediapipe as mp
import numpy as np
import pyautogui
# ---- Tunable settings -------------------------------------------------------

# Divisor applied to the cursor step each frame; larger values give a
# smoother but slower-following pointer.
SMOOTHING: int = 7

# Fingertip separation, in pixels of the processed frame, below which two
# fingers count as pinched (the thumb+index gesture uses 0.8x this value).
CLICK_THRESHOLD: int = 40

# Minimum number of seconds between two successive clicks.
CLICK_COOLDOWN: float = 0.35

# Seconds an index+middle pinch must be held before it becomes a drag.
DRAG_THRESHOLD: float = 0.6

# When true, the annotated camera frame is shown in a preview window.
SHOW_CAMERA: bool = True

# Frames wider than this many pixels are scaled down before processing.
MAX_FRAME_WIDTH: int = 640
# MediaPipe hand-tracking setup: a single-hand detector plus the drawing
# helpers used to overlay the hand skeleton on the preview frame.
mp_hands = mp.solutions.hands
mp_draw = mp.solutions.drawing_utils
hands = mp_hands.Hands(
    max_num_hands=1,
    min_detection_confidence=0.6,
    min_tracking_confidence=0.6,
)

# Size of the screen; the index fingertip position is mapped onto this range.
screen_w, screen_h = pyautogui.size()

# ---- Mutable state shared across frames of the main loop --------------------
prev_x, prev_y = 0, 0      # smoothed cursor position from the previous frame
curr_x, curr_y = 0, 0      # smoothed cursor position for the current frame
last_click_time = 0        # time.time() of the most recent click (0 = never)
pinch_start_time = None    # time.time() when the current pinch began, or None
dragging = False           # True while the left button is held down for a drag
def lm_to_pixel(lm, frame_w, frame_h):
    """Convert a normalized landmark to pixel coordinates in the frame.

    Args:
        lm: Object exposing normalized ``x`` and ``y`` attributes.
        frame_w: Frame width in pixels.
        frame_h: Frame height in pixels.

    Returns:
        An ``(x, y)`` tuple of ints; fractional parts are truncated by
        ``int()``.
    """
    return int(lm.x * frame_w), int(lm.y * frame_h)
def distance(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components (index 0 and 1) of each point are used.
    """
    dx = p1[0] - p2[0]
    dy = p1[1] - p2[1]
    return math.hypot(dx, dy)
def _end_drag():
    """Release the left mouse button if a drag is currently in progress."""
    global dragging
    if not dragging:
        return
    try:
        pyautogui.mouseUp(_pause=False)
    except Exception as e:
        print("pyautogui.mouseUp() failed:", e)
    # Cleared even if mouseUp() failed so the gesture state cannot get stuck.
    dragging = False


# ---- Main loop --------------------------------------------------------------
# Gestures:
#   * index fingertip position       -> cursor position
#   * index+middle pinch, short      -> left click (issued when the pinch ends)
#   * index+middle pinch, held       -> drag (button down until the pinch ends)
#   * thumb+index pinch              -> right click
cap = cv2.VideoCapture(0)
time.sleep(1.0)  # short pause before the first read (presumably camera warm-up)

try:
    while True:
        success, frame = cap.read()
        if not success:
            print("Unable to read from webcam. Exiting.")
            break

        # Downscale wide frames so hand detection has fewer pixels to process.
        h0, w0 = frame.shape[:2]
        if w0 > MAX_FRAME_WIDTH:
            scale = MAX_FRAME_WIDTH / w0
            frame = cv2.resize(frame, (int(w0 * scale), int(h0 * scale)))

        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_h, frame_w = frame.shape[:2]
        results = hands.process(frame_rgb)

        if results.multi_hand_landmarks:
            hand = results.multi_hand_landmarks[0]

            # Landmarks 8 / 12 / 4 are used as index, middle and thumb tips.
            ix, iy = lm_to_pixel(hand.landmark[8], frame_w, frame_h)
            mx, my = lm_to_pixel(hand.landmark[12], frame_w, frame_h)
            tx, ty = lm_to_pixel(hand.landmark[4], frame_w, frame_h)

            if SHOW_CAMERA:
                cv2.circle(frame, (ix, iy), 6, (0, 255, 255), cv2.FILLED)
                cv2.circle(frame, (mx, my), 6, (255, 0, 255), cv2.FILLED)
                cv2.circle(frame, (tx, ty), 6, (0, 255, 0), cv2.FILLED)

            # Map the index fingertip to screen coordinates. The x axis is
            # reversed (frame left edge -> screen right edge).
            screen_x = np.interp(ix, (0, frame_w), (screen_w, 0))
            screen_y = np.interp(iy, (0, frame_h), (0, screen_h))

            # Move only a fraction of the remaining distance each frame to
            # damp jitter in the detected fingertip position.
            curr_x = prev_x + (screen_x - prev_x) / SMOOTHING
            curr_y = prev_y + (screen_y - prev_y) / SMOOTHING
            prev_x, prev_y = curr_x, curr_y

            try:
                pyautogui.moveTo(curr_x, curr_y, _pause=False)
            except Exception as e:
                print("pyautogui.moveTo() failed:", e)

            inter_dist = distance((ix, iy), (mx, my))
            thumb_index_dist = distance((ix, iy), (tx, ty))
            now = time.time()

            # LEFT CLICK / DRAG logic (index + middle pinch)
            if inter_dist < CLICK_THRESHOLD:
                if pinch_start_time is None:
                    pinch_start_time = now
                held_time = now - pinch_start_time

                # A pinch held long enough turns into a drag. No click is
                # sent while the pinch is still held, so a drag is never
                # preceded by stray clicks.
                if not dragging and held_time >= DRAG_THRESHOLD:
                    try:
                        pyautogui.mouseDown(_pause=False)
                    except Exception as e:
                        print("pyautogui.mouseDown() failed:", e)
                    else:
                        dragging = True
            else:
                if dragging:
                    # Pinch released after a drag: drop the button.
                    _end_drag()
                elif (pinch_start_time is not None
                      and (now - last_click_time) > CLICK_COOLDOWN):
                    # Pinch released before it became a drag: that is a click.
                    try:
                        pyautogui.click(_pause=False)
                    except Exception as e:
                        print("pyautogui.click() failed:", e)
                    else:
                        last_click_time = now
                        if SHOW_CAMERA:
                            cv2.putText(frame, "Click", (10, 60),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (0, 200, 0), 2)
                pinch_start_time = None

            # Keep the drag indicator visible for the whole drag.
            if dragging and SHOW_CAMERA:
                cv2.putText(frame, "Dragging...", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

            # RIGHT CLICK logic (thumb-index pinch, slightly tighter threshold)
            if thumb_index_dist < CLICK_THRESHOLD * 0.8:
                if (now - last_click_time) > CLICK_COOLDOWN:
                    try:
                        pyautogui.click(button='right', _pause=False)
                    except Exception as e:
                        print("pyautogui.click(button='right') failed:", e)
                    else:
                        last_click_time = now
                        if SHOW_CAMERA:
                            cv2.putText(frame, "Right Click", (10, 90),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                                        (200, 0, 200), 2)

            if SHOW_CAMERA:
                mp_draw.draw_landmarks(frame, hand, mp_hands.HAND_CONNECTIONS)
        else:
            # No hand in view: end any gesture in progress so the button is
            # not left held down and a stale pinch timer cannot trigger an
            # instant drag when the hand reappears.
            _end_drag()
            pinch_start_time = None

        if SHOW_CAMERA:
            cv2.putText(frame, "Index: move | Pinch(Index+Middle): left-click/drag | Thumb+Index: right-click",
                        (10, frame_h - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (220, 220, 220), 1)
            cv2.imshow("Hand Mouse Control", frame)

        # Controls: press 'q' to quit
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            break
except KeyboardInterrupt:
    # Ctrl+C is a normal way to stop the script; fall through to cleanup.
    print("Interrupted. Exiting.")
finally:
    # Always release the mouse button, the camera and any preview window,
    # including when the loop ends because of an error.
    _end_drag()
    cap.release()
    cv2.destroyAllWindows()