""" gaze_pinch_v0.py — Vision-Pro-style webcam controller, v0 (raw feel test)

What it does:

- Tracks your iris (gaze) from the built-in webcam -> moves the macOS cursor.
- Detects a thumb+index PINCH -> left click.
- 9-point calibration maps your raw gaze ratios to screen coordinates.

This is a DELIBERATELY RAW prototype. The gaze cursor WILL jitter — that is the point. Feeling that jitter is what motivates the v1 "snap-to-UI-element" design.

Requirements (run once): pip install mediapipe opencv-python pyautogui numpy

macOS permissions (System Settings > Privacy & Security):

- Camera: allow Terminal (or whatever runs python)
- Accessibility: allow Terminal <-- REQUIRED or the cursor won't move

Run: python gaze_pinch_v0.py

Keys:

- During calibration: look at the dot, press SPACE to capture (9 times)
- During run: 'q' or ESC to quit, 'c' to recalibrate
"""

import math
import time

import cv2
import mediapipe as mp
import numpy as np
import pyautogui

# ----------------------------- tuning knobs --------------------------------

# Each knob sits on its own line: an inline "#" comment swallows the rest of
# the line, so nothing may follow it.
PINCH_ON = 0.45          # pinch detected when (thumb-index dist / hand size) < this
PINCH_OFF = 0.60         # release threshold (hysteresis to avoid flicker)
CLICK_COOLDOWN = 0.40    # seconds between clicks
CALIB_SAMPLES = 12       # gaze samples averaged per calibration point
EURO_MIN_CUTOFF = 0.8    # lower = smoother but laggier
EURO_BETA = 0.012        # higher = more responsive to fast movement
CAM_INDEX = 0            # device index passed to cv2.VideoCapture

# ---------------------------------------------------------------------------

# NOTE(review): PyAutoGUI's fail-safe aborts when the pointer reaches a screen
# corner; presumably it is disabled because the gaze cursor is clipped to the
# screen and can legitimately land on a corner — confirm this is intended.
pyautogui.FAILSAFE = False
# No artificial delay after each PyAutoGUI call; the cursor is moved every frame.
pyautogui.PAUSE = 0.0
# Screen size in pixels, used to place calibration targets and clamp the cursor.
SCREEN_W, SCREEN_H = pyautogui.size()

# MediaPipe FaceMesh iris landmark indices (refine_landmarks=True)
L_IRIS, R_IRIS = 468, 473
# Eye-corner landmarks bounding each iris horizontally.
L_EYE_OUT, L_EYE_IN = 33, 133
R_EYE_IN, R_EYE_OUT = 362, 263
# Eyelid landmarks bounding each iris vertically.
L_LID_TOP, L_LID_BOT = 159, 145
R_LID_TOP, R_LID_BOT = 386, 374

# Hand landmark indices
WRIST, THUMB_TIP, INDEX_TIP, INDEX_MCP = 0, 4, 8, 5

# --------------------------- One Euro filter -------------------------------

class OneEuro:
    """One Euro low-pass filter for a single scalar signal.

    The smoothing cutoff adapts to the (itself low-passed) speed of the
    signal: ``cutoff = min_cutoff + beta * |dx_hat|``. Slow movement is
    smoothed heavily, fast movement is tracked with less lag.

    Call the instance as ``filt(x, t)`` with a sample and its timestamp in
    seconds; it returns the filtered sample.
    """

    def __init__(self, min_cutoff=1.0, beta=0.0, d_cutoff=1.0):
        """Store the filter parameters and reset the filter state.

        Args:
            min_cutoff: cutoff frequency used when the signal is not moving.
            beta: how strongly the cutoff grows with signal speed.
            d_cutoff: cutoff frequency for smoothing the derivative estimate.
        """
        self.min_cutoff, self.beta, self.d_cutoff = min_cutoff, beta, d_cutoff
        self.x_prev = None    # last filtered value; None until the first sample
        self.dx_prev = 0.0    # last filtered derivative
        self.t_prev = None    # timestamp of the last sample

    @staticmethod
    def _alpha(cutoff, dt):
        """Return the exponential-smoothing factor for ``cutoff`` over ``dt``."""
        tau = 1.0 / (2 * math.pi * cutoff)
        return 1.0 / (1.0 + tau / dt)

    def __call__(self, x, t):
        """Filter sample ``x`` taken at time ``t`` and return the smoothed value."""
        if self.x_prev is None:
            # First sample: nothing to smooth against, just seed the state.
            self.x_prev, self.t_prev = x, t
            return x
        # Clamp dt so repeated or out-of-order timestamps cannot divide by zero.
        dt = max(1e-3, t - self.t_prev)
        dx = (x - self.x_prev) / dt
        a_d = self._alpha(self.d_cutoff, dt)
        dx_hat = a_d * dx + (1 - a_d) * self.dx_prev
        # Faster movement raises the cutoff, i.e. less smoothing and less lag.
        cutoff = self.min_cutoff + self.beta * abs(dx_hat)
        a = self._alpha(cutoff, dt)
        x_hat = a * x + (1 - a) * self.x_prev
        self.x_prev, self.dx_prev, self.t_prev = x_hat, dx_hat, t
        return x_hat

def _ratio(val, lo, hi): if abs(hi - lo) < 1e-6: return 0.5 return float(np.clip((val - lo) / (hi - lo), 0.0, 1.0))

def gaze_ratio(lm):
    """Return (h, v) in roughly 0..1 from iris position within the eyes.

    Calibration absorbs the exact semantics; we just need stable features.

    Args:
        lm: indexable sequence of landmarks exposing ``.x`` and ``.y``,
            indexed with the module's iris / eye-corner / eyelid constants.

    Returns:
        ``(h, v)``: the mean over both eyes of the iris position between the
        eye corners (h) and between the eyelids (v). Each per-eye value is
        clipped to [0, 1] by ``_ratio``.
    """
    def eye_h(iris, outer, inner):
        # Sort the bounds so the ratio is well-defined whichever corner has
        # the smaller x coordinate.
        lo, hi = sorted([lm[outer].x, lm[inner].x])
        return _ratio(lm[iris].x, lo, hi)

    def eye_v(iris, top, bot):
        lo, hi = sorted([lm[top].y, lm[bot].y])
        return _ratio(lm[iris].y, lo, hi)

    h = (eye_h(L_IRIS, L_EYE_OUT, L_EYE_IN) + eye_h(R_IRIS, R_EYE_OUT, R_EYE_IN)) / 2
    v = (eye_v(L_IRIS, L_LID_TOP, L_LID_BOT) + eye_v(R_IRIS, R_LID_TOP, R_LID_BOT)) / 2
    return h, v

def quad_features(h, v):
    """Build the quadratic feature vector for one gaze sample.

    Returns a float64 array ``[1, h, v, h*v, h*h, v*v]``.
    """
    cross = h * v
    h_sq = h * h
    v_sq = v * v
    terms = [1.0, h, v, cross, h_sq, v_sq]
    return np.array(terms, dtype=np.float64)

def fit_map(samples, targets):
    """Fit the gaze -> screen mapping by least squares on quadratic features.

    Args:
        samples: list of ``(h, v)`` gaze ratios.
        targets: list of ``(sx, sy)`` screen coordinates, one per sample.

    Returns:
        ``(cx, cy)``: coefficient vectors over ``quad_features(h, v)`` for the
        screen x and y coordinates respectively.
    """
    # One row of quadratic features per calibration sample.
    A = np.array([quad_features(h, v) for (h, v) in samples])
    X = np.array([t[0] for t in targets])
    Y = np.array([t[1] for t in targets])
    # Screen x and y are fitted independently against the same design matrix.
    cx, *_ = np.linalg.lstsq(A, X, rcond=None)
    cy, *_ = np.linalg.lstsq(A, Y, rcond=None)
    return cx, cy

def apply_map(cx, cy, h, v):
    """Map a gaze ratio ``(h, v)`` to screen coordinates.

    Args:
        cx, cy: coefficient vectors as returned by ``fit_map``.
        h, v: gaze ratios as returned by ``gaze_ratio``.

    Returns:
        ``(sx, sy)`` as plain floats; the values are not clamped here.
    """
    f = quad_features(h, v)
    return float(f @ cx), float(f @ cy)

def pinch_strength(hand_lm):
    """Normalized thumb-index distance (smaller = pinching).

    Args:
        hand_lm: indexable sequence of hand landmarks exposing ``.x`` and
            ``.y``, indexed with the module's hand landmark constants.

    Returns:
        The thumb-tip to index-tip distance divided by the wrist to index-MCP
        distance, both measured in the x/y plane only.
    """
    def d(a, b):
        return math.hypot(hand_lm[a].x - hand_lm[b].x,
                          hand_lm[a].y - hand_lm[b].y)

    # The small epsilon keeps the division safe if the two landmarks coincide.
    hand_size = d(WRIST, INDEX_MCP) + 1e-6
    return d(THUMB_TIP, INDEX_TIP) / hand_size

# ------------------------------- main --------------------------------------

def _sample_gaze(cap, face, n_frames):
    """Average the gaze ratio over up to ``n_frames`` camera frames.

    Frames that fail to read or contain no detected face are skipped and do
    not count toward the average, so a dropped frame cannot drag the result
    toward zero.

    Returns:
        ``(h, v)`` as a tuple of floats, or ``None`` when no frame yielded a
        face.
    """
    total = np.zeros(2)
    used = 0
    for _ in range(n_frames):
        ok, frame = cap.read()
        if not ok:
            continue
        frame = cv2.flip(frame, 1)
        res = face.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        if res.multi_face_landmarks:
            total += np.array(gaze_ratio(res.multi_face_landmarks[0].landmark))
            used += 1
    if used == 0:
        return None
    return tuple((total / used).tolist())


def _calibrate(cap, face):
    """Run the 9-point calibration and fit the gaze -> screen mapping.

    Shows one target dot at a time in a fullscreen window. SPACE captures a
    gaze sample for the current dot (only while a face is detected); ESC
    aborts.

    Returns:
        ``(cx, cy)`` from ``fit_map``, or ``None`` if the user pressed ESC.
    """
    grid = [(0.1, 0.1), (0.5, 0.1), (0.9, 0.1),
            (0.1, 0.5), (0.5, 0.5), (0.9, 0.5),
            (0.1, 0.9), (0.5, 0.9), (0.9, 0.9)]
    win = "calibration"
    cv2.namedWindow(win, cv2.WINDOW_NORMAL)
    cv2.setWindowProperty(win, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    samples, targets = [], []
    try:
        for px, py in grid:
            tx, ty = int(px * SCREEN_W), int(py * SCREEN_H)
            # The target image does not change while waiting on this point,
            # so draw it once instead of once per camera frame.
            canvas = np.zeros((SCREEN_H, SCREEN_W, 3), dtype=np.uint8)
            cv2.circle(canvas, (tx, ty), 18, (0, 0, 255), -1)
            cv2.circle(canvas, (tx, ty), 6, (255, 255, 255), -1)
            cv2.putText(canvas, "Look at the dot, press SPACE", (60, 60),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.0, (200, 200, 200), 2)

            captured = None
            while captured is None:
                ok, frame = cap.read()
                face_visible = False
                if ok:
                    rgb = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
                    face_visible = bool(face.process(rgb).multi_face_landmarks)
                # Keep drawing and polling keys even when a frame is dropped,
                # so ESC still works if the camera stalls.
                cv2.imshow(win, canvas)
                key = cv2.waitKey(1) & 0xFF
                if key == 27:  # ESC
                    return None
                if key == 32 and face_visible:  # SPACE
                    # Stays None (and we keep waiting) if no frame had a face.
                    captured = _sample_gaze(cap, face, CALIB_SAMPLES)
            samples.append(captured)
            targets.append((tx, ty))
    finally:
        cv2.destroyWindow(win)
    return fit_map(samples, targets)


def _track(cap, face, hands, cx, cy):
    """Drive the cursor from gaze and click on pinch until a control key.

    Args:
        cap: opened ``cv2.VideoCapture``.
        face, hands: MediaPipe FaceMesh and Hands instances.
        cx, cy: mapping coefficients as returned by ``fit_map``.

    Returns:
        ``True`` if the user pressed 'c' (recalibrate), ``False`` on 'q'/ESC.
    """
    fx, fy = OneEuro(EURO_MIN_CUTOFF, EURO_BETA), OneEuro(EURO_MIN_CUTOFF, EURO_BETA)
    pinching = False
    # -inf so the very first pinch is never blocked by the cooldown.
    last_click = float("-inf")
    print("Running. 'q'/ESC to quit, 'c' to recalibrate.")

    while True:
        ok, frame = cap.read()
        if ok:
            frame = cv2.flip(frame, 1)
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            fres = face.process(rgb)
            hres = hands.process(rgb)
            # Monotonic clock: filter dt and the click cooldown must not be
            # affected by wall-clock adjustments.
            t = time.monotonic()

            # gaze -> cursor
            if fres.multi_face_landmarks:
                h, v = gaze_ratio(fres.multi_face_landmarks[0].landmark)
                sx, sy = apply_map(cx, cy, h, v)
                sx, sy = fx(sx, t), fy(sy, t)
                sx = int(np.clip(sx, 0, SCREEN_W - 1))
                sy = int(np.clip(sy, 0, SCREEN_H - 1))
                pyautogui.moveTo(sx, sy)

            # pinch -> click (edge-triggered with hysteresis + cooldown)
            if hres.multi_hand_landmarks:
                s = pinch_strength(hres.multi_hand_landmarks[0].landmark)
                if not pinching and s < PINCH_ON:
                    pinching = True
                    if t - last_click > CLICK_COOLDOWN:
                        pyautogui.click()
                        last_click = t
                elif pinching and s > PINCH_OFF:
                    pinching = False
                cv2.putText(frame, f"pinch {s:.2f}", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                            (0, 255, 0) if pinching else (200, 200, 200), 2)

            cv2.imshow("preview (q to quit)", frame)

        # Poll keys on every iteration, including dropped frames.
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('q'), 27):
            return False
        if key == ord('c'):
            # Close the preview so the fullscreen calibration window is on top.
            cv2.destroyAllWindows()
            return True


def main():
    """Calibrate, then run the gaze-cursor / pinch-click loop until quit.

    Opens the webcam at ``CAM_INDEX``, runs the 9-point calibration, and then
    tracks gaze and pinch. Pressing 'c' while running recalibrates with the
    same camera and models; ESC during calibration, or 'q'/ESC while running,
    exits.

    Raises:
        SystemExit: if the webcam cannot be opened.
    """
    mp_face = mp.solutions.face_mesh
    mp_hands = mp.solutions.hands

    cap = cv2.VideoCapture(CAM_INDEX)
    try:
        if not cap.isOpened():
            raise SystemExit("Cannot open webcam. Check camera permission / CAM_INDEX.")

        # Context managers close the MediaPipe graphs on every exit path.
        with mp_face.FaceMesh(refine_landmarks=True, max_num_faces=1,
                              min_detection_confidence=0.6,
                              min_tracking_confidence=0.6) as face, \
                mp_hands.Hands(max_num_hands=1,
                               min_detection_confidence=0.6,
                               min_tracking_confidence=0.6) as hands:
            # Loop instead of recursing so recalibration reuses the camera
            # and models rather than stacking up new ones.
            while True:
                mapping = _calibrate(cap, face)
                if mapping is None:
                    return
                cx, cy = mapping
                if not _track(cap, face, hands, cx, cy):
                    return
    finally:
        cap.release()
        cv2.destroyAllWindows()

# Run the controller only when executed as a script, not when imported.
if __name__ == "__main__":
    main()