# Source: capsolver-documentation-examples/python/image_recognition/vision_engine.py
# (GitHub repository returnyso/capsolver-documentation-examples, branch main)
import os
import base64
import requests
from dotenv import load_dotenv

# Load environment variables from .env file.
# This has to run before the os.getenv() lookup below so that a key defined
# in .env is visible to it.
load_dotenv()

# CapSolver API configuration
# CAPSOLVER_API_KEY is None when the variable is not set; main() checks for
# that and prints setup instructions instead of continuing.
CAPSOLVER_API_KEY = os.getenv("CAPSOLVER_API_KEY")
# Endpoint every solve_* function in this file POSTs its task to.
CREATE_TASK_URL = "https://api.capsolver.com/createTask"

# Vision Engine modules
# Maps each module name (the value sent as task["module"]) to a short
# human-readable description of the inputs it needs; main() prints this table.
MODULES = {
    "slider_1": "Slider captcha - requires image and imageBackground",
    "rotate_1": "Rotation detection - requires image and imageBackground",
    "rotate_2": "Standalone rotation - requires only image",
    "shein": "Multi-object selection with coordinates - requires image and question"
}


class CapSolverError(Exception):
    """Raised when CapSolver rejects a task or returns an unusable response."""


# Upper bound, in seconds, on how long a single createTask call may block.
# requests applies no timeout by default, so without this a stalled
# connection would hang the caller indefinitely.
_REQUEST_TIMEOUT_SECONDS = 30


def _require(value, name):
    """Raise ValueError if a mandatory task input is missing or empty."""
    if not value:
        raise ValueError(f"{name} is required and must not be empty")


def _create_vision_task(task_fields):
    """
    Submit a VisionEngine task and return its solution.

    Args:
        task_fields: Module-specific task fields ("module", "image", ...)

    Returns:
        The "solution" object of the response ({} if the key is absent)

    Raises:
        CapSolverError: If the response is not a JSON object or its
            errorId is not 0
        requests.exceptions.RequestException: On network failure or timeout
    """
    payload = {
        "clientKey": CAPSOLVER_API_KEY,
        "task": {"type": "VisionEngine", **task_fields},
    }

    response = requests.post(
        CREATE_TASK_URL, json=payload, timeout=_REQUEST_TIMEOUT_SECONDS
    )

    # Parse the body before looking at the HTTP status: the error details
    # (errorId / errorDescription) are read from the JSON body itself.
    try:
        result = response.json()
    except ValueError as exc:
        raise CapSolverError(
            f"Failed to solve: non-JSON response (HTTP {response.status_code})"
        ) from exc

    if not isinstance(result, dict):
        raise CapSolverError(
            f"Failed to solve: unexpected response (HTTP {response.status_code})"
        )

    if result.get("errorId") != 0:
        raise CapSolverError(f"Failed to solve: {result.get('errorDescription')}")

    return result.get("solution", {})


def solve_slider(image_base64, background_base64):
    """
    Solve slider captcha.

    Args:
        image_base64: Base64 encoded slider piece image
        background_base64: Base64 encoded background image

    Returns:
        Solution containing distance to slide

    Raises:
        ValueError: If either image is missing or empty
        CapSolverError: If the API reports an error
    """
    # Validate locally so an obviously invalid call never reaches the API.
    _require(image_base64, "image_base64")
    _require(background_base64, "background_base64")

    return _create_vision_task({
        "module": "slider_1",
        "image": image_base64,
        "imageBackground": background_base64,
    })


def solve_rotate(image_base64, background_base64=None, module="rotate_1"):
    """
    Solve rotation captcha.

    Args:
        image_base64: Base64 encoded image to rotate
        background_base64: Base64 encoded background (required for rotate_1)
        module: "rotate_1" (with background) or "rotate_2" (standalone)

    Returns:
        Solution containing angle to rotate

    Raises:
        ValueError: If the image is missing, or the background is missing
            while module is "rotate_1"
        CapSolverError: If the API reports an error
    """
    _require(image_base64, "image_base64")

    task = {"module": module, "image": image_base64}

    if module == "rotate_1":
        # rotate_1 needs both images; fail here instead of silently sending
        # an incomplete task.
        _require(background_base64, "background_base64")
        task["imageBackground"] = background_base64

    return _create_vision_task(task)


def solve_shein(image_base64, question):
    """
    Solve Shein-style multi-object selection captcha.

    Args:
        image_base64: Base64 encoded image
        question: The object type to identify

    Returns:
        Solution containing rects (coordinate arrays)

    Raises:
        ValueError: If the image or the question is missing or empty
        CapSolverError: If the API reports an error
    """
    _require(image_base64, "image_base64")
    _require(question, "question")

    return _create_vision_task({
        "module": "shein",
        "image": image_base64,
        "question": question,
    })


def load_image_as_base64(image_path):
    """Read the file at image_path and return its bytes as a base64 string."""
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()

    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")


def main():
    """Print the module table and usage examples; performs no API calls.

    When CAPSOLVER_API_KEY is not set, prints setup instructions and returns.
    """
    if not CAPSOLVER_API_KEY:
        missing_key_lines = (
            "Error: CAPSOLVER_API_KEY not found in .env file",
            "Please create a .env file with your API key:",
            "CAPSOLVER_API_KEY=your_api_key_here",
        )
        for line in missing_key_lines:
            print(line)
        return

    print("Vision Engine - Slider/Rotate Captcha Solver")

    print("\nAvailable modules:")
    for name in MODULES:
        print(f"  {name}: {MODULES[name]}")

    # Each entry is printed on its own line; a leading "\n" starts a new section.
    usage_lines = (
        "\nExample usage:",
        "\n  # Slider captcha:",
        '  slider_img = load_image_as_base64("slider.png")',
        '  bg_img = load_image_as_base64("background.png")',
        "  solution = solve_slider(slider_img, bg_img)",
        "  # Returns: {'distance': 150}  # pixels to slide",
        "\n  # Rotation captcha (with background):",
        "  solution = solve_rotate(image_base64, background_base64, module='rotate_1')",
        "  # Returns: {'angle': 45}  # degrees to rotate",
        "\n  # Rotation captcha (standalone):",
        "  solution = solve_rotate(image_base64, module='rotate_2')",
        "  # Returns: {'angle': 90}",
        "\n  # Shein multi-object selection:",
        '  solution = solve_shein(image_base64, "Select all shoes")',
        "  # Returns: {'rects': [[x1,y1,x2,y2], [x1,y1,x2,y2]]}  # object coordinates",
    )
    for line in usage_lines:
        print(line)


# Run the demo only when this file is executed as a script, not when it is
# imported for its solve_* helpers.
if __name__ == "__main__":
    main()