# Source: capsolver-spanish-examples/python/image_recognition/vision_engine.py
# (branch "main" of the returnyso/capsolver-spanish-examples repository on GitHub)
import os
import base64
import requests
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv()

# CapSolver API settings
CAPSOLVER_API_KEY = os.environ.get("CAPSOLVER_API_KEY")
CREATE_TASK_URL = "https://api.capsolver.com/createTask"

# Vision Engine modules: module id -> description of the inputs it requires
MODULES = dict(
    slider_1="Captcha slider - requiere imagen e imagenFondo",
    rotate_1="Deteccion de rotacion - requiere imagen e imagenFondo",
    rotate_2="Rotacion independiente - requiere solo imagen",
    shein="Seleccion multi-objeto con coordenadas - requiere imagen y pregunta",
)


def solve_slider(image_base64, background_base64, timeout=30):
    """
    Solve a slider captcha with the VisionEngine ``slider_1`` module.

    Args:
        image_base64: Base64-encoded image of the slider piece
        background_base64: Base64-encoded background image
        timeout: Seconds to wait for the HTTP request (passed to requests)

    Returns:
        The ``solution`` object of the API response, or an empty dict when
        the response carries none. Expected to hold the slide distance.

    Raises:
        ValueError: If either image is missing or empty
        RuntimeError: If the API response reports a non-zero ``errorId``
    """
    # Fail fast on missing inputs instead of spending an API call on them.
    if not image_base64 or not background_base64:
        raise ValueError("image_base64 y background_base64 son obligatorios")

    payload = {
        "clientKey": CAPSOLVER_API_KEY,
        "task": {
            "type": "VisionEngine",
            "module": "slider_1",
            "image": image_base64,
            "imageBackground": background_base64,
        },
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(CREATE_TASK_URL, json=payload, timeout=timeout)
    result = response.json()

    # A missing errorId is treated as a failure as well (None != 0).
    if result.get("errorId") != 0:
        raise RuntimeError(f"Error al resolver: {result.get('errorDescription')}")

    return result.get("solution", {})


def solve_rotate(image_base64, background_base64=None, module="rotate_1", timeout=30):
    """
    Solve a rotation captcha with a VisionEngine rotation module.

    Args:
        image_base64: Base64-encoded image to rotate
        background_base64: Base64-encoded background (required for rotate_1,
            ignored for any other module)
        module: "rotate_1" (with background) or "rotate_2" (standalone)
        timeout: Seconds to wait for the HTTP request (passed to requests)

    Returns:
        The ``solution`` object of the API response, or an empty dict when
        the response carries none. Expected to hold the rotation angle.

    Raises:
        ValueError: If the image is missing, or if module is "rotate_1" and
            no background image was given
        RuntimeError: If the API response reports a non-zero ``errorId``
    """
    # Fail fast on missing inputs instead of spending an API call on them.
    if not image_base64:
        raise ValueError("image_base64 es obligatorio")
    needs_background = module == "rotate_1"
    if needs_background and not background_base64:
        # Previously the background was silently left out of the payload.
        raise ValueError("background_base64 es obligatorio para el modulo rotate_1")

    task = {
        "type": "VisionEngine",
        "module": module,
        "image": image_base64,
    }
    if needs_background:
        task["imageBackground"] = background_base64

    payload = {"clientKey": CAPSOLVER_API_KEY, "task": task}

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(CREATE_TASK_URL, json=payload, timeout=timeout)
    result = response.json()

    # A missing errorId is treated as a failure as well (None != 0).
    if result.get("errorId") != 0:
        raise RuntimeError(f"Error al resolver: {result.get('errorDescription')}")

    return result.get("solution", {})


def solve_shein(image_base64, question, timeout=30):
    """
    Solve a Shein-style multi-object selection captcha (``shein`` module).

    Args:
        image_base64: Base64-encoded image
        question: The kind of object to identify
        timeout: Seconds to wait for the HTTP request (passed to requests)

    Returns:
        The ``solution`` object of the API response, or an empty dict when
        the response carries none. Expected to hold ``rects`` (coordinate
        arrays).

    Raises:
        ValueError: If the image or the question is missing or empty
        RuntimeError: If the API response reports a non-zero ``errorId``
    """
    # Fail fast on missing inputs instead of spending an API call on them.
    if not image_base64 or not question:
        raise ValueError("image_base64 y question son obligatorios")

    payload = {
        "clientKey": CAPSOLVER_API_KEY,
        "task": {
            "type": "VisionEngine",
            "module": "shein",
            "image": image_base64,
            "question": question,
        },
    }

    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.post(CREATE_TASK_URL, json=payload, timeout=timeout)
    result = response.json()

    # A missing errorId is treated as a failure as well (None != 0).
    if result.get("errorId") != 0:
        raise RuntimeError(f"Error al resolver: {result.get('errorDescription')}")

    return result.get("solution", {})


def load_image_as_base64(image_path):
    """Read the file at image_path in binary mode and return it Base64-encoded as a str."""
    with open(image_path, mode="rb") as image_file:
        raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "utf-8")


def main():
    """Print the available Vision Engine modules and usage examples.

    When CAPSOLVER_API_KEY is not set, prints setup instructions and returns
    early. No API request is made here.
    """
    if not CAPSOLVER_API_KEY:
        missing_key_help = (
            "Error: CAPSOLVER_API_KEY no encontrada en el archivo .env",
            "Por favor, crea un archivo .env con tu clave API:",
            "CAPSOLVER_API_KEY=tu_clave_api_aqui",
        )
        for line in missing_key_help:
            print(line)
        return

    print("Vision Engine - Resolutor de Captcha Slider/Rotacion")

    print("\nModulos disponibles:")
    for name in MODULES:
        print(f"  {name}: {MODULES[name]}")

    # Usage walkthrough, one entry per printed line.
    usage_lines = (
        "\nEjemplo de uso:",
        "\n  # Captcha slider:",
        '  slider_img = load_image_as_base64("slider.png")',
        '  bg_img = load_image_as_base64("fondo.png")',
        "  solution = solve_slider(slider_img, bg_img)",
        "  # Devuelve: {'distance': 150}  # pixeles a deslizar",
        "\n  # Captcha de rotacion (con fondo):",
        "  solution = solve_rotate(image_base64, background_base64, module='rotate_1')",
        "  # Devuelve: {'angle': 45}  # grados a rotar",
        "\n  # Captcha de rotacion (independiente):",
        "  solution = solve_rotate(image_base64, module='rotate_2')",
        "  # Devuelve: {'angle': 90}",
        "\n  # Seleccion multi-objeto Shein:",
        '  solution = solve_shein(image_base64, "Selecciona todos los zapatos")',
        "  # Devuelve: {'rects': [[x1,y1,x2,y2], [x1,y1,x2,y2]]}  # coordenadas de objetos",
    )
    for line in usage_lines:
        print(line)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()