From 807aafcbf3145dbaf680bf6c9b7cec20fb44cc9a Mon Sep 17 00:00:00 2001 From: jhao104 Date: Fri, 8 May 2026 21:57:53 +0800 Subject: [PATCH 1/2] =?UTF-8?q?[update]=20=E5=BC=95=E5=85=A5tox=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=8C=96=E6=B5=8B=E8=AF=95=EF=BC=8C=E6=94=BE=E5=BC=83?= =?UTF-8?q?Python=203.7=E4=BB=A5=E4=B8=8B=E7=89=88=E6=9C=AC=E6=94=AF?= =?UTF-8?q?=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增tox.ini配置,支持Python 3.8-3.11多版本测试 - 新增GitHub Actions comprehensive-test.yml替代Travis CI - 移除.travis.yml,不再维护 - 更新README.md,移除Python 2.7/3.5/3.6/3.7版本标识 - .gitignore添加.tox目录忽略 --- .github/workflows/comprehensive-test.yml | 48 ++++++++++++++++++ .gitignore | 1 + .travis.yml | 16 ------ CLAUDE.md | 62 ++++++++++++++++++++++++ README.md | 5 +- tox.ini | 18 +++++++ 6 files changed, 130 insertions(+), 20 deletions(-) create mode 100644 .github/workflows/comprehensive-test.yml delete mode 100644 .travis.yml create mode 100644 CLAUDE.md create mode 100644 tox.ini diff --git a/.github/workflows/comprehensive-test.yml b/.github/workflows/comprehensive-test.yml new file mode 100644 index 000000000..3d8b3ada4 --- /dev/null +++ b/.github/workflows/comprehensive-test.yml @@ -0,0 +1,48 @@ +name: Comprehensive Tests + +on: + push: + branches: [master, develop] + pull_request: + branches: [master, develop] + +jobs: + test: + name: Python ${{ matrix.python-version }} + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11"] + + services: + redis: + image: redis:latest + ports: + - 6379:6379 + options: >- + --health-cmd "redis-cli ping" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install 
tox-uv + + - name: Run tests with tox + run: | + TOX_ENV="py$(echo ${{ matrix.python-version }} | tr -d '.')" + tox -e $TOX_ENV + env: + DB_CONN: redis://localhost:6379/0 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4dae2645e..1a3f1739f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ docs/_build *.pyc *.log +.tox diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 529d8cf2d..000000000 --- a/.travis.yml +++ /dev/null @@ -1,16 +0,0 @@ -language: python -python: - - "2.7" - - "3.5" - - "3.6" - - "3.7" - - "3.8" - - "3.9" - - "3.10" - - "3.11" -os: - - linux -install: - - pip install -r requirements.txt - -script: python test.py diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 000000000..e430dcc01 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,62 @@ +# CLAUDE.md + +本文件为 Claude Code (claude.ai/code) 在本仓库中工作时提供指导。 + +## 技术栈 +Python (3.8–3.11)、Flask (API)、Redis/SSDB (存储)、APScheduler (调度)、click (CLI)、gunicorn (生产服务器)。依赖版本固定记录在 `requirements.txt` 中。 + +## 常用命令 +- 安装依赖:`pip install -r requirements.txt` +- 运行代理爬取/验证调度器:`python proxyPool.py schedule` +- 运行 API 服务器:`python proxyPool.py server` +- 运行全部测试:`pytest` +- 运行单个测试:`pytest test/testProxyFetcher.py::test_freeProxy01` +- Docker 部署:`docker-compose up -d` 或 `docker run --env DB_CONN=redis://:password@ip:port/0 -p 5010:5010 jhao104/proxy_pool:latest` + +## 高层架构 +免费代理池项目,爬取公开代理源、验证代理可用性、持久化存储到 Redis/SSDB,并通过 Flask RESTful API 提供代理服务。 + +### 核心组件 +- **爬取器** (`fetcher/proxyFetcher.py`):`ProxyFetcher` 类,每个代理源对应一个静态方法,yield 出 `host:port` 字符串。通过 `setting.py` 中的 `PROXY_FETCHER` 列表启用对应爬取器。 +- **数据库层** (`db/`):抽象 `dbClient` 接口,包含 Redis (`redisClient.py`) 和 SSDB (`ssdbClient.py`) 两种实现。通过 `setting.py` 中的 `DB_CONN` 配置连接(格式:`redis://:pwd@ip:port/db` 或 `ssdb://:pwd@ip:port`)。 +- **调度器** (`helper/scheduler.py`):基于 APScheduler 的定时任务,驱动爬取器运行并触发验证。时区通过 `setting.py` 中的 `TIMEZONE` 配置。 +- **验证器** (`helper/validator.py`):使用 `HTTP_URL` (http://httpbin.org) 和 `HTTPS_URL`
(https://www.qq.com) 测试代理,超时时间由 `VERIFY_TIMEOUT` 指定(默认 10 秒)。失败次数超过 `MAX_FAIL_COUNT` 的代理会被移除。当代理池数量低于 `POOL_SIZE_MIN`(默认 20)时触发重新爬取。 +- **API** (`api/proxyApi.py`):Flask 接口,包含以下端点: + - `/get`:随机获取一个代理(`?type=https` 可筛选 HTTPS 代理) + - `/pop`:获取并删除一个代理 + - `/all`:列出所有代理 + - `/count`:代理数量统计 + - `/delete`:通过 `?proxy=host:port` 删除指定代理 + - 服务运行在 `HOST:PORT`(默认 `0.0.0.0:5010`),配置来自 `setting.py`。 +- **命令行入口** (`proxyPool.py`):基于 click 的命令行工具,包含 `schedule` 和 `server` 两个子命令。 + +### 扩展代理源 +1. 在 `fetcher/proxyFetcher.py` 的 `ProxyFetcher` 类中新增一个静态方法,yield 出 `host:port` 字符串。 +2. 将该方法名添加到 `setting.py` 的 `PROXY_FETCHER` 列表中。调度器会自动识别并启用新的代理源。 + +## 关键配置 +所有运行时配置均在 `setting.py` 中: +- `HOST`/`PORT`:API 绑定的地址和端口 +- `DB_CONN`:数据库连接字符串 +- `PROXY_FETCHER`:已启用的爬取器方法名列表 +- `HTTP_URL`/`HTTPS_URL`:验证目标 URL +- `VERIFY_TIMEOUT`:验证超时时间(默认 10 秒) +- `MAX_FAIL_COUNT`:代理被移除前允许的最大失败次数 +- `POOL_SIZE_MIN`:触发重新爬取的最小代理池数量阈值 +- `PROXY_REGION`:是否启用代理地区属性(默认 `True`) +- `TIMEZONE`:调度器时区(默认 `Asia/Shanghai`) + +## 代码风格与命名规范 +- **缩进**:4 个空格(Python 标准) +- **文件命名**:驼峰命名,如 `proxyFetcher.py`、`dbClient.py`、`redisClient.py`、`webRequest.py` +- **类命名**:帕斯卡命名,如 `ProxyFetcher`、`RedisClient`、`SsdbClient`、`ProxyValidator` +- **方法命名**:混合风格——数据库/爬取器方法使用驼峰命名(`getAll`、`getCount`、`changeTable`、`freeProxy01`),属性和辅助方法使用下划线命名(`user_agent`、`fail_count`、`check_count`) +- **爬取器方法**:命名为 `freeProxy` + 两位数字(如 `freeProxy01`、`freeProxy02`)。新增爬取器必须遵循此模式 +- **常量**(在 `setting.py` 中):大写下划线命名(`DB_CONN`、`PROXY_FETCHER`、`HTTP_URL`、`MAX_FAIL_COUNT`) +- **变量**:下划线命名(`proxy_obj`、`proxy_str`、`https`) +- **注释/文档字符串**:源文件头部和行内注释通常使用中文(普通话) +- **单例模式**:使用自定义 `Singleton` 元类(`util/singleton.py`)结合 `six.withMetaclass` 实现 + +## 注意事项 +- 免费代理稳定性较差,本项目仅供学习/演示使用。生产环境建议使用付费代理(如 Bright Data,详见 README)。 +- 本仓库中不存在 `.cursorrules` 或 GitHub Copilot 配置文件。 \ No newline at end of file diff --git a/README.md b/README.md index 0c970a345..89b0c2135 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,7 @@ ProxyPool 爬虫代理IP池 * 文档:
[document](https://proxy-pool.readthedocs.io/zh/latest/) [![Documentation Status](https://readthedocs.org/projects/proxy-pool/badge/?version=latest)](https://proxy-pool.readthedocs.io/zh/latest/?badge=latest) -* 支持版本: [![](https://img.shields.io/badge/Python-2.7-green.svg)](https://docs.python.org/2.7/) -[![](https://img.shields.io/badge/Python-3.5-blue.svg)](https://docs.python.org/3.5/) -[![](https://img.shields.io/badge/Python-3.6-blue.svg)](https://docs.python.org/3.6/) -[![](https://img.shields.io/badge/Python-3.7-blue.svg)](https://docs.python.org/3.7/) +* 支持版本: [![](https://img.shields.io/badge/Python-3.8-blue.svg)](https://docs.python.org/3.8/) [![](https://img.shields.io/badge/Python-3.9-blue.svg)](https://docs.python.org/3.9/) [![](https://img.shields.io/badge/Python-3.10-blue.svg)](https://docs.python.org/3.10/) diff --git a/tox.ini b/tox.ini new file mode 100644 index 000000000..6f993ff95 --- /dev/null +++ b/tox.ini @@ -0,0 +1,18 @@ +[tox] +envlist = py38,py39,py310,py311 +skip_missing_interpreters = true + +[testenv] +deps = + six + requests + pyquery + gunicorn + lxml + redis + APScheduler==3.2.0;python_version<"3.10" + APScheduler==3.10.0;python_version>="3.10" + click==8.0.1 + Flask==2.1.1 + werkzeug==2.1.0 +commands = python test.py \ No newline at end of file From abb36a07651d4dea38fab80baaea30f503461b44 Mon Sep 17 00:00:00 2001 From: J_hao104 Date: Fri, 8 May 2026 22:07:20 +0800 Subject: [PATCH 2/2] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fc967ee5e..0eb8a7157 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ ProxyPool 爬虫代理IP池 ======= -[![Build Status](https://travis-ci.org/jhao104/proxy_pool.svg?branch=master)](https://travis-ci.org/jhao104/proxy_pool) +[![Comprehensive Tests](https://github.com/jhao104/proxy_pool/actions/workflows/comprehensive-test.yml/badge.svg?branch=master)](https://github.com/jhao104/proxy_pool/actions/workflows/comprehensive-test.yml) 
[![](https://img.shields.io/badge/Powered%20by-@j_hao104-green.svg)](http://www.spiderpy.cn/blog/) [![Packagist](https://img.shields.io/packagist/l/doctrine/orm.svg)](https://github.com/jhao104/proxy_pool/blob/master/LICENSE) [![GitHub contributors](https://img.shields.io/github/contributors/jhao104/proxy_pool.svg)](https://github.com/jhao104/proxy_pool/graphs/contributors)