From 72af7eb9c395e2d592f9a334b6ace98508e4c60b Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 11 Jan 2026 07:36:43 +0800 Subject: [PATCH 01/19] Implement multi-language support for UI Added multi-language support with English, Chinese, Spanish, French, German, and Japanese translations for various UI elements and messages. --- website/Teranslate.js | 331 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 331 insertions(+) create mode 100644 website/Teranslate.js diff --git a/website/Teranslate.js b/website/Teranslate.js new file mode 100644 index 0000000..a713821 --- /dev/null +++ b/website/Teranslate.js @@ -0,0 +1,331 @@ +-- ============================================= +-- EXPLOREPI LANGUAGE DATA INSTALLATION +-- Complete Multi-Language Support +-- ============================================= + +USE explorepi; + +-- Clear existing language data (optional) +-- TRUNCATE TABLE language_data; + +-- ============================================= +-- ENGLISH (en) - Complete Translation +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +-- Navigation +('en', 'nav.home', 'Home', 'navigation'), +('en', 'nav.blocks', 'Blocks', 'navigation'), +('en', 'nav.transactions', 'Transactions', 'navigation'), +('en', 'nav.tokens', 'Tokens', 'navigation'), +('en', 'nav.contracts', 'Contracts', 'navigation'), +('en', 'nav.statistics', 'Statistics', 'navigation'), +('en', 'nav.search', 'Search', 'navigation'), +('en', 'nav.about', 'About', 'navigation'), +('en', 'nav.api', 'API', 'navigation'), + +-- Common Terms +('en', 'common.loading', 'Loading...', 'common'), +('en', 'common.error', 'Error', 'common'), +('en', 'common.success', 'Success', 'common'), +('en', 'common.warning', 'Warning', 'common'), +('en', 'common.info', 'Information', 'common'), +('en', 'common.block', 'Block', 'common'), +('en', 'common.transaction', 'Transaction', 
'common'), +('en', 'common.address', 'Address', 'common'), +('en', 'common.token', 'Token', 'common'), +('en', 'common.contract', 'Smart Contract', 'common'), +('en', 'common.view_all', 'View All', 'common'), +('en', 'common.view_more', 'View More', 'common'), +('en', 'common.details', 'Details', 'common'), +('en', 'common.copy', 'Copy', 'common'), +('en', 'common.copied', 'Copied!', 'common'), +('en', 'common.ago', 'ago', 'common'), +('en', 'common.total', 'Total', 'common'), +('en', 'common.amount', 'Amount', 'common'), +('en', 'common.price', 'Price', 'common'), +('en', 'common.value', 'Value', 'common'), + +-- Search +('en', 'search.placeholder', 'Search by Address / Txn Hash / Block / Token', 'search'), +('en', 'search.searching', 'Searching...', 'search'), +('en', 'search.no_results', 'No results found', 'search'), +('en', 'search.invalid_input', 'Invalid search input', 'search'), +('en', 'search.enter_query', 'Enter your search query', 'search'), + +-- Home Page +('en', 'home.title', 'Pi Network Block Explorer', 'home'), +('en', 'home.subtitle', 'Explore the Pi Blockchain', 'home'), +('en', 'home.latest_blocks', 'Latest Blocks', 'home'), +('en', 'home.latest_transactions', 'Latest Transactions', 'home'), +('en', 'home.network_stats', 'Network Statistics', 'home'), +('en', 'home.total_blocks', 'Total Blocks', 'home'), +('en', 'home.total_transactions', 'Total Transactions', 'home'), +('en', 'home.total_addresses', 'Total Addresses', 'home'), +('en', 'home.avg_block_time', 'Avg Block Time', 'home'), +('en', 'home.welcome', 'Welcome to Pi Network Explorer', 'home'), + +-- Block Details +('en', 'block.height', 'Block Height', 'block'), +('en', 'block.hash', 'Block Hash', 'block'), +('en', 'block.parent_hash', 'Parent Hash', 'block'), +('en', 'block.timestamp', 'Timestamp', 'block'), +('en', 'block.transactions', 'Transactions', 'block'), +('en', 'block.miner', 'Miner', 'block'), +('en', 'block.difficulty', 'Difficulty', 'block'), +('en', 
'block.total_difficulty', 'Total Difficulty', 'block'), +('en', 'block.size', 'Size', 'block'), +('en', 'block.gas_used', 'Gas Used', 'block'), +('en', 'block.gas_limit', 'Gas Limit', 'block'), +('en', 'block.nonce', 'Nonce', 'block'), +('en', 'block.state_root', 'State Root', 'block'), +('en', 'block.receipts_root', 'Receipts Root', 'block'), +('en', 'block.transactions_root', 'Transactions Root', 'block'), +('en', 'block.extra_data', 'Extra Data', 'block'), +('en', 'block.not_found', 'Block not found', 'block'), +('en', 'block.overview', 'Block Overview', 'block'), + +-- Transaction Details +('en', 'tx.hash', 'Transaction Hash', 'transaction'), +('en', 'tx.status', 'Status', 'transaction'), +('en', 'tx.success', 'Success', 'transaction'), +('en', 'tx.failed', 'Failed', 'transaction'), +('en', 'tx.pending', 'Pending', 'transaction'), +('en', 'tx.block', 'Block', 'transaction'), +('en', 'tx.from', 'From', 'transaction'), +('en', 'tx.to', 'To', 'transaction'), +('en', 'tx.contract_creation', 'Contract Creation', 'transaction'), +('en', 'tx.value', 'Value', 'transaction'), +('en', 'tx.fee', 'Transaction Fee', 'transaction'), +('en', 'tx.gas_price', 'Gas Price', 'transaction'), +('en', 'tx.gas_limit', 'Gas Limit', 'transaction'), +('en', 'tx.gas_used', 'Gas Used', 'transaction'), +('en', 'tx.nonce', 'Nonce', 'transaction'), +('en', 'tx.input_data', 'Input Data', 'transaction'), +('en', 'tx.logs', 'Logs', 'transaction'), +('en', 'tx.not_found', 'Transaction not found', 'transaction'), + +-- Address Details +('en', 'address.overview', 'Address Overview', 'address'), +('en', 'address.balance', 'Balance', 'address'), +('en', 'address.transactions', 'Transactions', 'address'), +('en', 'address.token_transfers', 'Token Transfers', 'address'), +('en', 'address.is_contract', 'Smart Contract', 'address'), +('en', 'address.creator', 'Creator', 'address'), +('en', 'address.creation_tx', 'Creation Transaction', 'address'), +('en', 'address.first_seen', 'First Seen', 'address'), 
+('en', 'address.last_seen', 'Last Seen', 'address'), +('en', 'address.not_found', 'Address not found', 'address'), + +-- Token Details +('en', 'token.name', 'Token Name', 'token'), +('en', 'token.symbol', 'Symbol', 'token'), +('en', 'token.decimals', 'Decimals', 'token'), +('en', 'token.total_supply', 'Total Supply', 'token'), +('en', 'token.holders', 'Holders', 'token'), +('en', 'token.transfers', 'Transfers', 'token'), +('en', 'token.type', 'Token Type', 'token'), +('en', 'token.contract', 'Contract Address', 'token'), +('en', 'token.not_found', 'Token not found', 'token'), + +-- Contract Details +('en', 'contract.address', 'Contract Address', 'contract'), +('en', 'contract.creator', 'Creator', 'contract'), +('en', 'contract.creation_tx', 'Creation Transaction', 'contract'), +('en', 'contract.creation_block', 'Creation Block', 'contract'), +('en', 'contract.verified', 'Verified', 'contract'), +('en', 'contract.unverified', 'Not Verified', 'contract'), +('en', 'contract.source_code', 'Source Code', 'contract'), +('en', 'contract.abi', 'Contract ABI', 'contract'), +('en', 'contract.bytecode', 'Bytecode', 'contract'), +('en', 'contract.compiler_version', 'Compiler Version', 'contract'), +('en', 'contract.optimization', 'Optimization', 'contract'), +('en', 'contract.runs', 'Runs', 'contract'), +('en', 'contract.not_found', 'Contract not found', 'contract'), + +-- Time Units +('en', 'time.seconds', 'seconds', 'time'), +('en', 'time.minutes', 'minutes', 'time'), +('en', 'time.hours', 'hours', 'time'), +('en', 'time.days', 'days', 'time'), +('en', 'time.months', 'months', 'time'), +('en', 'time.years', 'years', 'time'), +('en', 'time.ago', 'ago', 'time'), +('en', 'time.just_now', 'just now', 'time'), + +-- Error Messages +('en', 'error.general', 'An error occurred', 'error'), +('en', 'error.not_found', 'Not found', 'error'), +('en', 'error.invalid_address', 'Invalid address', 'error'), +('en', 'error.invalid_tx_hash', 'Invalid transaction hash', 'error'), +('en', 
'error.invalid_block', 'Invalid block number', 'error'), +('en', 'error.database', 'Database error', 'error'), +('en', 'error.network', 'Network error', 'error'), +('en', 'error.connection', 'Connection error', 'error'); + +-- ============================================= +-- CHINESE (zh) - 中文翻译 +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('zh', 'nav.home', '首页', 'navigation'), +('zh', 'nav.blocks', '区块', 'navigation'), +('zh', 'nav.transactions', '交易', 'navigation'), +('zh', 'nav.tokens', '代币', 'navigation'), +('zh', 'nav.contracts', '合约', 'navigation'), +('zh', 'nav.statistics', '统计', 'navigation'), +('zh', 'nav.search', '搜索', 'navigation'), + +('zh', 'common.loading', '加载中...', 'common'), +('zh', 'common.error', '错误', 'common'), +('zh', 'common.success', '成功', 'common'), +('zh', 'common.block', '区块', 'common'), +('zh', 'common.transaction', '交易', 'common'), +('zh', 'common.address', '地址', 'common'), +('zh', 'common.token', '代币', 'common'), +('zh', 'common.view_all', '查看全部', 'common'), +('zh', 'common.details', '详情', 'common'), + +('zh', 'search.placeholder', '搜索地址 / 交易哈希 / 区块 / 代币', 'search'), +('zh', 'search.no_results', '未找到结果', 'search'), + +('zh', 'home.title', 'Pi网络区块浏览器', 'home'), +('zh', 'home.latest_blocks', '最新区块', 'home'), +('zh', 'home.latest_transactions', '最新交易', 'home'), +('zh', 'home.total_blocks', '总区块数', 'home'), + +('zh', 'block.height', '区块高度', 'block'), +('zh', 'block.hash', '区块哈希', 'block'), +('zh', 'block.timestamp', '时间戳', 'block'), +('zh', 'block.transactions', '交易', 'block'), +('zh', 'block.miner', '矿工', 'block'), + +('zh', 'tx.hash', '交易哈希', 'transaction'), +('zh', 'tx.status', '状态', 'transaction'), +('zh', 'tx.success', '成功', 'transaction'), +('zh', 'tx.failed', '失败', 'transaction'), +('zh', 'tx.from', '发送方', 'transaction'), +('zh', 'tx.to', '接收方', 'transaction'); + +-- ============================================= +-- SPANISH (es) - Español +-- 
============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('es', 'nav.home', 'Inicio', 'navigation'), +('es', 'nav.blocks', 'Bloques', 'navigation'), +('es', 'nav.transactions', 'Transacciones', 'navigation'), +('es', 'nav.tokens', 'Tokens', 'navigation'), +('es', 'nav.contracts', 'Contratos', 'navigation'), +('es', 'nav.statistics', 'Estadísticas', 'navigation'), +('es', 'nav.search', 'Buscar', 'navigation'), + +('es', 'common.loading', 'Cargando...', 'common'), +('es', 'common.error', 'Error', 'common'), +('es', 'common.success', 'Éxito', 'common'), +('es', 'common.block', 'Bloque', 'common'), +('es', 'common.transaction', 'Transacción', 'common'), +('es', 'common.address', 'Dirección', 'common'), +('es', 'common.token', 'Token', 'common'), +('es', 'common.view_all', 'Ver Todo', 'common'), +('es', 'common.details', 'Detalles', 'common'), + +('es', 'search.placeholder', 'Buscar por Dirección / Hash de Tx / Bloque / Token', 'search'), +('es', 'search.no_results', 'No se encontraron resultados', 'search'), + +('es', 'home.title', 'Explorador de Bloques de Pi Network', 'home'), +('es', 'home.latest_blocks', 'Últimos Bloques', 'home'), +('es', 'home.latest_transactions', 'Últimas Transacciones', 'home'), + +('es', 'block.height', 'Altura del Bloque', 'block'), +('es', 'block.hash', 'Hash del Bloque', 'block'), +('es', 'block.timestamp', 'Marca de Tiempo', 'block'), +('es', 'block.transactions', 'Transacciones', 'block'), + +('es', 'tx.hash', 'Hash de Transacción', 'transaction'), +('es', 'tx.status', 'Estado', 'transaction'), +('es', 'tx.success', 'Éxito', 'transaction'), +('es', 'tx.failed', 'Fallido', 'transaction'); + +-- ============================================= +-- FRENCH (fr) - Français +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('fr', 'nav.home', 'Accueil', 'navigation'), +('fr', 'nav.blocks', 'Blocs', 
'navigation'), +('fr', 'nav.transactions', 'Transactions', 'navigation'), +('fr', 'nav.tokens', 'Jetons', 'navigation'), +('fr', 'nav.contracts', 'Contrats', 'navigation'), +('fr', 'nav.search', 'Rechercher', 'navigation'), + +('fr', 'common.loading', 'Chargement...', 'common'), +('fr', 'common.error', 'Erreur', 'common'), +('fr', 'common.block', 'Bloc', 'common'), +('fr', 'common.transaction', 'Transaction', 'common'), +('fr', 'common.address', 'Adresse', 'common'), +('fr', 'common.details', 'Détails', 'common'), + +('fr', 'search.placeholder', 'Rechercher par Adresse / Hash / Bloc / Jeton', 'search'), + +('fr', 'home.title', 'Explorateur de Blocs Pi Network', 'home'), +('fr', 'home.latest_blocks', 'Derniers Blocs', 'home'), +('fr', 'home.latest_transactions', 'Dernières Transactions', 'home'); + +-- ============================================= +-- GERMAN (de) - Deutsch +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES +('de', 'nav.home', 'Startseite', 'navigation'), +('de', 'nav.blocks', 'Blöcke', 'navigation'), +('de', 'nav.transactions', 'Transaktionen', 'navigation'), +('de', 'nav.tokens', 'Token', 'navigation'), +('de', 'nav.contracts', 'Verträge', 'navigation'), +('de', 'nav.search', 'Suchen', 'navigation'), + +('de', 'common.loading', 'Laden...', 'common'), +('de', 'common.error', 'Fehler', 'common'), +('de', 'common.block', 'Block', 'common'), +('de', 'common.transaction', 'Transaktion', 'common'), +('de', 'common.address', 'Adresse', 'common'), +('de', 'common.details', 'Details', 'common'), + +('de', 'home.title', 'Pi Network Block Explorer', 'home'), +('de', 'home.latest_blocks', 'Neueste Blöcke', 'home'), +('de', 'home.latest_transactions', 'Neueste Transaktionen', 'home'); + +-- ============================================= +-- JAPANESE (ja) - 日本語 +-- ============================================= + +INSERT INTO language_data (lang_code, lang_key, lang_value, category) 
VALUES +('ja', 'nav.home', 'ホーム', 'navigation'), +('ja', 'nav.blocks', 'ブロック', 'navigation'), +('ja', 'nav.transactions', 'トランザクション', 'navigation'), +('ja', 'nav.tokens', 'トークン', 'navigation'), +('ja', 'nav.search', '検索', 'navigation'), + +('ja', 'common.loading', '読み込み中...', 'common'), +('ja', 'common.error', 'エラー', 'common'), +('ja', 'common.block', 'ブロック', 'common'), +('ja', 'common.transaction', 'トランザクション', 'common'), +('ja', 'common.address', 'アドレス', 'common'), + +('ja', 'home.title', 'Piネットワーク ブロックエクスプローラー', 'home'), +('ja', 'home.latest_blocks', '最新ブロック', 'home'), +('ja', 'home.latest_transactions', '最新トランザクション', 'home'); + +-- ============================================= +-- VERIFY INSTALLATION +-- ============================================= + +SELECT + lang_code, + COUNT(*) as total_translations, + COUNT(DISTINCT category) as categories +FROM language_data +GROUP BY lang_code +ORDER BY lang_code; + +SELECT 'Language data installation completed!' AS status; From d4a3a359c2fce713005f87e55ec8c35c3d4f3091 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 11 Jan 2026 12:09:14 +0800 Subject: [PATCH 02/19] Add GitHub Actions workflow for Azure Node.js deployment This workflow builds and deploys a Node.js application to Azure Web App on commits to the main branch. --- .github/workflows/azure-webapps-node.yml | 78 ++++++++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 .github/workflows/azure-webapps-node.yml diff --git a/.github/workflows/azure-webapps-node.yml b/.github/workflows/azure-webapps-node.yml new file mode 100644 index 0000000..2ebbac2 --- /dev/null +++ b/.github/workflows/azure-webapps-node.yml @@ -0,0 +1,78 @@ +# This workflow will build and push a node.js application to an Azure Web App when a commit is pushed to your default branch. +# +# This workflow assumes you have already created the target Azure App Service web app. 
+# For instructions see https://docs.microsoft.com/en-us/azure/app-service/quickstart-nodejs?tabs=linux&pivots=development-environment-cli +# +# To configure this workflow: +# +# 1. Download the Publish Profile for your Azure Web App. You can download this file from the Overview page of your Web App in the Azure Portal. +# For more information: https://docs.microsoft.com/en-us/azure/app-service/deploy-github-actions?tabs=applevel#generate-deployment-credentials +# +# 2. Create a secret in your repository named AZURE_WEBAPP_PUBLISH_PROFILE, paste the publish profile contents as the value of the secret. +# For instructions on obtaining the publish profile see: https://docs.microsoft.com/azure/app-service/deploy-github-actions#configure-the-github-secret +# +# 3. Change the value for the AZURE_WEBAPP_NAME. Optionally, change the AZURE_WEBAPP_PACKAGE_PATH and NODE_VERSION environment variables below. +# +# For more information on GitHub Actions for Azure: https://github.com/Azure/Actions +# For more information on the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy +# For more samples to get started with GitHub Action workflows to deploy to Azure: https://github.com/Azure/actions-workflow-samples + +on: + push: + branches: [ "main" ] + workflow_dispatch: + +env: + AZURE_WEBAPP_NAME: your-app-name # set this to your application's name + AZURE_WEBAPP_PACKAGE_PATH: '.' 
# set this to the path to your web app project, defaults to the repository root + NODE_VERSION: '20.x' # set this to the node version to use + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Set up Node.js + uses: actions/setup-node@v4 + with: + node-version: ${{ env.NODE_VERSION }} + cache: 'npm' + + - name: npm install, build, and test + run: | + npm install + npm run build --if-present + npm run test --if-present + + - name: Upload artifact for deployment job + uses: actions/upload-artifact@v4 + with: + name: node-app + path: . + + deploy: + permissions: + contents: none + runs-on: ubuntu-latest + needs: build + environment: + name: 'Development' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v4 + with: + name: node-app + + - name: 'Deploy to Azure WebApp' + id: deploy-to-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: ${{ env.AZURE_WEBAPP_NAME }} + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }} + package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} From f4f7155586faddb0fd58d537873bb6606b4cb2ca Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Fri, 16 Jan 2026 05:11:01 +0800 Subject: [PATCH 03/19] Update privacy policy last updated date --- website/public/policy.html | 71 ++++++++++++++------------------------ 1 file changed, 25 insertions(+), 46 deletions(-) diff --git a/website/public/policy.html b/website/public/policy.html index 0648d69..599b2e6 100644 --- a/website/public/policy.html +++ b/website/public/policy.html @@ -1,10 +1,10 @@

Privacy Policy

-

Last updated: March 14, 2023

-

This Privacy Policy describes Our policies and procedures on the collection, use and disclosure of Your information when You use the Service and tells You about Your privacy rights and how the law protects You.

+

Last updated: January 15, 2026

+

This Privacy Policy describes Our policies and procedures on the collection, use and disclosure of Your information when You use the Service and tells You about Your privacy rights and how the law protects You.

We use Your Personal data to provide and improve the Service. By using the Service, You agree to the collection and use of information in accordance with this Privacy Policy.

Interpretation and Definitions

Interpretation

-

The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in singular or in plural.

+

The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in singular or in plural.

Definitions

For the purposes of this Privacy Policy:

\ No newline at end of file From af40f758903257058b03c79f481964ec1caffc78 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 18 Jan 2026 22:14:38 +0800 Subject: [PATCH 04/19] Revert "corrected crawler README.md MySQL link" --- crawler/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawler/README.md b/crawler/README.md index fe4f211..679b064 100644 --- a/crawler/README.md +++ b/crawler/README.md @@ -61,7 +61,7 @@ npm start ## ⛏️ Built Using -- [MYSQL](https://www.mysql.com/) - Database +- [MYSQL](https://www.mongodb.com/) - Database - [NodeJs](https://nodejs.org/en/) - Server Environment - [StellarSDK](https://github.com/stellar/js-stellar-sdk) - BlockchainTool ## ✍️ Authors From dd7f06ec921a7ceb5a0158ec80458d1ec13cbe6e Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 18 Jan 2026 23:17:12 +0800 Subject: [PATCH 05/19] Create README.md --- .github/workflows/README.md | 83 +++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 .github/workflows/README.md diff --git a/.github/workflows/README.md b/.github/workflows/README.md new file mode 100644 index 0000000..cc583f4 --- /dev/null +++ b/.github/workflows/README.md @@ -0,0 +1,83 @@ +# Crawler + +A web crawler for collecting and processing data from specified sources. + +## Table of Contents + +- [Installation](#installation) +- [Configuration](#configuration) +- [Usage](#usage) +- [Database Setup](#database-setup) +- [Contributing](#contributing) + +## Installation + +Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +Ensure you have Python 3.8+ installed on your system. 
+ +## Configuration + +### Environment Variables + +Create a `.env` file in the project root with the following variables: + +``` +DATABASE_HOST=localhost +DATABASE_USER=crawler_user +DATABASE_PASSWORD=your_password +DATABASE_NAME=crawler_db +``` + +Update these values according to your local environment. + +## Usage + +Run the crawler with: + +```bash +python crawler.py +``` + +Optional flags: +- `--verbose`: Enable detailed logging output +- `--limit N`: Limit crawling to N pages +- `--timeout S`: Set request timeout to S seconds + +## Database Setup + +### MySQL Configuration + +The crawler uses MySQL to store collected data. Follow these steps to set up your database: + +1. **Install MySQL**: Download and install from [MySQL Official Website](https://dev.mysql.com/downloads/mysql/) + +2. **Create Database and User**: + ```sql + CREATE DATABASE crawler_db; + CREATE USER 'crawler_user'@'localhost' IDENTIFIED BY 'your_password'; + GRANT ALL PRIVILEGES ON crawler_db.* TO 'crawler_user'@'localhost'; + FLUSH PRIVILEGES; + ``` + +3. **Initialize Tables**: Run the database migration script: + ```bash + python scripts/init_db.py + ``` + +### Connection Details + +- **Host**: localhost (default) +- **Port**: 3306 (default MySQL port) +- **User**: crawler_user +- **Database**: crawler_db + +Update the connection parameters in your `.env` file if using different settings. + +## Contributing + +Please read CONTRIBUTING.md for details on our code of conduct and the process for submitting pull requests. 
\ No newline at end of file From 453469bb3a53caccd68d01e9fbf2645eb0337fee Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sun, 18 Jan 2026 23:24:43 +0800 Subject: [PATCH 06/19] Create CONTRIBUTING.md Reporting Bugs When reporting bugs, please include: Description: What you were trying to do Expected behavior: What should have happened Actual behavior: What actually happened Environment: Python version, OS, MySQL version Steps to reproduce: Clear steps to replicate the issue Error message: Full error traceback if available Screenshots: If applicable --- .github/workflows/CONTRIBUTING.md | 224 ++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 .github/workflows/CONTRIBUTING.md diff --git a/.github/workflows/CONTRIBUTING.md b/.github/workflows/CONTRIBUTING.md new file mode 100644 index 0000000..d652fba --- /dev/null +++ b/.github/workflows/CONTRIBUTING.md @@ -0,0 +1,224 @@ +# Contributing to Crawler + +Thank you for your interest in contributing to the Crawler project! We welcome contributions from everyone. This document provides guidelines and instructions for contributing. + +## Code of Conduct + +We are committed to providing a welcoming and inspiring community for all. Please be respectful and constructive in all interactions. Harassment, discrimination, or disruptive behavior will not be tolerated. 
+ +## How to Contribute + +There are many ways to contribute to this project: + +- **Report bugs** by opening an issue with detailed information +- **Suggest features** with clear use cases and expected behavior +- **Improve documentation** by fixing typos or clarifying confusing sections +- **Submit code changes** by creating pull requests with meaningful improvements +- **Review pull requests** and provide constructive feedback to other contributors + +## Getting Started + +### Prerequisites + +- Python 3.8 or higher +- Git +- A MySQL database for testing (optional but recommended) +- A code editor or IDE of your choice + +### Setting Up Your Development Environment + +1. Fork the repository on GitHub +2. Clone your fork locally: + ```bash + git clone https://github.com/your-username/crawler.git + cd crawler + ``` +3. Create a virtual environment: + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` +4. Install development dependencies: + ```bash + pip install -r requirements-dev.txt + ``` +5. Create a local `.env` file for testing: + ```bash + cp .env.example .env + ``` + +## Making Changes + +### Branch Naming + +Create a descriptive branch name for your changes: +- `feature/add-proxy-support` +- `bugfix/fix-mysql-connection-timeout` +- `docs/improve-readme` +- `test/add-crawler-tests` + +```bash +git checkout -b feature/your-feature-name +``` + +### Code Style + +Follow these guidelines to maintain consistent code quality: + +- Use PEP 8 style guide for Python code +- Keep lines under 100 characters when possible +- Use meaningful variable and function names +- Add docstrings to functions and classes +- Use type hints where applicable + +Example: +```python +def fetch_url(url: str, timeout: int = 10) -> str: + """ + Fetch content from a given URL. 
+ + Args: + url: The URL to fetch + timeout: Request timeout in seconds (default: 10) + + Returns: + The HTML content of the page + + Raises: + requests.exceptions.RequestException: If the request fails + """ + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.text +``` + +### Testing + +Before submitting a pull request, ensure your code passes all tests: + +```bash +# Run all tests +pytest + +# Run tests with coverage +pytest --cov=crawler + +# Run specific test file +pytest tests/test_crawler.py +``` + +Write tests for new features: +```python +def test_fetch_url_success(): + """Test that fetch_url returns content for valid URLs.""" + result = fetch_url("https://example.com") + assert result is not None + assert len(result) > 0 +``` + +### Commits + +Write clear, descriptive commit messages: + +```bash +# Good +git commit -m "Add proxy support to crawler + +- Add ProxyManager class to handle proxy rotation +- Update fetch_url to accept proxy configuration +- Add tests for proxy connection handling" + +# Avoid +git commit -m "fix stuff" +git commit -m "changes" +``` + +## Submitting Changes + +### Pull Request Process + +1. Ensure all tests pass and code is formatted correctly +2. Push your branch to your fork: + ```bash + git push origin feature/your-feature-name + ``` +3. Open a pull request on GitHub with: + - A clear title describing the change + - A detailed description of what was changed and why + - Reference to any related issues (e.g., "Fixes #123") + - Screenshots or examples if applicable +4. Address review comments and make requested changes +5. Ensure the CI/CD pipeline passes +6. Once approved, your PR will be merged + +### Pull Request Template + +```markdown +## Description +Brief explanation of what this PR does. + +## Changes Made +- Change 1 +- Change 2 +- Change 3 + +## Related Issues +Fixes #123 + +## Testing +Describe how you tested these changes. 
+ +## Checklist +- [ ] Code follows style guidelines +- [ ] Tests pass locally +- [ ] Documentation is updated +- [ ] No breaking changes (or documented in PR) +``` + +## Reporting Bugs + +When reporting bugs, please include: + +- **Description**: What you were trying to do +- **Expected behavior**: What should have happened +- **Actual behavior**: What actually happened +- **Environment**: Python version, OS, MySQL version +- **Steps to reproduce**: Clear steps to replicate the issue +- **Error message**: Full error traceback if available +- **Screenshots**: If applicable + +Example: +``` +Title: Crawler fails with timeout on large datasets + +Description: When crawling more than 10,000 pages, the crawler +consistently times out. + +Steps to reproduce: +1. Configure crawler with 15,000 pages +2. Run `python crawler.py` +3. After ~8,000 pages, connection fails + +Expected: Crawler should complete all 15,000 pages +Actual: Crawler crashes with timeout error + +Environment: Python 3.9, Ubuntu 20.04, MySQL 8.0 +``` + +## Suggesting Features + +When suggesting features, explain: + +- **Use case**: Why this feature is needed +- **Expected behavior**: How it should work +- **Alternative approaches**: Other possible implementations +- **Impact**: How it affects existing functionality + +## Documentation + +Help improve documentation by: + +- Fixing typos and grammatical errors +- Adding missing sections or examples +- Clarifying confusing explanations +- Adding inline code comments for complex logic From ea3c366a1d4168be3114f98ffa8948f26f2c96d7 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Mon, 19 Jan 2026 01:37:41 +0800 Subject: [PATCH 07/19] Create node.yml --- .github/workflows/node.yml | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/node.yml diff --git a/.github/workflows/node.yml b/.github/workflows/node.yml new file mode 100644 index 
0000000..4016102 --- /dev/null +++ b/.github/workflows/node.yml @@ -0,0 +1,37 @@ +{{ env.NODE_VERSION }} + cache: 'npm' + + - name: npm install, build, and test + run: | + npm install + npm run build --if-present + npm run test --if-present + + - name: Upload artifact for deployment job + uses: actions/upload-artifact@v4 + with: + name: node-app + path: . + + deploy: + permissions: + contents: none + runs-on: ubuntu-latest + needs: build + environment: + name: 'Development' + url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} + + steps: + - name: Download artifact from build job + uses: actions/download-artifact@v4 + with: + name: node-app + + - name: 'Deploy to Azure WebApp' + id: deploy-to-webapp + uses: azure/webapps-deploy@v2 + with: + app-name: ${{ env.AZURE_WEBAPP_NAME }} + publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }} + package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} \ No newline at end of file From c81bf0c378c8dbb9b891edd4fcefad1690fdf2db Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Mon, 19 Jan 2026 08:02:46 +0800 Subject: [PATCH 08/19] Create CONTRIBUTING.md --- crawler/CONTRIBUTING.md | 224 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 224 insertions(+) create mode 100644 crawler/CONTRIBUTING.md diff --git a/crawler/CONTRIBUTING.md b/crawler/CONTRIBUTING.md new file mode 100644 index 0000000..22f3b8c --- /dev/null +++ b/crawler/CONTRIBUTING.md @@ -0,0 +1,224 @@ +# Contributing to Crawler + +Thank you for your interest in contributing to the Crawler project! We welcome contributions from everyone. This document provides guidelines and instructions for contributing. + +## Code of Conduct + +We are committed to providing a welcoming and inspiring community for all. Please be respectful and constructive in all interactions. Harassment, discrimination, or disruptive behavior will not be tolerated. 
+ +## How to Contribute + +There are many ways to contribute to this project: + +- **Report bugs** by opening an issue with detailed information +- **Suggest features** with clear use cases and expected behavior +- **Improve documentation** by fixing typos or clarifying confusing sections +- **Submit code changes** by creating pull requests with meaningful improvements +- **Review pull requests** and provide constructive feedback to other contributors + +## Getting Started + +### Prerequisites + +- Python 3.8 or higher +- Git +- A MySQL database for testing (optional but recommended) +- A code editor or IDE of your choice + +### Setting Up Your Development Environment + +1. Fork the repository on GitHub +2. Clone your fork locally: + ```bash + git clone https://github.com/your-username/crawler.git + cd crawler + ``` +3. Create a virtual environment: + ```bash + python -m venv venv + source venv/bin/activate # On Windows: venv\Scripts\activate + ``` +4. Install development dependencies: + ```bash + pip install -r requirements-dev.txt + ``` +5. Create a local `.env` file for testing: + ```bash + cp .env.example .env + ``` + +## Making Changes + +### Branch Naming + +Create a descriptive branch name for your changes: +- `feature/add-proxy-support` +- `bugfix/fix-mysql-connection-timeout` +- `docs/improve-readme` +- `test/add-crawler-tests` + +```bash +git checkout -b feature/your-feature-name +``` + +### Code Style + +Follow these guidelines to maintain consistent code quality: + +- Use PEP 8 style guide for Python code +- Keep lines under 100 characters when possible +- Use meaningful variable and function names +- Add docstrings to functions and classes +- Use type hints where applicable + +Example: +```python +def fetch_url(url: str, timeout: int = 10) -> str: + """ + Fetch content from a given URL. 
+ + Args: + url: The URL to fetch + timeout: Request timeout in seconds (default: 10) + + Returns: + The HTML content of the page + + Raises: + requests.exceptions.RequestException: If the request fails + """ + response = requests.get(url, timeout=timeout) + response.raise_for_status() + return response.text +``` + +### Testing + +Before submitting a pull request, ensure your code passes all tests: + +```bash +# Run all tests +pytest + +# Run tests with coverage +pytest --cov=crawler + +# Run specific test file +pytest tests/test_crawler.py +``` + +Write tests for new features: +```python +def test_fetch_url_success(): + """Test that fetch_url returns content for valid URLs.""" + result = fetch_url("https://example.com") + assert result is not None + assert len(result) > 0 +``` + +### Commits + +Write clear, descriptive commit messages: + +```bash +# Good +git commit -m "Add proxy support to crawler + +- Add ProxyManager class to handle proxy rotation +- Update fetch_url to accept proxy configuration +- Add tests for proxy connection handling" + +# Avoid +git commit -m "fix stuff" +git commit -m "changes" +``` + +## Submitting Changes + +### Pull Request Process + +1. Ensure all tests pass and code is formatted correctly +2. Push your branch to your fork: + ```bash + git push origin feature/your-feature-name + ``` +3. Open a pull request on GitHub with: + - A clear title describing the change + - A detailed description of what was changed and why + - Reference to any related issues (e.g., "Fixes #123") + - Screenshots or examples if applicable +4. Address review comments and make requested changes +5. Ensure the CI/CD pipeline passes +6. Once approved, your PR will be merged + +### Pull Request Template + +```markdown +## Description +Brief explanation of what this PR does. + +## Changes Made +- Change 1 +- Change 2 +- Change 3 + +## Related Issues +Fixes #123 + +## Testing +Describe how you tested these changes. 
+ +## Checklist +- [ ] Code follows style guidelines +- [ ] Tests pass locally +- [ ] Documentation is updated +- [ ] No breaking changes (or documented in PR) +``` + +## Reporting Bugs + +When reporting bugs, please include: + +- **Description**: What you were trying to do +- **Expected behavior**: What should have happened +- **Actual behavior**: What actually happened +- **Environment**: Python version, OS, MySQL version +- **Steps to reproduce**: Clear steps to replicate the issue +- **Error message**: Full error traceback if available +- **Screenshots**: If applicable + +Example: +``` +Title: Crawler fails with timeout on large datasets + +Description: When crawling more than 10,000 pages, the crawler +consistently times out. + +Steps to reproduce: +1. Configure crawler with 15,000 pages +2. Run `python crawler.py` +3. After ~8,000 pages, connection fails + +Expected: Crawler should complete all 15,000 pages +Actual: Crawler crashes with timeout error + +Environment: Python 3.9, Ubuntu 20.04, MySQL 8.0 +``` + +## Suggesting Features + +When suggesting features, explain: + +- **Use case**: Why this feature is needed +- **Expected behavior**: How it should work +- **Alternative approaches**: Other possible implementations +- **Impact**: How it affects existing functionality + +## Documentation + +Help improve documentation by: + +- Fixing typos and grammatical errors +- Adding missing sections or examples +- Clarifying confusing explanations +- Adding inline code comments for complex logic \ No newline at end of file From bc3596690d1489f8520e06b76c342d21e77d87e6 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Mon, 19 Jan 2026 10:08:37 +0800 Subject: [PATCH 09/19] Create LICENSE --- crawler/LICENSE | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 crawler/LICENSE diff --git a/crawler/LICENSE b/crawler/LICENSE new file mode 100644 index 0000000..bc7a6e5 --- /dev/null +++ 
b/crawler/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Crawler Project Contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file From 0862923773172cbda5ad2d49ccf14b88e0cb497d Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:13:41 +0800 Subject: [PATCH 10/19] Add Python CI/CD workflow with JSON processing This workflow sets up a comprehensive CI/CD pipeline for Python projects, including code quality checks, JSON validation, unit testing, JSON data processing, database operations, deployment, and security scanning. 
# Patch-series metadata that shares these source lines (kept verbatim):
# --- .github/workflows/python-ci-cd.yml | 348 +++++++++++++++++++++++++++++
# 1 file changed, 348 insertions(+)
# create mode 100644 .github/workflows/python-ci-cd.yml
# diff --git a/.github/workflows/python-ci-cd.yml b/.github/workflows/python-ci-cd.yml
# new file mode 100644
# index 0000000..467dea9
# --- /dev/null
# +++ b/.github/workflows/python-ci-cd.yml
# @@ -0,0 +1,348 @@

name: Python CI/CD with JSON Data Processing

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
  workflow_dispatch:

# Least-privilege default for GITHUB_TOKEN: no job in this workflow pushes
# commits, tags, releases or comments, so read access to contents is enough.
permissions:
  contents: read

env:
  PYTHON_VERSION: '3.11'
  # Directory (relative to the repository root) that holds the JSON data files.
  # Workflow-level env entries are exported to every step's environment.
  JSON_DATA_PATH: 'data'

jobs:
  # Job 1: Code Quality and Linting
  code-quality:
    runs-on: ubuntu-latest
    name: Code Quality Check

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pylint black isort mypy
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi

      - name: Lint with flake8
        run: |
          # Stop the build if there are Python syntax errors or undefined names
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
          # Exit-zero treats all errors as warnings
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

      - name: Check code formatting with black
        run: |
          black --check --diff .

      - name: Check import sorting with isort
        run: |
          isort --check-only --diff .

      - name: Type checking with mypy
        run: |
          # Advisory only: type errors are reported but never fail the job.
          mypy . --ignore-missing-imports || true

  # Job 2: JSON Validation and Schema Check
  json-validation:
    runs-on: ubuntu-latest
    name: Validate JSON Files

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install JSON validation tools
        run: |
          pip install jsonschema pyjson5 pyyaml

      - name: Validate JSON syntax
        run: |
          echo "Validating JSON files..."
          python -c "
          import json
          import os
          import sys

          errors = []
          for root, dirs, files in os.walk('.'):
              for file in files:
                  if file.endswith('.json'):
                      filepath = os.path.join(root, file)
                      try:
                          with open(filepath, 'r', encoding='utf-8') as f:
                              json.load(f)
                          print(f'✓ {filepath} is valid')
                      except json.JSONDecodeError as e:
                          errors.append(f'{filepath}: {str(e)}')
                          print(f'✗ {filepath} is invalid: {e}')

          if errors:
              print(f'\n{len(errors)} JSON file(s) failed validation')
              sys.exit(1)
          else:
              print(f'\nAll JSON files are valid!')
          "

      - name: Upload JSON validation report
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: json-validation-report
          path: |
            **/*.json

  # Job 3: Unit Tests
  test:
    runs-on: ubuntu-latest
    needs: [code-quality, json-validation]
    name: Run Tests
    strategy:
      matrix:
        python-version: ['3.9', '3.10', '3.11', '3.12']

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install pytest pytest-cov pytest-mock pytest-asyncio
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Run tests with pytest
        run: |
          pytest --cov=. --cov-report=xml --cov-report=html --cov-report=term -v

      - name: Upload coverage reports
        uses: codecov/codecov-action@v4
        with:
          file: ./coverage.xml
          flags: unittests
          name: codecov-umbrella
          fail_ci_if_error: false

      - name: Upload test results
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: test-results-${{ matrix.python-version }}
          path: |
            htmlcov/
            coverage.xml

  # Job 4: JSON Data Processing
  process-json-data:
    runs-on: ubuntu-latest
    needs: [test]
    name: Process JSON Data

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install data processing libraries
        run: |
          pip install pandas numpy jsonschema

      - name: Process JSON data files
        run: |
          # The data path is read from the environment instead of being spliced
          # into the script text, and the script's own exit status decides the
          # step result: a missing/empty data directory is reported and passes,
          # while a file that cannot be processed fails the step after the
          # report has been written.
          python -c "
          import json
          import os
          import sys
          from datetime import datetime

          data_path = os.environ['JSON_DATA_PATH']

          # Create processing report
          report = {
              'timestamp': datetime.now().isoformat(),
              'files_processed': [],
              'total_files': 0,
              'status': 'success'
          }

          # os.walk yields nothing for a missing directory, so no data is not an error
          for root, dirs, files in os.walk(data_path):
              for file in files:
                  if file.endswith('.json'):
                      filepath = os.path.join(root, file)
                      try:
                          with open(filepath, 'r') as f:
                              data = json.load(f)
                          report['files_processed'].append({
                              'file': filepath,
                              'size': os.path.getsize(filepath),
                              'keys': list(data.keys()) if isinstance(data, dict) else 'array'
                          })
                          report['total_files'] += 1
                      except Exception as e:
                          report['status'] = 'failed'
                          print(f'Error processing {filepath}: {e}')

          # Save report
          os.makedirs('reports', exist_ok=True)
          with open('reports/json_processing_report.json', 'w') as f:
              json.dump(report, f, indent=2)

          total = report['total_files']
          if total == 0 and report['status'] == 'success':
              print(f'No JSON data files found in {data_path}')
          else:
              print(f'Processed {total} JSON files')

          if report['status'] != 'success':
              sys.exit(1)
          "

      - name: Upload processing report
        # Keep the report available even when processing failed.
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: json-processing-report
          path: reports/

  # Job 5: Database Operations (if applicable)
  database-operations:
    runs-on: ubuntu-latest
    needs: [process-json-data]
    name: Database Sync
    if: github.ref == 'refs/heads/main'

    services:
      postgres:
        image: postgres:15
        env:
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: testdb
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - 5432:5432

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install database libraries
        run: |
          pip install psycopg2-binary sqlalchemy pandas

      - name: JSON to Database migration
        env:
          # Points at the throwaway postgres service container defined above.
          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/testdb
        run: |
          python -c "
          import json
          import os
          from sqlalchemy import create_engine, text

          engine = create_engine(os.environ['DATABASE_URL'])

          with engine.connect() as conn:
              # Create sample table
              conn.execute(text('''
                  CREATE TABLE IF NOT EXISTS json_data (
                      id SERIAL PRIMARY KEY,
                      filename VARCHAR(255),
                      data JSONB,
                      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                  )
              '''))
              conn.commit()
              print('Database table created successfully')

          print('Database operations completed')
          "

  # Job 6: Build and Deploy
  build-deploy:
    runs-on: ubuntu-latest
    needs: [test, process-json-data]
    name: Build and Deploy
    if: github.ref == 'refs/heads/main'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Build package
        run: |
          pip install build setuptools wheel
          python -m build

      - name: Create deployment artifact
        run: |
          mkdir -p deployment
          cp -r dist deployment/
          cp -r ${{ env.JSON_DATA_PATH }} deployment/ || echo "No data directory"

      - name: Upload deployment artifact
        uses: actions/upload-artifact@v4
        with:
          name: deployment-package
          path: deployment/
          retention-days: 30

  # Job 7: Security Scan
  security-scan:
    runs-on: ubuntu-latest
    name: Security Scanning

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Install security tools
        run: |
          pip install bandit safety

      - name: Run Bandit security scan
        run: |
          # Advisory only: findings are written to the report, never fail the job.
          bandit -r . -f json -o bandit-report.json || true

      - name: Check dependencies for vulnerabilities
        run: |
          # Persist the findings so they are uploaded with the other reports
          # instead of only scrolling past in the job log.
          safety check --json > safety-report.json || true

      - name: Upload security reports
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: security-reports
          path: |
            bandit-report.json
            safety-report.json

# Patch-series metadata that shares these source lines (kept verbatim):
# From 21efb248b71427bf354297379a3b11ff22325794 Mon Sep 17 00:00:00 2001
# From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
# Date: Wed, 11 Feb 2026 10:15:32 +0800
# Subject: [PATCH 11/19] Add JSON data processor and validator script
# This script provides functionality for processing, validating, and managing
# JSON data, including reading, writing, merging, and generating statistics.
# Patch-series metadata that shares these source lines (kept verbatim):
# --- .github/workflows/json_processor.py | 372 ++++++++++++++++++++++++++++
# 1 file changed, 372 insertions(+)
# create mode 100644 .github/workflows/json_processor.py
# diff --git a/.github/workflows/json_processor.py b/.github/workflows/json_processor.py
# new file mode 100644
# index 0000000..cbb5943
# --- /dev/null
# +++ b/.github/workflows/json_processor.py
# @@ -0,0 +1,372 @@

#!/usr/bin/env python3
"""
JSON Data Processor and Validator
Handles JSON file operations, validation, and data transformation
"""

import json
import os
import shutil
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Union
from datetime import datetime
import logging

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


class JSONDataProcessor:
    """Handles JSON data processing and validation"""

    def __init__(self, data_dir: str = "data"):
        """
        Initialize JSON Data Processor

        The data directory is created if it does not exist yet, including
        any missing parent directories.

        Args:
            data_dir: Directory containing JSON files
        """
        self.data_dir = Path(data_dir)
        # parents=True so that a nested path such as "var/data" works on a
        # fresh checkout instead of raising FileNotFoundError.
        self.data_dir.mkdir(parents=True, exist_ok=True)

    def validate_json_file(self, filepath: Union[str, Path]) -> bool:
        """
        Validate a JSON file

        Args:
            filepath: Path to JSON file (used as given, not joined to data_dir)

        Returns:
            True if the file parses as JSON, False if it is malformed or
            cannot be read
        """
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                json.load(f)
            logger.info(f"✓ Valid JSON: {filepath}")
            return True
        except json.JSONDecodeError as e:
            logger.error(f"✗ Invalid JSON in {filepath}: {e}")
            return False
        except Exception as e:
            logger.error(f"✗ Error reading {filepath}: {e}")
            return False

    def validate_all_files(self) -> Dict[str, Any]:
        """
        Validate all JSON files in the data directory

        The search is recursive, so files in subdirectories (including the
        "backups" directory written by create_backup) are validated as well.

        Returns:
            Dictionary with validation results: 'total_files', 'valid_files',
            'invalid_files' and 'errors' (paths of the invalid files)
        """
        results = {
            'total_files': 0,
            'valid_files': 0,
            'invalid_files': 0,
            'errors': []
        }

        for json_file in self.data_dir.rglob('*.json'):
            results['total_files'] += 1
            if self.validate_json_file(json_file):
                results['valid_files'] += 1
            else:
                results['invalid_files'] += 1
                results['errors'].append(str(json_file))

        logger.info(f"Validation complete: {results['valid_files']}/{results['total_files']} valid")
        return results

    def read_json(self, filename: str) -> Optional[Union[Dict, List]]:
        """
        Read and parse a JSON file

        Args:
            filename: Name of the JSON file, relative to data_dir

        Returns:
            Parsed JSON data or None if error. A file whose content is the
            JSON literal null also yields None, so None alone does not prove
            that reading failed.
        """
        filepath = self.data_dir / filename
        try:
            with open(filepath, 'r', encoding='utf-8') as f:
                data = json.load(f)
            logger.info(f"Successfully read {filename}")
            return data
        except Exception as e:
            logger.error(f"Error reading {filename}: {e}")
            return None

    def write_json(self, filename: str, data: Union[Dict, List], indent: int = 2) -> bool:
        """
        Write data to a JSON file

        Args:
            filename: Name of the JSON file, relative to data_dir
            data: Data to write
            indent: JSON indentation level

        Returns:
            True if successful, False otherwise
        """
        filepath = self.data_dir / filename
        try:
            with open(filepath, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps non-ASCII text readable in the file
                json.dump(data, f, indent=indent, ensure_ascii=False)
            logger.info(f"Successfully wrote {filename}")
            return True
        except Exception as e:
            logger.error(f"Error writing {filename}: {e}")
            return False

    def merge_json_files(self, output_filename: str, pattern: str = "*.json") -> bool:
        """
        Merge multiple JSON files into one

        Matching files directly inside data_dir (not subdirectories) are read
        in sorted path order. The contents of a list file are appended
        element by element; any other document is appended as one element.
        Files that cannot be read are skipped.

        Args:
            output_filename: Name of output file
            pattern: File pattern to match

        Returns:
            True if successful, False otherwise
        """
        merged_data = []

        # glob() order depends on the filesystem; sorting makes the merged
        # output reproducible across runs and machines.
        for json_file in sorted(self.data_dir.glob(pattern)):
            # Never merge a previous output file back into itself
            if json_file.name == output_filename:
                continue

            data = self.read_json(json_file.name)
            if data is not None:
                if isinstance(data, list):
                    merged_data.extend(data)
                else:
                    merged_data.append(data)

        return self.write_json(output_filename, merged_data)

    def transform_data(self, input_file: str, output_file: str,
                       transformer: Callable[[Any], Any]) -> bool:
        """
        Transform JSON data using a custom function

        Args:
            input_file: Input JSON file
            output_file: Output JSON file
            transformer: Function to transform the data; it receives the
                parsed input and its return value is written to output_file

        Returns:
            True if successful, False otherwise (input unreadable,
            transformer raised, or output could not be written)
        """
        data = self.read_json(input_file)
        if data is None:
            return False

        try:
            transformed_data = transformer(data)
            return self.write_json(output_file, transformed_data)
        except Exception as e:
            logger.error(f"Error transforming data: {e}")
            return False

    def generate_schema(self, data: Union[Dict, List]) -> Dict:
        """
        Generate a basic JSON schema from data

        Objects are described property by property; arrays are described by
        their first element only. An empty array nested under an object key
        gets no "items" entry.

        Args:
            data: JSON data

        Returns:
            JSON schema
        """
        def get_type(value):
            # bool must be tested before int: bool is a subclass of int
            if isinstance(value, bool):
                return "boolean"
            elif isinstance(value, int):
                return "integer"
            elif isinstance(value, float):
                return "number"
            elif isinstance(value, str):
                return "string"
            elif isinstance(value, list):
                return "array"
            elif isinstance(value, dict):
                return "object"
            elif value is None:
                return "null"
            return "string"

        if isinstance(data, dict):
            schema = {
                "type": "object",
                "properties": {}
            }
            for key, value in data.items():
                schema["properties"][key] = {
                    "type": get_type(value)
                }
                if isinstance(value, dict):
                    schema["properties"][key] = self.generate_schema(value)
                elif isinstance(value, list) and value:
                    # Recurse so that a list of objects nested under a key is
                    # described as fully as a top-level list is; for scalar
                    # elements this still yields {"type": ...}.
                    schema["properties"][key]["items"] = self.generate_schema(value[0])
            return schema
        elif isinstance(data, list):
            return {
                "type": "array",
                "items": self.generate_schema(data[0]) if data else {}
            }
        else:
            return {"type": get_type(data)}

    def create_backup(self, filename: str) -> bool:
        """
        Create a backup of a JSON file

        The file is copied byte for byte to
        data_dir/backups/<stem>_backup_<YYYYmmdd_HHMMSS>.json, so the backup
        is faithful even when the source is not valid JSON.

        Args:
            filename: Name of file to backup

        Returns:
            True if successful, False otherwise
        """
        filepath = self.data_dir / filename
        if not filepath.exists():
            logger.error(f"File {filename} does not exist")
            return False

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_name = f"{filepath.stem}_backup_{timestamp}.json"
        backup_path = self.data_dir / "backups" / backup_name

        try:
            backup_path.parent.mkdir(parents=True, exist_ok=True)
            # Copy the raw file instead of parsing and re-serialising it:
            # a parse failure would otherwise be written out as "null" and
            # reported as a successful backup.
            shutil.copy2(filepath, backup_path)
            logger.info(f"Backup created: {backup_name}")
            return True
        except Exception as e:
            logger.error(f"Error creating backup: {e}")
            return False

    def get_statistics(self) -> Dict[str, Any]:
        """
        Get statistics about JSON files in data directory

        The search is recursive, so files in subdirectories (including the
        "backups" directory written by create_backup) are counted as well.

        Returns:
            Dictionary with statistics: 'total_files', 'total_size_bytes',
            'total_size_kb', 'total_size_mb' and a per-file 'files' list
        """
        stats = {
            'total_files': 0,
            'total_size_bytes': 0,
            'files': []
        }

        for json_file in self.data_dir.rglob('*.json'):
            # One stat() call serves both the size and the modification time
            file_stat = json_file.stat()
            file_size = file_stat.st_size
            stats['total_files'] += 1
            stats['total_size_bytes'] += file_size

            stats['files'].append({
                'name': json_file.name,
                'path': str(json_file.relative_to(self.data_dir)),
                'size_bytes': file_size,
                'size_kb': round(file_size / 1024, 2),
                'modified': datetime.fromtimestamp(
                    file_stat.st_mtime
                ).isoformat()
            })

        stats['total_size_kb'] = round(stats['total_size_bytes'] / 1024, 2)
        stats['total_size_mb'] = round(stats['total_size_bytes'] / (1024 * 1024), 2)

        return stats


class JSONDatabaseSync:
    """Synchronize JSON data with database"""

    def __init__(self, database_url: Optional[str] = None):
        """
        Initialize database sync

        Args:
            database_url: Database connection URL; falls back to the
                DATABASE_URL environment variable when omitted
        """
        self.database_url = database_url or os.getenv('DATABASE_URL')

    def sync_to_database(self, json_data: Dict, table_name: str) -> bool:
        """
        Sync JSON data to database

        Currently a placeholder: it only logs the table name and the data
        keys and does not contact any database.

        Args:
            json_data: JSON data to sync
            table_name: Target table name

        Returns:
            True if successful, False otherwise
        """
        try:
            # This is a placeholder - implement actual database logic
            logger.info(f"Syncing data to table: {table_name}")
            logger.info(f"Data keys: {list(json_data.keys())}")
            return True
        except Exception as e:
            logger.error(f"Error syncing to database: {e}")
            return False

    def export_from_database(self, table_name: str, output_file: str) -> bool:
        """
        Export database table to JSON

        Currently a placeholder: it only logs its arguments and writes no
        file.

        Args:
            table_name: Source table name
            output_file: Output JSON file

        Returns:
            True if successful, False otherwise
        """
        try:
            # This is a placeholder - implement actual database logic
            logger.info(f"Exporting from table: {table_name}")
            logger.info(f"Output file: {output_file}")
            return True
        except Exception as e:
            logger.error(f"Error exporting from database: {e}")
            return False


def main():
    """Main function for CLI usage"""
    processor = JSONDataProcessor()

    # Example usage
    print("JSON Data Processor")
    print("=" * 50)

    # Validate all JSON files
    results = processor.validate_all_files()
    print("\nValidation Results:")
    print(f"  Total files: {results['total_files']}")
    print(f"  Valid files: {results['valid_files']}")
    print(f"  Invalid files: {results['invalid_files']}")

    # Get statistics
    stats = processor.get_statistics()
    print("\nStatistics:")
    print(f"  Total files: {stats['total_files']}")
    print(f"  Total size: {stats['total_size_kb']} KB")

    if stats['files']:
        print("\nFiles:")
        for file_info in stats['files']:
            print(f"  - {file_info['name']} ({file_info['size_kb']} KB)")


if __name__ == "__main__":
    main()

# Patch-series metadata that shares these source lines (kept verbatim):
# From 59500120f67ba0aa30bb67305d3546b57ee6af9a Mon Sep 17 00:00:00 2001
# From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
# Date: Wed, 11 Feb 2026 10:18:09 +0800
# Subject: [PATCH 12/19] Add requirements.txt for project dependencies
# Added core dependencies, data processing, JSON processing, database, testing,
# code quality, security, API, and utility libraries.
--- .github/workflows/requirements.txt | 43 ++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 .github/workflows/requirements.txt diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt new file mode 100644 index 0000000..f84f20b --- /dev/null +++ b/.github/workflows/requirements.txt @@ -0,0 +1,43 @@ +# Core dependencies +python-dotenv==1.0.0 +requests==2.31.0 + +# Data processing +pandas==2.1.4 +numpy==1.26.3 + +# JSON processing and validation +jsonschema==4.20.0 +pyjson5==1.6.4 +pyyaml==6.0.1 + +# Database +psycopg2-binary==2.9.9 +sqlalchemy==2.0.25 + +# Testing +pytest==7.4.4 +pytest-cov==4.1.0 +pytest-mock==3.12.0 +pytest-asyncio==0.23.3 + +# Code quality +flake8==7.0.0 +pylint==3.0.3 +black==24.1.1 +isort==5.13.2 +mypy==1.8.0 + +# Security +bandit==1.7.6 +safety==3.0.1 + +# API and web +fastapi==0.109.0 +uvicorn==0.27.0 +httpx==0.26.0 + +# Utilities +click==8.1.7 +rich==13.7.0 +tqdm==4.66.1 From 9be26c569255dcaba16f0efde48ce97c3b8c3263 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:21:27 +0800 Subject: [PATCH 13/19] Revise README for ExplorePi project details Updated README to reflect project name change and added detailed features, installation instructions, and usage examples. --- .github/workflows/README.md | 421 +++++++++++++++++++++++++++++++----- 1 file changed, 372 insertions(+), 49 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index cc583f4..6cb4cb9 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -1,83 +1,406 @@ -# Crawler +# ExplorePi - JSON Data Management System -A web crawler for collecting and processing data from specified sources. +## 📋 Overview -## Table of Contents +A comprehensive Python-based system for managing, validating, processing, and synchronizing JSON data with integrated CI/CD workflows, API services, and database operations. 
-- [Installation](#installation) -- [Configuration](#configuration) -- [Usage](#usage) -- [Database Setup](#database-setup) -- [Contributing](#contributing) +## 🚀 Features -## Installation +- **JSON Validation & Schema Management** - Validate JSON files against custom schemas +- **Data Processing Pipeline** - Automated data transformation and aggregation +- **REST API Service** - FastAPI-based API for JSON operations +- **Database Synchronization** - Sync JSON data with PostgreSQL +- **GitHub Actions Workflows** - Automated CI/CD pipelines +- **Backup & Recovery** - Automated backup system with versioning +- **Data Analytics** - Statistics and reporting capabilities -Install the required dependencies: +## 📁 Project Structure +``` +ExplorePi/ +├── .github/ +│ └── workflows/ +│ ├── python-ci-cd.yml # Main CI/CD workflow +│ └── json-sync-workflow.yml # Data synchronization workflow +├── data/ # JSON data files +│ ├── users.json +│ ├── products.json +│ └── config.json +├── schemas/ # JSON schema definitions +│ ├── user_schema.json +│ ├── product_schema.json +│ └── config_schema.json +├── backups/ # Backup archives +├── processed/ # Processed data outputs +├── reports/ # Generated reports +├── json_processor.py # Core JSON processing library +├── schema_validator.py # Schema validation utilities +├── json_api.py # REST API service +└── requirements.txt # Python dependencies +``` + +## 🔧 Installation + +### Prerequisites + +- Python 3.9 or higher +- PostgreSQL 15+ (for database features) +- Git +- pip + +### Setup + +1. **Clone the repository** +```bash +git clone https://github.com/arifinahmad99-cloud/ExplorePi.git +cd ExplorePi +``` + +2. **Create virtual environment** +```bash +python -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +3. **Install dependencies** ```bash pip install -r requirements.txt ``` -Ensure you have Python 3.8+ installed on your system. +4. 
**Set up environment variables** +```bash +cp .env.example .env +# Edit .env with your configuration +``` + +## 📚 Usage + +### JSON Processor + +Process and validate JSON files: + +```python +from json_processor import JSONDataProcessor + +# Initialize processor +processor = JSONDataProcessor(data_dir="data") + +# Validate all JSON files +results = processor.validate_all_files() +print(f"Valid files: {results['valid_files']}/{results['total_files']}") + +# Read JSON file +data = processor.read_json("users.json") + +# Write JSON file +processor.write_json("output.json", {"key": "value"}) + +# Merge multiple files +processor.merge_json_files("merged.json", "*.json") + +# Get statistics +stats = processor.get_statistics() +print(f"Total files: {stats['total_files']}") +print(f"Total size: {stats['total_size_mb']} MB") +``` + +### Schema Validator + +Validate JSON data against schemas: + +```python +from schema_validator import JSONSchemaValidator + +# Initialize validator +validator = JSONSchemaValidator(schema_dir="schemas") + +# Validate data +data = {"id": 1, "username": "john", "email": "john@example.com"} +is_valid, error = validator.validate_data(data, "user_schema") + +if is_valid: + print("✓ Data is valid") +else: + print(f"✗ Validation error: {error}") + +# Create new schema +user_schema = { + "type": "object", + "required": ["id", "username", "email"], + "properties": { + "id": {"type": "integer"}, + "username": {"type": "string"}, + "email": {"type": "string", "format": "email"} + } +} +validator.create_schema("user_schema", user_schema) +``` + +### REST API Service + +Start the API server: + +```bash +python json_api.py +``` + +Or with uvicorn: + +```bash +uvicorn json_api:app --reload --host 0.0.0.0 --port 8000 +``` + +**API Endpoints:** + +```bash +# List all files +GET http://localhost:8000/files + +# Get specific file +GET http://localhost:8000/files/users.json + +# Create new file +POST http://localhost:8000/files +{ + "filename": "new_data.json", + 
"data": {"key": "value"} +} + +# Update file +PUT http://localhost:8000/files/users.json +{ + "id": 1, + "username": "updated" +} + +# Delete file +DELETE http://localhost:8000/files/old_data.json + +# Upload file +POST http://localhost:8000/upload +# (multipart/form-data with file) + +# Validate data +POST http://localhost:8000/validate +{ + "data": {"test": "data"}, + "schema_name": "user_schema" +} + +# Transform data +POST http://localhost:8000/transform +{ + "input_filename": "input.json", + "output_filename": "output.json", + "operation": "filter", + "parameters": { + "key": "status", + "value": "active" + } +} + +# Get statistics +GET http://localhost:8000/stats + +# Search data +GET http://localhost:8000/search?query=john&field=username + +# Health check +GET http://localhost:8000/health +``` + +### Command Line Interface -## Configuration +Quick validation and processing: -### Environment Variables +```bash +# Validate all JSON files +python json_processor.py -Create a `.env` file in the project root with the following variables: +# Validate with schemas +python schema_validator.py +# Create example data and schemas +python schema_validator.py ``` -DATABASE_HOST=localhost -DATABASE_USER=crawler_user -DATABASE_PASSWORD=your_password -DATABASE_NAME=crawler_db + +## 🔄 GitHub Actions Workflows + +### Python CI/CD Workflow + +Automatically runs on push and pull requests: + +**Jobs:** +1. **Code Quality** - Linting with flake8, black, isort, mypy +2. **JSON Validation** - Validates all JSON files +3. **Testing** - Runs pytest on Python 3.9-3.12 +4. **Data Processing** - Processes JSON files +5. **Database Operations** - Syncs with PostgreSQL +6. **Build & Deploy** - Creates deployment packages +7. **Security Scan** - Runs bandit and safety checks + +### JSON Synchronization Workflow + +Scheduled daily at midnight UTC: + +**Jobs:** +1. **Validate JSON** - Validates all data files +2. **Backup Data** - Creates timestamped backups +3. 
**Process Data** - Transforms and aggregates data +4. **Generate Reports** - Creates HTML/JSON reports +5. **Database Sync** - Syncs to PostgreSQL (optional) +6. **Notification** - Summary of results + +**Manual Trigger:** +```bash +# Go to Actions tab in GitHub +# Select "JSON Data Synchronization" +# Click "Run workflow" +# Choose sync type: full, incremental, or validate_only ``` -Update these values according to your local environment. +## 🗄️ Database Integration + +### PostgreSQL Setup + +```sql +-- Create database +CREATE DATABASE json_data; + +-- Tables are auto-created by workflows +-- json_records: Stores JSON data +-- sync_history: Tracks synchronization history +``` + +### Sync Data to Database + +```python +from json_processor import JSONDatabaseSync -## Usage +# Initialize sync +db_sync = JSONDatabaseSync(database_url="postgresql://user:pass@localhost/json_data") -Run the crawler with: +# Sync to database +data = {"id": 1, "name": "Example"} +db_sync.sync_to_database(data, table_name="json_records") + +# Export from database +db_sync.export_from_database("json_records", "exported.json") +``` + +## 📊 Data Transformations + +### Available Operations + +1. **Filter** - Filter data by field values +```python +request = TransformRequest( + input_filename="users.json", + output_filename="active_users.json", + operation="filter", + parameters={"key": "active", "value": True} +) +``` + +2. **Map** - Rename/transform fields +```python +request = TransformRequest( + input_filename="users.json", + output_filename="transformed.json", + operation="map", + parameters={"field_map": {"old_name": "new_name"}} +) +``` + +3. 
**Sort** - Sort list data +```python +request = TransformRequest( + input_filename="users.json", + output_filename="sorted.json", + operation="sort", + parameters={"key": "created_at", "reverse": True} +) +``` + +## 🔐 Security + +- **Dependency Scanning** - Automated with Safety +- **Code Analysis** - Bandit for security issues +- **Secret Management** - Use GitHub Secrets for credentials +- **Input Validation** - All API endpoints validate input +- **SQL Injection Protection** - Parameterized queries only + +## 📈 Monitoring & Logging + +### Logging Configuration + +```python +import logging + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler('app.log'), + logging.StreamHandler() + ] +) +``` + +### Metrics & Reports + +- **Validation Reports** - JSON validation results +- **Processing Reports** - Data transformation statistics +- **Sync History** - Database synchronization logs +- **API Metrics** - Request/response logs + +## 🧪 Testing + +Run tests: ```bash -python crawler.py +# All tests +pytest + +# With coverage +pytest --cov=. --cov-report=html + +# Specific test file +pytest tests/test_processor.py + +# Verbose mode +pytest -v ``` -Optional flags: -- `--verbose`: Enable detailed logging output -- `--limit N`: Limit crawling to N pages -- `--timeout S`: Set request timeout to S seconds +## 🤝 Contributing -## Database Setup +1. Fork the repository +2. Create feature branch (`git checkout -b feature/AmazingFeature`) +3. Commit changes (`git commit -m 'Add AmazingFeature'`) +4. Push to branch (`git push origin feature/AmazingFeature`) +5. Open Pull Request -### MySQL Configuration +## 📝 License -The crawler uses MySQL to store collected data. Follow these steps to set up your database: +This project is licensed under the MIT License - see LICENSE file for details. -1. 
**Install MySQL**: Download and install from [MySQL Official Website](https://dev.mysql.com/downloads/mysql/) +## 👤 Author -2. **Create Database and User**: - ```sql - CREATE DATABASE crawler_db; - CREATE USER 'crawler_user'@'localhost' IDENTIFIED BY 'your_password'; - GRANT ALL PRIVILEGES ON crawler_db.* TO 'crawler_user'@'localhost'; - FLUSH PRIVILEGES; - ``` +**Ahmad Arifin** +- GitHub: [@arifinahmad99-cloud](https://github.com/arifinahmad99-cloud) -3. **Initialize Tables**: Run the database migration script: - ```bash - python scripts/init_db.py - ``` +## 🙏 Acknowledgments -### Connection Details +- FastAPI framework +- PostgreSQL database +- GitHub Actions +- Python community -- **Host**: localhost (default) -- **Port**: 3306 (default MySQL port) -- **User**: crawler_user -- **Database**: crawler_db +## 📞 Support -Update the connection parameters in your `.env` file if using different settings. +For support, please open an issue in the GitHub repository. -## Contributing +--- -Please read CONTRIBUTING.md for details on our code of conduct and the process for submitting pull requests. \ No newline at end of file +**Version:** 2.0.0 +**Last Updated:** February 2026 From d4a50ae04f653d51d02950c9a30208fbe082d702 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:27:48 +0800 Subject: [PATCH 14/19] Remove author section from README Removed author information from the README. --- .github/workflows/README.md | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 6cb4cb9..51a09b5 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -384,11 +384,6 @@ pytest -v This project is licensed under the MIT License - see LICENSE file for details. 
-## 👤 Author - -**Ahmad Arifin** -- GitHub: [@arifinahmad99-cloud](https://github.com/arifinahmad99-cloud) - ## 🙏 Acknowledgments - FastAPI framework From 69d51072d612f180274223b1bf8076ad37749de3 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:30:28 +0800 Subject: [PATCH 15/19] Add FastAPI JSON Data API service This file implements a FastAPI-based REST API for managing JSON data, including endpoints for file operations, validation, and transformation. --- .github/workflows/json_api.py | 401 ++++++++++++++++++++++++++++++++++ 1 file changed, 401 insertions(+) create mode 100644 .github/workflows/json_api.py diff --git a/.github/workflows/json_api.py b/.github/workflows/json_api.py new file mode 100644 index 0000000..ad0799d --- /dev/null +++ b/.github/workflows/json_api.py @@ -0,0 +1,401 @@ +#!/usr/bin/env python3 +""" +JSON Data API Service +FastAPI-based REST API for JSON data operations +""" + +from fastapi import FastAPI, HTTPException, UploadFile, File, Query +from fastapi.responses import JSONResponse, FileResponse +from fastapi.middleware.cors import CORSMiddleware +from pydantic import BaseModel, Field +from typing import List, Dict, Any, Optional +from datetime import datetime +from pathlib import Path +import json +import logging + +# Configure logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Initialize FastAPI app +app = FastAPI( + title="JSON Data API", + description="RESTful API for JSON data management and operations", + version="2.0.0" +) + +# Add CORS middleware +app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], +) + +# Data directory +DATA_DIR = Path("data") +DATA_DIR.mkdir(exist_ok=True) + +# Pydantic models +class JSONDataModel(BaseModel): + """Model for JSON data""" + filename: str = Field(..., description="Name of the JSON file") + data: Dict[str, 
Any] | List[Any] = Field(..., description="JSON data content") + +class ValidationRequest(BaseModel): + """Model for validation request""" + data: Dict[str, Any] | List[Any] = Field(..., description="Data to validate") + schema_name: Optional[str] = Field(None, description="Schema name for validation") + +class TransformRequest(BaseModel): + """Model for data transformation request""" + input_filename: str = Field(..., description="Input file name") + output_filename: str = Field(..., description="Output file name") + operation: str = Field(..., description="Transformation operation") + parameters: Optional[Dict[str, Any]] = Field(default={}, description="Operation parameters") + +class APIResponse(BaseModel): + """Standard API response model""" + status: str = Field(..., description="Response status") + message: str = Field(..., description="Response message") + data: Optional[Dict[str, Any]] = Field(None, description="Response data") + timestamp: datetime = Field(default_factory=datetime.now) + + +# Utility functions +def read_json_file(filename: str) -> Dict | List: + """Read JSON file from data directory""" + filepath = DATA_DIR / filename + if not filepath.exists(): + raise HTTPException(status_code=404, detail=f"File {filename} not found") + + try: + with open(filepath, 'r', encoding='utf-8') as f: + return json.load(f) + except json.JSONDecodeError as e: + raise HTTPException(status_code=400, detail=f"Invalid JSON in file: {str(e)}") + +def write_json_file(filename: str, data: Dict | List) -> None: + """Write JSON file to data directory""" + filepath = DATA_DIR / filename + try: + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2, ensure_ascii=False) + except Exception as e: + raise HTTPException(status_code=500, detail=f"Error writing file: {str(e)}") + + +# API Routes +@app.get("/", response_model=APIResponse) +async def root(): + """Root endpoint - API information""" + return APIResponse( + status="success", + message="JSON 
Data API is running", + data={ + "version": "2.0.0", + "endpoints": { + "GET /files": "List all JSON files", + "GET /files/{filename}": "Get specific file content", + "POST /files": "Create new JSON file", + "PUT /files/{filename}": "Update existing file", + "DELETE /files/{filename}": "Delete file", + "POST /validate": "Validate JSON data", + "POST /transform": "Transform JSON data", + "GET /stats": "Get statistics" + } + } + ) + +@app.get("/health") +async def health_check(): + """Health check endpoint""" + return {"status": "healthy", "timestamp": datetime.now().isoformat()} + +@app.get("/files", response_model=APIResponse) +async def list_files(): + """List all JSON files in data directory""" + try: + files = [] + for filepath in DATA_DIR.glob("*.json"): + stat = filepath.stat() + files.append({ + "filename": filepath.name, + "size_bytes": stat.st_size, + "size_kb": round(stat.st_size / 1024, 2), + "modified": datetime.fromtimestamp(stat.st_mtime).isoformat() + }) + + return APIResponse( + status="success", + message=f"Found {len(files)} JSON files", + data={"files": files, "total": len(files)} + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/files/{filename}") +async def get_file(filename: str): + """Get content of a specific JSON file""" + try: + data = read_json_file(filename) + return JSONResponse(content=data) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/files", response_model=APIResponse) +async def create_file(json_data: JSONDataModel): + """Create a new JSON file""" + try: + filepath = DATA_DIR / json_data.filename + if filepath.exists(): + raise HTTPException(status_code=409, detail="File already exists") + + write_json_file(json_data.filename, json_data.data) + + return APIResponse( + status="success", + message=f"File {json_data.filename} created successfully", + data={"filename": json_data.filename} + ) + except 
HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.put("/files/{filename}", response_model=APIResponse) +async def update_file(filename: str, json_data: Dict | List): + """Update an existing JSON file""" + try: + filepath = DATA_DIR / filename + if not filepath.exists(): + raise HTTPException(status_code=404, detail="File not found") + + write_json_file(filename, json_data) + + return APIResponse( + status="success", + message=f"File {filename} updated successfully", + data={"filename": filename} + ) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.delete("/files/{filename}", response_model=APIResponse) +async def delete_file(filename: str): + """Delete a JSON file""" + try: + filepath = DATA_DIR / filename + if not filepath.exists(): + raise HTTPException(status_code=404, detail="File not found") + + filepath.unlink() + + return APIResponse( + status="success", + message=f"File {filename} deleted successfully", + data={"filename": filename} + ) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/upload") +async def upload_file(file: UploadFile = File(...)): + """Upload a JSON file""" + if not file.filename.endswith('.json'): + raise HTTPException(status_code=400, detail="Only JSON files are allowed") + + try: + content = await file.read() + data = json.loads(content.decode('utf-8')) + + write_json_file(file.filename, data) + + return APIResponse( + status="success", + message=f"File {file.filename} uploaded successfully", + data={"filename": file.filename, "size": len(content)} + ) + except json.JSONDecodeError: + raise HTTPException(status_code=400, detail="Invalid JSON file") + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/validate", response_model=APIResponse) +async def validate_data(request: ValidationRequest): + 
"""Validate JSON data""" + try: + # Basic validation (data is already parsed if we get here) + is_valid = True + message = "Data is valid JSON" + + # Additional validation logic can be added here + # For example, schema validation if schema_name is provided + + return APIResponse( + status="success" if is_valid else "error", + message=message, + data={ + "valid": is_valid, + "data_type": type(request.data).__name__, + "schema": request.schema_name + } + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.post("/transform", response_model=APIResponse) +async def transform_data(request: TransformRequest): + """Transform JSON data""" + try: + input_data = read_json_file(request.input_filename) + + # Perform transformation based on operation + transformed_data = None + + if request.operation == "filter": + # Filter data based on parameters + key = request.parameters.get("key") + value = request.parameters.get("value") + if isinstance(input_data, list): + transformed_data = [item for item in input_data if item.get(key) == value] + else: + transformed_data = input_data + + elif request.operation == "map": + # Map/transform fields + field_map = request.parameters.get("field_map", {}) + if isinstance(input_data, list): + transformed_data = [ + {field_map.get(k, k): v for k, v in item.items()} + for item in input_data + ] + else: + transformed_data = {field_map.get(k, k): v for k, v in input_data.items()} + + elif request.operation == "sort": + # Sort list data + if isinstance(input_data, list): + sort_key = request.parameters.get("key") + reverse = request.parameters.get("reverse", False) + transformed_data = sorted(input_data, key=lambda x: x.get(sort_key, ""), reverse=reverse) + else: + transformed_data = input_data + + else: + raise HTTPException(status_code=400, detail=f"Unknown operation: {request.operation}") + + # Write transformed data + write_json_file(request.output_filename, transformed_data) + + return APIResponse( + 
status="success", + message=f"Data transformed successfully", + data={ + "input": request.input_filename, + "output": request.output_filename, + "operation": request.operation + } + ) + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/stats", response_model=APIResponse) +async def get_statistics(): + """Get statistics about JSON files""" + try: + total_files = 0 + total_size = 0 + file_types = {} + + for filepath in DATA_DIR.glob("*.json"): + total_files += 1 + total_size += filepath.stat().st_size + + # Categorize files + category = filepath.stem.split('_')[0] if '_' in filepath.stem else 'other' + file_types[category] = file_types.get(category, 0) + 1 + + return APIResponse( + status="success", + message="Statistics retrieved successfully", + data={ + "total_files": total_files, + "total_size_bytes": total_size, + "total_size_kb": round(total_size / 1024, 2), + "total_size_mb": round(total_size / (1024 * 1024), 2), + "file_categories": file_types + } + ) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/search") +async def search_data( + query: str = Query(..., description="Search query"), + field: Optional[str] = Query(None, description="Field to search in") +): + """Search across all JSON files""" + try: + results = [] + + for filepath in DATA_DIR.glob("*.json"): + try: + data = read_json_file(filepath.name) + + # Simple search implementation + if isinstance(data, list): + for item in data: + if isinstance(item, dict): + if field: + if field in item and query.lower() in str(item[field]).lower(): + results.append({ + "file": filepath.name, + "data": item + }) + else: + if any(query.lower() in str(v).lower() for v in item.values()): + results.append({ + "file": filepath.name, + "data": item + }) + elif isinstance(data, dict): + if field: + if field in data and query.lower() in str(data[field]).lower(): + results.append({ + "file": filepath.name, + 
"data": data + }) + else: + if any(query.lower() in str(v).lower() for v in data.values()): + results.append({ + "file": filepath.name, + "data": data + }) + except: + continue + + return JSONResponse(content={ + "query": query, + "field": field, + "results": results, + "total": len(results) + }) + except Exception as e: + raise HTTPException(status_code=500, detail=str(e)) + + +# Run the application +if __name__ == "__main__": + import uvicorn + uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info") From fa5a1cf3fe2324dcbc3b8107aa6fda0f6384473e Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 10:59:30 +0800 Subject: [PATCH 16/19] Add JSON data synchronization workflow --- .github/workflows/json-sync-workflow.yml | 492 +++++++++++++++++++++++ 1 file changed, 492 insertions(+) create mode 100644 .github/workflows/json-sync-workflow.yml diff --git a/.github/workflows/json-sync-workflow.yml b/.github/workflows/json-sync-workflow.yml new file mode 100644 index 0000000..f92270f --- /dev/null +++ b/.github/workflows/json-sync-workflow.yml @@ -0,0 +1,492 @@ +name: JSON Data Synchronization + +on: + schedule: + # Run every day at midnight UTC + - cron: '0 0 * * *' + push: + paths: + - 'data/**/*.json' + - 'schemas/**/*.json' + workflow_dispatch: + inputs: + sync_type: + description: 'Type of synchronization' + required: true + type: choice + options: + - full + - incremental + - validate_only + +env: + DATA_DIR: 'data' + SCHEMA_DIR: 'schemas' + BACKUP_DIR: 'backups' + +jobs: + # Job 1: Validate JSON Files + validate-json: + runs-on: ubuntu-latest + name: Validate JSON Files + + outputs: + validation_status: ${{ steps.validate.outputs.status }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + pip install jsonschema pyyaml + + - name: Validate JSON 
syntax and schemas + id: validate + run: | + python3 << 'EOF' + import json + import os + import sys + from pathlib import Path + + errors = [] + warnings = [] + validated = [] + + # Validate all JSON files + for root, dirs, files in os.walk('${{ env.DATA_DIR }}'): + for file in files: + if file.endswith('.json'): + filepath = Path(root) / file + try: + with open(filepath, 'r', encoding='utf-8') as f: + data = json.load(f) + validated.append(str(filepath)) + print(f'✓ {filepath} - Valid JSON') + except json.JSONDecodeError as e: + errors.append(f'{filepath}: {str(e)}') + print(f'✗ {filepath} - Invalid JSON: {e}') + except Exception as e: + warnings.append(f'{filepath}: {str(e)}') + print(f'⚠ {filepath} - Warning: {e}') + + # Create validation report + report = { + 'timestamp': '2026-02-11T00:00:00Z', + 'total_files': len(validated) + len(errors), + 'valid_files': len(validated), + 'invalid_files': len(errors), + 'warnings': len(warnings), + 'files': validated, + 'errors': errors, + 'warnings_list': warnings + } + + os.makedirs('reports', exist_ok=True) + with open('reports/validation_report.json', 'w') as f: + json.dump(report, f, indent=2) + + print(f'\n📊 Summary:') + print(f' Total files: {report["total_files"]}') + print(f' Valid: {report["valid_files"]}') + print(f' Invalid: {report["invalid_files"]}') + print(f' Warnings: {report["warnings"]}') + + if errors: + print(f'\n❌ Validation failed with {len(errors)} error(s)') + sys.exit(1) + else: + print(f'\n✅ All JSON files are valid') + print(f'::set-output name=status::success') + EOF + + - name: Upload validation report + if: always() + uses: actions/upload-artifact@v4 + with: + name: validation-report + path: reports/validation_report.json + + # Job 2: Backup JSON Data + backup-data: + runs-on: ubuntu-latest + needs: validate-json + if: needs.validate-json.outputs.validation_status == 'success' + name: Backup JSON Data + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Create 
timestamped backup + run: | + TIMESTAMP=$(date +%Y%m%d_%H%M%S) + BACKUP_NAME="json_backup_${TIMESTAMP}" + + mkdir -p ${{ env.BACKUP_DIR }}/$BACKUP_NAME + + # Copy all JSON files + if [ -d "${{ env.DATA_DIR }}" ]; then + cp -r ${{ env.DATA_DIR }}/* ${{ env.BACKUP_DIR }}/$BACKUP_NAME/ || echo "No data to backup" + fi + + # Create archive + cd ${{ env.BACKUP_DIR }} + tar -czf ${BACKUP_NAME}.tar.gz $BACKUP_NAME + rm -rf $BACKUP_NAME + + echo "✓ Backup created: ${BACKUP_NAME}.tar.gz" + ls -lh ${BACKUP_NAME}.tar.gz + + - name: Upload backup artifact + uses: actions/upload-artifact@v4 + with: + name: json-backup + path: ${{ env.BACKUP_DIR }}/*.tar.gz + retention-days: 30 + + # Job 3: Transform and Process Data + process-data: + runs-on: ubuntu-latest + needs: validate-json + if: needs.validate-json.outputs.validation_status == 'success' + name: Process and Transform Data + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install processing libraries + run: | + pip install pandas numpy jsonschema + + - name: Process JSON data + run: | + python3 << 'EOF' + import json + import os + from pathlib import Path + from datetime import datetime + + def process_json_files(data_dir): + """Process all JSON files and create aggregated reports""" + + all_data = [] + stats = { + 'total_records': 0, + 'files_processed': 0, + 'data_types': {}, + 'timestamp': datetime.now().isoformat() + } + + for json_file in Path(data_dir).rglob('*.json'): + try: + with open(json_file, 'r') as f: + data = json.load(f) + + # Collect statistics + if isinstance(data, list): + stats['total_records'] += len(data) + all_data.extend(data) + elif isinstance(data, dict): + stats['total_records'] += 1 + all_data.append(data) + + stats['files_processed'] += 1 + + # Track data types + file_type = json_file.stem + stats['data_types'][file_type] = stats['data_types'].get(file_type, 0) + 1 + + 
print(f'✓ Processed: {json_file.name}') + + except Exception as e: + print(f'✗ Error processing {json_file}: {e}') + + return all_data, stats + + # Process data + data, statistics = process_json_files('${{ env.DATA_DIR }}') + + # Save aggregated data + os.makedirs('processed', exist_ok=True) + + with open('processed/aggregated_data.json', 'w') as f: + json.dump(data, f, indent=2) + + with open('processed/statistics.json', 'w') as f: + json.dump(statistics, f, indent=2) + + print(f'\n📊 Processing Statistics:') + print(f' Files processed: {statistics["files_processed"]}') + print(f' Total records: {statistics["total_records"]}') + print(f' Data types: {statistics["data_types"]}') + EOF + + - name: Upload processed data + uses: actions/upload-artifact@v4 + with: + name: processed-data + path: processed/ + + # Job 4: Generate Reports + generate-reports: + runs-on: ubuntu-latest + needs: [validate-json, process-data] + name: Generate Data Reports + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Download processed data + uses: actions/download-artifact@v4 + with: + name: processed-data + path: processed/ + + - name: Generate comprehensive report + run: | + python3 << 'EOF' + import json + import os + from datetime import datetime + from pathlib import Path + + # Load statistics + with open('processed/statistics.json', 'r') as f: + stats = json.load(f) + + # Create HTML report + html_report = f""" + + + + JSON Data Synchronization Report + + + +

JSON Data Synchronization Report

+

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S UTC')}

+ +
+

Statistics

+
Files Processed: {stats['files_processed']}
+
Total Records: {stats['total_records']}
+
Data Types: {', '.join(stats['data_types'].keys())}
+
+ +

Status

+

✓ Synchronization completed successfully

+ + + """ + + os.makedirs('reports', exist_ok=True) + with open('reports/sync_report.html', 'w') as f: + f.write(html_report) + + print('✓ HTML report generated') + EOF + + - name: Upload reports + uses: actions/upload-artifact@v4 + with: + name: sync-reports + path: reports/ + + # Job 5: Database Synchronization (if enabled) + sync-to-database: + runs-on: ubuntu-latest + needs: [validate-json, process-data] + if: github.event.inputs.sync_type == 'full' || github.event.inputs.sync_type == 'incremental' + name: Sync to Database + + services: + postgres: + image: postgres:15 + env: + POSTGRES_PASSWORD: postgres + POSTGRES_DB: json_data + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 + ports: + - 5432:5432 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install database libraries + run: | + pip install psycopg2-binary sqlalchemy pandas + + - name: Download processed data + uses: actions/download-artifact@v4 + with: + name: processed-data + path: processed/ + + - name: Sync data to database + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/json_data + SYNC_TYPE: ${{ github.event.inputs.sync_type || 'incremental' }} + run: | + python3 << 'EOF' + import json + import os + from sqlalchemy import create_engine, text, Table, Column, Integer, String, JSON, MetaData, DateTime + from datetime import datetime + + # Connect to database + engine = create_engine(os.environ['DATABASE_URL']) + metadata = MetaData() + + # Create tables + with engine.connect() as conn: + # Main data table + conn.execute(text(''' + CREATE TABLE IF NOT EXISTS json_records ( + id SERIAL PRIMARY KEY, + data_type VARCHAR(100), + record_data JSONB, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''')) + + # Sync history table + conn.execute(text(''' + 
CREATE TABLE IF NOT EXISTS sync_history ( + id SERIAL PRIMARY KEY, + sync_type VARCHAR(50), + records_synced INTEGER, + status VARCHAR(50), + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP + ) + ''')) + + conn.commit() + print('✓ Database tables created/verified') + + # Load and sync data + with open('processed/aggregated_data.json', 'r') as f: + data = json.load(f) + + synced_count = 0 + sync_type = os.environ.get('SYNC_TYPE', 'incremental') + + with engine.connect() as conn: + if sync_type == 'full': + # Clear existing data for full sync + conn.execute(text('TRUNCATE TABLE json_records')) + conn.commit() + print('✓ Cleared existing data for full sync') + + # Insert data + for record in data: + try: + conn.execute(text(''' + INSERT INTO json_records (data_type, record_data) + VALUES (:data_type, :record_data) + '''), { + 'data_type': record.get('type', 'unknown'), + 'record_data': json.dumps(record) + }) + synced_count += 1 + except Exception as e: + print(f'Warning: Could not sync record: {e}') + + # Record sync history + conn.execute(text(''' + INSERT INTO sync_history (sync_type, records_synced, status) + VALUES (:sync_type, :records_synced, :status) + '''), { + 'sync_type': sync_type, + 'records_synced': synced_count, + 'status': 'success' + }) + + conn.commit() + print(f'✓ Synced {synced_count} records to database') + EOF + + - name: Verify database sync + env: + DATABASE_URL: postgresql://postgres:postgres@localhost:5432/json_data + run: | + python3 << 'EOF' + from sqlalchemy import create_engine, text + import os + + engine = create_engine(os.environ['DATABASE_URL']) + + with engine.connect() as conn: + result = conn.execute(text('SELECT COUNT(*) FROM json_records')) + count = result.scalar() + print(f'\n✓ Database contains {count} records') + + result = conn.execute(text(''' + SELECT sync_type, records_synced, status, timestamp + FROM sync_history + ORDER BY timestamp DESC + LIMIT 1 + ''')) + last_sync = result.fetchone() + if last_sync: + print(f'✓ Last 
sync: {last_sync[0]} - {last_sync[1]} records - {last_sync[2]}') + EOF + + # Job 6: Notification + notify-completion: + runs-on: ubuntu-latest + needs: [validate-json, backup-data, process-data, generate-reports] + if: always() + name: Send Notification + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Create notification summary + run: | + echo "# JSON Data Synchronization Complete" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "## Status: ✅ Success" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "- Validation: ${{ needs.validate-json.result }}" >> $GITHUB_STEP_SUMMARY + echo "- Backup: ${{ needs.backup-data.result }}" >> $GITHUB_STEP_SUMMARY + echo "- Processing: ${{ needs.process-data.result }}" >> $GITHUB_STEP_SUMMARY + echo "- Reports: ${{ needs.generate-reports.result }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "**Workflow completed at:** $(date -u)" >> $GITHUB_STEP_SUMMARY From 2abeb6519130a40b77b058e4734ab8dd760d81b2 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 11:13:05 +0800 Subject: [PATCH 17/19] Add .gitignore for Python and general project files --- .github/workflows/.gitignore | 127 +++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 .github/workflows/.gitignore diff --git a/.github/workflows/.gitignore b/.github/workflows/.gitignore new file mode 100644 index 0000000..dd3b8ce --- /dev/null +++ b/.github/workflows/.gitignore @@ -0,0 +1,127 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST +*.manifest +*.spec +pip-log.txt +pip-delete-this-directory.txt + +# Virtual Environment +venv/ +ENV/ +env/ +.venv + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Testing 
+.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.log +.hypothesis/ +.pytest_cache/ +htmlcov/ +.mypy_cache/ +.dmypy.json +dmypy.json + +# Documentation +docs/_build/ +.sphinx/ + +# Environment +.env +.env.local +.env.*.local +*.env + +# Database +*.db +*.sqlite +*.sqlite3 + +# Backups +backups/ +*.tar.gz +*.zip +*.bak + +# Logs +logs/ +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# OS +.DS_Store +Thumbs.db +desktop.ini + +# Temporary +tmp/ +temp/ +*.tmp + +# Reports +reports/*.html +reports/*.json +!reports/.gitkeep + +# Processed data (keep structure, ignore content) +processed/* +!processed/.gitkeep + +# Security +*.pem +*.key +*.cert +secrets/ + +# Node (if using any JS tools) +node_modules/ +package-lock.json + +# Jupyter Notebook +.ipynb_checkpoints +*.ipynb + +# Profiling +*.prof +*.lprof +.profiling/ + +# Docker +docker-compose.override.yml From 35b88ffcd93868ef2da75f941135bf22c06987c4 Mon Sep 17 00:00:00 2001 From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Wed, 11 Feb 2026 11:55:01 +0800 Subject: [PATCH 18/19] Update and rename azure-webapps-node.yml to docker-compose.yml --- .github/workflows/azure-webapps-node.yml | 78 --------------- .github/workflows/docker-compose.yml | 122 +++++++++++++++++++++++ 2 files changed, 122 insertions(+), 78 deletions(-) delete mode 100644 .github/workflows/azure-webapps-node.yml create mode 100644 .github/workflows/docker-compose.yml diff --git a/.github/workflows/azure-webapps-node.yml b/.github/workflows/azure-webapps-node.yml deleted file mode 100644 index 2ebbac2..0000000 --- a/.github/workflows/azure-webapps-node.yml +++ /dev/null @@ -1,78 +0,0 @@ -# This workflow will build and push a node.js application to an Azure Web App when a commit is pushed to your default branch. -# -# This workflow assumes you have already created the target Azure App Service web app. 
-# For instructions see https://docs.microsoft.com/en-us/azure/app-service/quickstart-nodejs?tabs=linux&pivots=development-environment-cli -# -# To configure this workflow: -# -# 1. Download the Publish Profile for your Azure Web App. You can download this file from the Overview page of your Web App in the Azure Portal. -# For more information: https://docs.microsoft.com/en-us/azure/app-service/deploy-github-actions?tabs=applevel#generate-deployment-credentials -# -# 2. Create a secret in your repository named AZURE_WEBAPP_PUBLISH_PROFILE, paste the publish profile contents as the value of the secret. -# For instructions on obtaining the publish profile see: https://docs.microsoft.com/azure/app-service/deploy-github-actions#configure-the-github-secret -# -# 3. Change the value for the AZURE_WEBAPP_NAME. Optionally, change the AZURE_WEBAPP_PACKAGE_PATH and NODE_VERSION environment variables below. -# -# For more information on GitHub Actions for Azure: https://github.com/Azure/Actions -# For more information on the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy -# For more samples to get started with GitHub Action workflows to deploy to Azure: https://github.com/Azure/actions-workflow-samples - -on: - push: - branches: [ "main" ] - workflow_dispatch: - -env: - AZURE_WEBAPP_NAME: your-app-name # set this to your application's name - AZURE_WEBAPP_PACKAGE_PATH: '.' 
# set this to the path to your web app project, defaults to the repository root - NODE_VERSION: '20.x' # set this to the node version to use - -permissions: - contents: read - -jobs: - build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Set up Node.js - uses: actions/setup-node@v4 - with: - node-version: ${{ env.NODE_VERSION }} - cache: 'npm' - - - name: npm install, build, and test - run: | - npm install - npm run build --if-present - npm run test --if-present - - - name: Upload artifact for deployment job - uses: actions/upload-artifact@v4 - with: - name: node-app - path: . - - deploy: - permissions: - contents: none - runs-on: ubuntu-latest - needs: build - environment: - name: 'Development' - url: ${{ steps.deploy-to-webapp.outputs.webapp-url }} - - steps: - - name: Download artifact from build job - uses: actions/download-artifact@v4 - with: - name: node-app - - - name: 'Deploy to Azure WebApp' - id: deploy-to-webapp - uses: azure/webapps-deploy@v2 - with: - app-name: ${{ env.AZURE_WEBAPP_NAME }} - publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }} - package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
diff --git a/.github/workflows/docker-compose.yml b/.github/workflows/docker-compose.yml
new file mode 100644
index 0000000..95f498b
--- /dev/null
+++ b/.github/workflows/docker-compose.yml
@@ -0,0 +1,122 @@
+version: '3.8'
+# NOTE(review): this file sits in .github/workflows/, where GitHub Actions parses *.yml as workflows and where "." / "./data" resolve; consider moving it to the repo root.
+services:
+  # PostgreSQL Database (credentials and database name come from POSTGRES_* variables, with defaults)
+  postgres:
+    image: postgres:15-alpine
+    container_name: explorepi-postgres
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-postgres}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
+      POSTGRES_DB: ${POSTGRES_DB:-json_data}
+      PGDATA: /var/lib/postgresql/data/pgdata
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-json_data}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+  # Redis Cache (optional)
+  redis:
+    image: redis:7-alpine
+    container_name: explorepi-redis
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis_data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+  # ExplorePi API Application (DATABASE_URL reuses the POSTGRES_* values above; URL-encode special characters in the password)
+  api:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: explorepi-api
+    environment:
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-json_data}
+      - REDIS_URL=redis://redis:6379/0
+      - ENVIRONMENT=production
+      - API_HOST=0.0.0.0
+      - API_PORT=8000
+    volumes:
+      - ./data:/app/data
+      - ./schemas:/app/schemas
+      - ./backups:/app/backups
+      - ./processed:/app/processed
+      - ./reports:/app/reports
+      - ./logs:/app/logs
+    ports:
+      - "8000:8000"
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  # pgAdmin (Database Management UI)
+  pgadmin:
+    image: dpage/pgadmin4:latest
+    container_name: explorepi-pgadmin
+    environment:
+      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_EMAIL:-admin@explorepi.com}
+      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD:-admin}
+      PGADMIN_CONFIG_SERVER_MODE: 'False'
+    volumes:
+      - pgadmin_data:/var/lib/pgadmin
+    ports:
+      - "5050:80"
+    depends_on:
+      - postgres
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+  # Nginx Reverse Proxy (optional)
+  nginx:
+    image: nginx:alpine
+    container_name: explorepi-nginx
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf:ro
+    ports:
+      - "80:80"
+      - "443:443"
+    depends_on:
+      - api
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+volumes:
+  postgres_data:
+    driver: local
+  redis_data:
+    driver: local
+  pgadmin_data:
+    driver: local
+
+networks:
+  explorepi-network:
+    driver: bridge
From 8e8020e51543577fa555c3d76c1bbb53d8e9994e Mon Sep 17 00:00:00 2001 From: Marfin
<248203147+arifinahmad99-cloud@users.noreply.github.com> Date: Sat, 14 Feb 2026 15:20:00 +0800 Subject: [PATCH 19/19] Add initial devcontainer configuration --- .devcontainer/devcontainer.json | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .devcontainer/devcontainer.json diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 0000000..39bbd26 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,4 @@ +{ + "image": "mcr.microsoft.com/devcontainers/universal:2", + "features": {} +}