From 72af7eb9c395e2d592f9a334b6ace98508e4c60b Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Sun, 11 Jan 2026 07:36:43 +0800
Subject: [PATCH 01/19] Implement multi-language support for UI
Added multi-language support with English, Chinese, Spanish, French, German, and Japanese translations for various UI elements and messages.
---
website/Teranslate.js | 331 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 331 insertions(+)
create mode 100644 website/Teranslate.js
diff --git a/website/Teranslate.js b/website/Teranslate.js
new file mode 100644
index 0000000..a713821
--- /dev/null
+++ b/website/Teranslate.js
@@ -0,0 +1,331 @@
+-- =============================================
+-- EXPLOREPI LANGUAGE DATA INSTALLATION
+-- Multi-Language Support (en complete; zh, es, fr, de, ja partial)
+-- =============================================
+
+USE explorepi;
+
+-- Optional reset: uncomment the TRUNCATE below to remove every row of language_data (all languages) before seeding.
+-- TRUNCATE TABLE language_data;
+
+-- =============================================
+-- ENGLISH (en) - Complete Translation
+-- =============================================
+-- Re-runnable: on a key collision the trailing ON DUPLICATE KEY UPDATE overwrites lang_value and category (assumes a UNIQUE/PRIMARY key on (lang_code, lang_key) - TODO confirm).
+INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES
+-- Navigation
+('en', 'nav.home', 'Home', 'navigation'),
+('en', 'nav.blocks', 'Blocks', 'navigation'),
+('en', 'nav.transactions', 'Transactions', 'navigation'),
+('en', 'nav.tokens', 'Tokens', 'navigation'),
+('en', 'nav.contracts', 'Contracts', 'navigation'),
+('en', 'nav.statistics', 'Statistics', 'navigation'),
+('en', 'nav.search', 'Search', 'navigation'),
+('en', 'nav.about', 'About', 'navigation'),
+('en', 'nav.api', 'API', 'navigation'),
+
+-- Common Terms
+('en', 'common.loading', 'Loading...', 'common'),
+('en', 'common.error', 'Error', 'common'),
+('en', 'common.success', 'Success', 'common'),
+('en', 'common.warning', 'Warning', 'common'),
+('en', 'common.info', 'Information', 'common'),
+('en', 'common.block', 'Block', 'common'),
+('en', 'common.transaction', 'Transaction', 'common'),
+('en', 'common.address', 'Address', 'common'),
+('en', 'common.token', 'Token', 'common'),
+('en', 'common.contract', 'Smart Contract', 'common'),
+('en', 'common.view_all', 'View All', 'common'),
+('en', 'common.view_more', 'View More', 'common'),
+('en', 'common.details', 'Details', 'common'),
+('en', 'common.copy', 'Copy', 'common'),
+('en', 'common.copied', 'Copied!', 'common'),
+('en', 'common.ago', 'ago', 'common'),
+('en', 'common.total', 'Total', 'common'),
+('en', 'common.amount', 'Amount', 'common'),
+('en', 'common.price', 'Price', 'common'),
+('en', 'common.value', 'Value', 'common'),
+
+-- Search
+('en', 'search.placeholder', 'Search by Address / Txn Hash / Block / Token', 'search'),
+('en', 'search.searching', 'Searching...', 'search'),
+('en', 'search.no_results', 'No results found', 'search'),
+('en', 'search.invalid_input', 'Invalid search input', 'search'),
+('en', 'search.enter_query', 'Enter your search query', 'search'),
+
+-- Home Page
+('en', 'home.title', 'Pi Network Block Explorer', 'home'),
+('en', 'home.subtitle', 'Explore the Pi Blockchain', 'home'),
+('en', 'home.latest_blocks', 'Latest Blocks', 'home'),
+('en', 'home.latest_transactions', 'Latest Transactions', 'home'),
+('en', 'home.network_stats', 'Network Statistics', 'home'),
+('en', 'home.total_blocks', 'Total Blocks', 'home'),
+('en', 'home.total_transactions', 'Total Transactions', 'home'),
+('en', 'home.total_addresses', 'Total Addresses', 'home'),
+('en', 'home.avg_block_time', 'Avg Block Time', 'home'),
+('en', 'home.welcome', 'Welcome to Pi Network Explorer', 'home'),
+
+-- Block Details
+('en', 'block.height', 'Block Height', 'block'),
+('en', 'block.hash', 'Block Hash', 'block'),
+('en', 'block.parent_hash', 'Parent Hash', 'block'),
+('en', 'block.timestamp', 'Timestamp', 'block'),
+('en', 'block.transactions', 'Transactions', 'block'),
+('en', 'block.miner', 'Miner', 'block'),
+('en', 'block.difficulty', 'Difficulty', 'block'),
+('en', 'block.total_difficulty', 'Total Difficulty', 'block'),
+('en', 'block.size', 'Size', 'block'),
+('en', 'block.gas_used', 'Gas Used', 'block'),
+('en', 'block.gas_limit', 'Gas Limit', 'block'),
+('en', 'block.nonce', 'Nonce', 'block'),
+('en', 'block.state_root', 'State Root', 'block'),
+('en', 'block.receipts_root', 'Receipts Root', 'block'),
+('en', 'block.transactions_root', 'Transactions Root', 'block'),
+('en', 'block.extra_data', 'Extra Data', 'block'),
+('en', 'block.not_found', 'Block not found', 'block'),
+('en', 'block.overview', 'Block Overview', 'block'),
+
+-- Transaction Details
+('en', 'tx.hash', 'Transaction Hash', 'transaction'),
+('en', 'tx.status', 'Status', 'transaction'),
+('en', 'tx.success', 'Success', 'transaction'),
+('en', 'tx.failed', 'Failed', 'transaction'),
+('en', 'tx.pending', 'Pending', 'transaction'),
+('en', 'tx.block', 'Block', 'transaction'),
+('en', 'tx.from', 'From', 'transaction'),
+('en', 'tx.to', 'To', 'transaction'),
+('en', 'tx.contract_creation', 'Contract Creation', 'transaction'),
+('en', 'tx.value', 'Value', 'transaction'),
+('en', 'tx.fee', 'Transaction Fee', 'transaction'),
+('en', 'tx.gas_price', 'Gas Price', 'transaction'),
+('en', 'tx.gas_limit', 'Gas Limit', 'transaction'),
+('en', 'tx.gas_used', 'Gas Used', 'transaction'),
+('en', 'tx.nonce', 'Nonce', 'transaction'),
+('en', 'tx.input_data', 'Input Data', 'transaction'),
+('en', 'tx.logs', 'Logs', 'transaction'),
+('en', 'tx.not_found', 'Transaction not found', 'transaction'),
+
+-- Address Details
+('en', 'address.overview', 'Address Overview', 'address'),
+('en', 'address.balance', 'Balance', 'address'),
+('en', 'address.transactions', 'Transactions', 'address'),
+('en', 'address.token_transfers', 'Token Transfers', 'address'),
+('en', 'address.is_contract', 'Smart Contract', 'address'),
+('en', 'address.creator', 'Creator', 'address'),
+('en', 'address.creation_tx', 'Creation Transaction', 'address'),
+('en', 'address.first_seen', 'First Seen', 'address'),
+('en', 'address.last_seen', 'Last Seen', 'address'),
+('en', 'address.not_found', 'Address not found', 'address'),
+
+-- Token Details
+('en', 'token.name', 'Token Name', 'token'),
+('en', 'token.symbol', 'Symbol', 'token'),
+('en', 'token.decimals', 'Decimals', 'token'),
+('en', 'token.total_supply', 'Total Supply', 'token'),
+('en', 'token.holders', 'Holders', 'token'),
+('en', 'token.transfers', 'Transfers', 'token'),
+('en', 'token.type', 'Token Type', 'token'),
+('en', 'token.contract', 'Contract Address', 'token'),
+('en', 'token.not_found', 'Token not found', 'token'),
+
+-- Contract Details
+('en', 'contract.address', 'Contract Address', 'contract'),
+('en', 'contract.creator', 'Creator', 'contract'),
+('en', 'contract.creation_tx', 'Creation Transaction', 'contract'),
+('en', 'contract.creation_block', 'Creation Block', 'contract'),
+('en', 'contract.verified', 'Verified', 'contract'),
+('en', 'contract.unverified', 'Not Verified', 'contract'),
+('en', 'contract.source_code', 'Source Code', 'contract'),
+('en', 'contract.abi', 'Contract ABI', 'contract'),
+('en', 'contract.bytecode', 'Bytecode', 'contract'),
+('en', 'contract.compiler_version', 'Compiler Version', 'contract'),
+('en', 'contract.optimization', 'Optimization', 'contract'),
+('en', 'contract.runs', 'Runs', 'contract'),
+('en', 'contract.not_found', 'Contract not found', 'contract'),
+
+-- Time Units
+('en', 'time.seconds', 'seconds', 'time'),
+('en', 'time.minutes', 'minutes', 'time'),
+('en', 'time.hours', 'hours', 'time'),
+('en', 'time.days', 'days', 'time'),
+('en', 'time.months', 'months', 'time'),
+('en', 'time.years', 'years', 'time'),
+('en', 'time.ago', 'ago', 'time'),
+('en', 'time.just_now', 'just now', 'time'),
+
+-- Error Messages
+('en', 'error.general', 'An error occurred', 'error'),
+('en', 'error.not_found', 'Not found', 'error'),
+('en', 'error.invalid_address', 'Invalid address', 'error'),
+('en', 'error.invalid_tx_hash', 'Invalid transaction hash', 'error'),
+('en', 'error.invalid_block', 'Invalid block number', 'error'),
+('en', 'error.database', 'Database error', 'error'),
+('en', 'error.network', 'Network error', 'error'),
+('en', 'error.connection', 'Connection error', 'error')
+ON DUPLICATE KEY UPDATE lang_value = VALUES(lang_value), category = VALUES(category);
+-- =============================================
+-- CHINESE (zh) - 中文翻译 (partial: a subset of the en keys)
+-- =============================================
+-- Re-runnable: on a key collision the trailing ON DUPLICATE KEY UPDATE overwrites lang_value and category (assumes a UNIQUE/PRIMARY key on (lang_code, lang_key) - TODO confirm).
+INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES
+('zh', 'nav.home', '首页', 'navigation'),
+('zh', 'nav.blocks', '区块', 'navigation'),
+('zh', 'nav.transactions', '交易', 'navigation'),
+('zh', 'nav.tokens', '代币', 'navigation'),
+('zh', 'nav.contracts', '合约', 'navigation'),
+('zh', 'nav.statistics', '统计', 'navigation'),
+('zh', 'nav.search', '搜索', 'navigation'),
+
+('zh', 'common.loading', '加载中...', 'common'),
+('zh', 'common.error', '错误', 'common'),
+('zh', 'common.success', '成功', 'common'),
+('zh', 'common.block', '区块', 'common'),
+('zh', 'common.transaction', '交易', 'common'),
+('zh', 'common.address', '地址', 'common'),
+('zh', 'common.token', '代币', 'common'),
+('zh', 'common.view_all', '查看全部', 'common'),
+('zh', 'common.details', '详情', 'common'),
+
+('zh', 'search.placeholder', '搜索地址 / 交易哈希 / 区块 / 代币', 'search'),
+('zh', 'search.no_results', '未找到结果', 'search'),
+
+('zh', 'home.title', 'Pi网络区块浏览器', 'home'),
+('zh', 'home.latest_blocks', '最新区块', 'home'),
+('zh', 'home.latest_transactions', '最新交易', 'home'),
+('zh', 'home.total_blocks', '总区块数', 'home'),
+
+('zh', 'block.height', '区块高度', 'block'),
+('zh', 'block.hash', '区块哈希', 'block'),
+('zh', 'block.timestamp', '时间戳', 'block'),
+('zh', 'block.transactions', '交易', 'block'),
+('zh', 'block.miner', '矿工', 'block'),
+
+('zh', 'tx.hash', '交易哈希', 'transaction'),
+('zh', 'tx.status', '状态', 'transaction'),
+('zh', 'tx.success', '成功', 'transaction'),
+('zh', 'tx.failed', '失败', 'transaction'),
+('zh', 'tx.from', '发送方', 'transaction'),
+('zh', 'tx.to', '接收方', 'transaction')
+ON DUPLICATE KEY UPDATE lang_value = VALUES(lang_value), category = VALUES(category);
+-- =============================================
+-- SPANISH (es) - Español (partial: a subset of the en keys)
+-- =============================================
+-- Re-runnable: on a key collision the trailing ON DUPLICATE KEY UPDATE overwrites lang_value and category (assumes a UNIQUE/PRIMARY key on (lang_code, lang_key) - TODO confirm).
+INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES
+('es', 'nav.home', 'Inicio', 'navigation'),
+('es', 'nav.blocks', 'Bloques', 'navigation'),
+('es', 'nav.transactions', 'Transacciones', 'navigation'),
+('es', 'nav.tokens', 'Tokens', 'navigation'),
+('es', 'nav.contracts', 'Contratos', 'navigation'),
+('es', 'nav.statistics', 'Estadísticas', 'navigation'),
+('es', 'nav.search', 'Buscar', 'navigation'),
+
+('es', 'common.loading', 'Cargando...', 'common'),
+('es', 'common.error', 'Error', 'common'),
+('es', 'common.success', 'Éxito', 'common'),
+('es', 'common.block', 'Bloque', 'common'),
+('es', 'common.transaction', 'Transacción', 'common'),
+('es', 'common.address', 'Dirección', 'common'),
+('es', 'common.token', 'Token', 'common'),
+('es', 'common.view_all', 'Ver Todo', 'common'),
+('es', 'common.details', 'Detalles', 'common'),
+
+('es', 'search.placeholder', 'Buscar por Dirección / Hash de Tx / Bloque / Token', 'search'),
+('es', 'search.no_results', 'No se encontraron resultados', 'search'),
+
+('es', 'home.title', 'Explorador de Bloques de Pi Network', 'home'),
+('es', 'home.latest_blocks', 'Últimos Bloques', 'home'),
+('es', 'home.latest_transactions', 'Últimas Transacciones', 'home'),
+
+('es', 'block.height', 'Altura del Bloque', 'block'),
+('es', 'block.hash', 'Hash del Bloque', 'block'),
+('es', 'block.timestamp', 'Marca de Tiempo', 'block'),
+('es', 'block.transactions', 'Transacciones', 'block'),
+
+('es', 'tx.hash', 'Hash de Transacción', 'transaction'),
+('es', 'tx.status', 'Estado', 'transaction'),
+('es', 'tx.success', 'Éxito', 'transaction'),
+('es', 'tx.failed', 'Fallido', 'transaction')
+ON DUPLICATE KEY UPDATE lang_value = VALUES(lang_value), category = VALUES(category);
+-- =============================================
+-- FRENCH (fr) - Français (partial: a subset of the en keys)
+-- =============================================
+-- Re-runnable: on a key collision the trailing ON DUPLICATE KEY UPDATE overwrites lang_value and category (assumes a UNIQUE/PRIMARY key on (lang_code, lang_key) - TODO confirm).
+INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES
+('fr', 'nav.home', 'Accueil', 'navigation'),
+('fr', 'nav.blocks', 'Blocs', 'navigation'),
+('fr', 'nav.transactions', 'Transactions', 'navigation'),
+('fr', 'nav.tokens', 'Jetons', 'navigation'),
+('fr', 'nav.contracts', 'Contrats', 'navigation'),
+('fr', 'nav.search', 'Rechercher', 'navigation'),
+
+('fr', 'common.loading', 'Chargement...', 'common'),
+('fr', 'common.error', 'Erreur', 'common'),
+('fr', 'common.block', 'Bloc', 'common'),
+('fr', 'common.transaction', 'Transaction', 'common'),
+('fr', 'common.address', 'Adresse', 'common'),
+('fr', 'common.details', 'Détails', 'common'),
+
+('fr', 'search.placeholder', 'Rechercher par Adresse / Hash / Bloc / Jeton', 'search'),
+
+('fr', 'home.title', 'Explorateur de Blocs Pi Network', 'home'),
+('fr', 'home.latest_blocks', 'Derniers Blocs', 'home'),
+('fr', 'home.latest_transactions', 'Dernières Transactions', 'home')
+ON DUPLICATE KEY UPDATE lang_value = VALUES(lang_value), category = VALUES(category);
+-- =============================================
+-- GERMAN (de) - Deutsch (partial: a subset of the en keys)
+-- =============================================
+-- Re-runnable: on a key collision the trailing ON DUPLICATE KEY UPDATE overwrites lang_value and category (assumes a UNIQUE/PRIMARY key on (lang_code, lang_key) - TODO confirm).
+INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES
+('de', 'nav.home', 'Startseite', 'navigation'),
+('de', 'nav.blocks', 'Blöcke', 'navigation'),
+('de', 'nav.transactions', 'Transaktionen', 'navigation'),
+('de', 'nav.tokens', 'Token', 'navigation'),
+('de', 'nav.contracts', 'Verträge', 'navigation'),
+('de', 'nav.search', 'Suchen', 'navigation'),
+
+('de', 'common.loading', 'Laden...', 'common'),
+('de', 'common.error', 'Fehler', 'common'),
+('de', 'common.block', 'Block', 'common'),
+('de', 'common.transaction', 'Transaktion', 'common'),
+('de', 'common.address', 'Adresse', 'common'),
+('de', 'common.details', 'Details', 'common'),
+
+('de', 'home.title', 'Pi Network Block Explorer', 'home'),
+('de', 'home.latest_blocks', 'Neueste Blöcke', 'home'),
+('de', 'home.latest_transactions', 'Neueste Transaktionen', 'home')
+ON DUPLICATE KEY UPDATE lang_value = VALUES(lang_value), category = VALUES(category);
+-- =============================================
+-- JAPANESE (ja) - 日本語 (partial: a subset of the en keys)
+-- =============================================
+-- Re-runnable: on a key collision the trailing ON DUPLICATE KEY UPDATE overwrites lang_value and category (assumes a UNIQUE/PRIMARY key on (lang_code, lang_key) - TODO confirm).
+INSERT INTO language_data (lang_code, lang_key, lang_value, category) VALUES
+('ja', 'nav.home', 'ホーム', 'navigation'),
+('ja', 'nav.blocks', 'ブロック', 'navigation'),
+('ja', 'nav.transactions', 'トランザクション', 'navigation'),
+('ja', 'nav.tokens', 'トークン', 'navigation'),
+('ja', 'nav.search', '検索', 'navigation'),
+
+('ja', 'common.loading', '読み込み中...', 'common'),
+('ja', 'common.error', 'エラー', 'common'),
+('ja', 'common.block', 'ブロック', 'common'),
+('ja', 'common.transaction', 'トランザクション', 'common'),
+('ja', 'common.address', 'アドレス', 'common'),
+
+('ja', 'home.title', 'Piネットワーク ブロックエクスプローラー', 'home'),
+('ja', 'home.latest_blocks', '最新ブロック', 'home'),
+('ja', 'home.latest_transactions', '最新トランザクション', 'home')
+ON DUPLICATE KEY UPDATE lang_value = VALUES(lang_value), category = VALUES(category);
+-- =============================================
+-- VERIFY INSTALLATION
+-- =============================================
+-- Per-language summary: number of rows and number of distinct categories stored for each lang_code.
+SELECT
+    lang_code,
+    COUNT(*) AS total_translations,
+    COUNT(DISTINCT category) AS categories
+FROM language_data
+GROUP BY lang_code
+ORDER BY lang_code;
+-- Unconditional marker row: it is emitted regardless of what the summary above shows.
+SELECT 'Language data installation completed!' AS status;
From d4a3a359c2fce713005f87e55ec8c35c3d4f3091 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Sun, 11 Jan 2026 12:09:14 +0800
Subject: [PATCH 02/19] Add GitHub Actions workflow for Azure Node.js
deployment
This workflow builds and deploys a Node.js application to Azure Web App on commits to the main branch.
---
.github/workflows/azure-webapps-node.yml | 78 ++++++++++++++++++++++++
1 file changed, 78 insertions(+)
create mode 100644 .github/workflows/azure-webapps-node.yml
diff --git a/.github/workflows/azure-webapps-node.yml b/.github/workflows/azure-webapps-node.yml
new file mode 100644
index 0000000..2ebbac2
--- /dev/null
+++ b/.github/workflows/azure-webapps-node.yml
@@ -0,0 +1,78 @@
+# This workflow will build and push a node.js application to an Azure Web App when a commit is pushed to your default branch.
+#
+# This workflow assumes you have already created the target Azure App Service web app.
+# For instructions see https://docs.microsoft.com/en-us/azure/app-service/quickstart-nodejs?tabs=linux&pivots=development-environment-cli
+#
+# To configure this workflow:
+#
+# 1. Download the Publish Profile for your Azure Web App. You can download this file from the Overview page of your Web App in the Azure Portal.
+# For more information: https://docs.microsoft.com/en-us/azure/app-service/deploy-github-actions?tabs=applevel#generate-deployment-credentials
+#
+# 2. Create a secret in your repository named AZURE_WEBAPP_PUBLISH_PROFILE, paste the publish profile contents as the value of the secret.
+# For instructions on obtaining the publish profile see: https://docs.microsoft.com/azure/app-service/deploy-github-actions#configure-the-github-secret
+#
+# 3. Change the value for the AZURE_WEBAPP_NAME. Optionally, change the AZURE_WEBAPP_PACKAGE_PATH and NODE_VERSION environment variables below.
+#
+# For more information on GitHub Actions for Azure: https://github.com/Azure/Actions
+# For more information on the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
+# For more samples to get started with GitHub Action workflows to deploy to Azure: https://github.com/Azure/actions-workflow-samples
+
+on:
+  push:
+    branches: [ "main" ]
+  workflow_dispatch:  # also allow manual runs
+
+env:
+  AZURE_WEBAPP_NAME: your-app-name  # set this to your application's name
+  AZURE_WEBAPP_PACKAGE_PATH: '.'  # set this to the path to your web app project, defaults to the repository root
+  NODE_VERSION: '20.x'  # set this to the node version to use
+
+permissions:
+  contents: read  # workflow-level default; the deploy job overrides it
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Node.js
+        uses: actions/setup-node@v4
+        with:
+          node-version: ${{ env.NODE_VERSION }}
+          cache: 'npm'
+
+      - name: npm install, build, and test
+        run: |
+          npm install
+          npm run build --if-present
+          npm run test --if-present
+
+      - name: Upload artifact for deployment job
+        uses: actions/upload-artifact@v4
+        with:
+          name: node-app
+          path: .
+
+  deploy:
+    permissions:
+      contents: none  # this job has no checkout step; it only consumes the uploaded artifact
+    runs-on: ubuntu-latest
+    needs: build  # runs after build so the node-app artifact exists
+    environment:
+      name: 'Development'
+      url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
+
+    steps:
+      - name: Download artifact from build job
+        uses: actions/download-artifact@v4
+        with:
+          name: node-app
+
+      - name: 'Deploy to Azure WebApp'
+        id: deploy-to-webapp
+        uses: azure/webapps-deploy@v2
+        with:
+          app-name: ${{ env.AZURE_WEBAPP_NAME }}
+          publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }}
+          package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
From f4f7155586faddb0fd58d537873bb6606b4cb2ca Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Fri, 16 Jan 2026 05:11:01 +0800
Subject: [PATCH 03/19] Update privacy policy last updated date
---
website/public/policy.html | 71 ++++++++++++++------------------------
1 file changed, 25 insertions(+), 46 deletions(-)
diff --git a/website/public/policy.html b/website/public/policy.html
index 0648d69..599b2e6 100644
--- a/website/public/policy.html
+++ b/website/public/policy.html
@@ -1,10 +1,10 @@
Privacy Policy
-Last updated: March 14, 2023
-This Privacy Policy describes Our policies and procedures on the collection, use and disclosure of Your information when You use the Service and tells You about Your privacy rights and how the law protects You.
+Last updated: January 15, 2026
+This Privacy Policy describes Our policies and procedures on the collection, use and disclosure of Your information when You use the Service and tells You about Your privacy rights and how the law protects You.
We use Your Personal data to provide and improve the Service. By using the Service, You agree to the collection and use of information in accordance with this Privacy Policy.
Interpretation and Definitions
Interpretation
-The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in singular or in plural.
+The words of which the initial letter is capitalized have meanings defined under the following conditions. The following definitions shall have the same meaning regardless of whether they appear in singular or in plural.
Definitions
For the purposes of this Privacy Policy:
@@ -12,7 +12,7 @@ Definitions
Account means a unique account created for You to access our Service or parts of our Service.
-
-
Affiliate means an entity that controls, is controlled by or is under common control with a party, where "control" means ownership of 50% or more of the shares, equity interest or other securities entitled to vote for election of directors or other managing authority.
+Affiliate means an entity that controls, is controlled by or is under common control with a party, where "control" means ownership of 50% or more of the shares, equity interest or other securities entitled to vote for election of directors or other managing authority.
-
Application refers to ExplorePi, the software program provided by the APP.
@@ -33,13 +33,13 @@ Definitions
Service refers to the Application.
-
-
Service Provider means any natural or legal person who processes the data on behalf of the APP. It refers to third-party companies or individuals employed by the APP to facilitate the Service, to provide the Service on behalf of the APP, to perform services related to the Service or to assist the APP in analyzing how the Service is used.
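+Service Provider means any natural or legal person who processes the data on behalf of the APP. It refers to third-party companies or individuals employed by the APP to facilitate the Service, to provide the Service on behalf of the APP, to perform services related to the Service or to assist the APP in analyzing how the Service is used.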
-
Third-party Social Media Service refers to any website or any social network website through which a User can log in or create an account to use the Service.
-
-
Usage Data refers to data collected automatically, either generated by the use of the Service or from the Service infrastructure itself (for example, the duration of a page visit).
+Usage Data refers to data collected automatically, either generated by the use of the Service or from the Service infrastructure itself (for example, the duration of a page visit).
-
You means the individual accessing or using the Service, or the APP, or other legal entity on behalf of which such individual is accessing or using the Service, as applicable.
@@ -48,23 +48,19 @@ Definitions
Collecting and Using Your Personal Data
Types of Data Collected
Personal Data
-While using Our Service, We may ask You to provide Us with certain personally identifiable information that can be used to contact or identify You. Personally identifiable information may include, but is not limited to:
+While using Our Service, We may ask You to provide Us with certain personally identifiable information that can be used to contact or identify You. Personally identifiable information may include, but is not limited to:
Usage Data
Usage Data is collected automatically when using the Service.
-Usage Data may include information such as Your Device's Internet Protocol address (e.g. IP address), browser type, browser version, the pages of our Service that You visit, the time and date of Your visit, the time spent on those pages, unique device identifiers and other diagnostic data.
-When You access the Service by or through a mobile device, We may collect certain information automatically, including, but not limited to, the type of mobile device You use, Your mobile device unique ID, the IP address of Your mobile device, Your mobile operating system, the type of mobile Internet browser You use, unique device identifiers and other diagnostic data.
-We may also collect information that Your browser sends whenever You visit our Service or when You access the Service by or through a mobile device.
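+Usage Data may include information such as Your Device's Internet Protocol address (e.g. IP address), browser type, browser version, the pages of our Service that You visit, the time and date of Your visit, the time spent on those pages, unique device identifiers and other diagnostic data.
When You access the Service by or through a mobile device, We may collect certain information automatically, including, but not limited to, the type of mobile device You use, Your mobile device unique ID, the IP address of Your mobile device, Your mobile operating system, the type of mobile Internet browser You use, unique device identifiers and other diagnostic data.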
We may also collect information that Your browser sends whenever You visit our Service or when You access the Service by or through a mobile device.
Information from Third-Party Social Media Services
The APP allows You to create an account and log in to use the Service through the following Third-party Social Media Services:
-If You decide to register through or otherwise grant us access to a Third-Party Social Media Service, We may collect Personal data that is already associated with Your Third-Party Social Media Service's account, such as Your username, Your public address, Your activities associated with that account.
-You may also have the option of sharing additional information with the APP through Your Third-Party Social Media Service's account. If You choose to provide such information and Personal Data, during registration or otherwise, You are giving the APP permission to use, share, and store it in a manner consistent with this Privacy Policy.
-Information Collected while Using the Application
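+If You decide to register through or otherwise grant us access to a Third-Party Social Media Service, We may collect Personal data that is already associated with Your Third-Party Social Media Service's account, such as Your username, Your public address, Your activities associated with that account.
You may also have the option of sharing additional information with the APP through Your Third-Party Social Media Service's account. If You choose to provide such information and Personal Data, during registration or otherwise, You are giving the APP permission to use, share, and store it in a manner consistent with this Privacy Policy.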
Information Collected while Using the Application
While using Our Application, in order to provide features of Our Application, We may collect, with Your prior permission:
- Information regarding your location
@@ -78,51 +74,36 @@ Use of Your Personal Data
To provide and maintain our Service, including to monitor the usage of our Service.
-
-
To manage Your Account: to manage Your registration as a user of the Service. The Personal Data You provide can give You access to different functionalities of the Service that are available to You as a registered user.
-
+To manage Your Account: to manage Your registration as a user of the Service. The Personal Data You provide can give You access to different functionalities of the Service that are available to You as a registered user.
-
-
For the performance of a contract: the development, compliance and undertaking of the purchase contract for the products, items or services You have purchased or of any other contract with Us through the Service.
-
+For the performance of a contract: the development, compliance and undertaking of the purchase contract for the products, items or services You have purchased or of any other contract with Us through the Service.
-
-
To provide You with news, special offers and general information about other goods, services and events which we offer that are similar to those that you have already purchased or enquired about unless You have opted not to receive such information.
-
+To provide You with news, special offers and general information about other goods, services and events which we offer that are similar to those that you have already purchased or enquired about unless You have opted not to receive such information.
-
To manage Your requests: To attend and manage Your requests to Us.
-
-
For APP transfers: We may use Your information to evaluate or conduct a merger, divestiture, restructuring, reorganization, dissolution, or other sale or transfer of some or all of Our assets, whether as a going concern or as part of bankruptcy, liquidation, or similar proceeding, in which Personal Data held by Us about our Service users is among the assets transferred.
-
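+For APP transfers: We may use Your information to evaluate or conduct a merger, divestiture, restructuring, reorganization, dissolution, or other sale or transfer of some or all of Our assets, whether as a going concern or as part of bankruptcy, liquidation, or similar proceeding, in which Personal Data held by Us about our Service users is among the assets transferred.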
-
-
For other purposes: We may use Your information for other purposes, such as data analysis, identifying usage trends, determining the effectiveness of our promotional campaigns and to evaluate and improve our Service, products, services, marketing and your experience.
-
+For other purposes: We may use Your information for other purposes, such as data analysis, identifying usage trends, determining the effectiveness of our promotional campaigns and to evaluate and improve our Service, products, services, marketing and your experience.
We may share Your personal information in the following situations:
- With Service Providers: We may share Your personal information with Service Providers to monitor and analyze the use of our Service, to contact You.
-- For APP transfers: We may share or transfer Your personal information in connection with, or during negotiations of, any merger, sale of APP assets, financing, or acquisition of all or a portion of Our APP to another APP.
-- With Affiliates: We may share Your information with Our affiliates, in which case we will require those affiliates to honor this Privacy Policy. Affiliates include Our parent APP and any other subsidiaries, joint venture partners or other companies that We control or that are under common control with Us.
-- With app partners: We may share Your information with Our app partners to offer You certain products, services or promotions.
-- With other users: when You share personal information or otherwise interact in the public areas with other users, such information may be viewed by all users and may be publicly distributed outside. If You interact with other users or register through a Third-Party Social Media Service, Your contacts on the Third-Party Social Media Service may see Your name, profile, pictures and description of Your activity. Similarly, other users will be able to view descriptions of Your activity, communicate with You and view Your profile.
-- With Your consent: We may disclose Your personal information for any other purpose with Your consent.
+- For APP transfers: We may share or transfer Your personal information in connection with, or during negotiations of, any merger, sale of APP assets, financing, or acquisition of all or a portion of Our APP to another APP.
- With Affiliates: We may share Your information with Our affiliates, in which case we will require those affiliates to honor this Privacy Policy. Affiliates include Our parent APP and any other subsidiaries, joint venture partners or other companies that We control or that are under common control with Us.
- With app partners: We may share Your information with Our app partners to offer You certain products, services or promotions.
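+- With other users: when You share personal information or otherwise interact in the public areas with other users, such information may be viewed by all users and may be publicly distributed outside. If You interact with other users or register through a Third-Party Social Media Service, Your contacts on the Third-Party Social Media Service may see Your name, profile, pictures and description of Your activity. Similarly, other users will be able to view descriptions of Your activity, communicate with You and view Your profile.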
- With Your consent: We may disclose Your personal information for any other purpose with Your consent.
Retention of Your Personal Data
-The APP will retain Your Personal Data only for as long as is necessary for the purposes set out in this Privacy Policy. We will retain and use Your Personal Data to the extent necessary to comply with our legal obligations (for example, if we are required to retain your data to comply with applicable laws), resolve disputes, and enforce our legal agreements and policies.
-The APP will also retain Usage Data for internal analysis purposes. Usage Data is generally retained for a shorter period of time, except when this data is used to strengthen the security or to improve the functionality of Our Service, or We are legally obligated to retain this data for longer time periods.
-Transfer of Your Personal Data
-Your information, including Personal Data, is processed at the APP's operating offices and in any other places where the parties involved in the processing are located. It means that this information may be transferred to — and maintained on — computers located outside of Your state, province, country or other governmental jurisdiction where the data protection laws may differ than those from Your jurisdiction.
-Your consent to this Privacy Policy followed by Your submission of such information represents Your agreement to that transfer.
-The APP will take all steps reasonably necessary to ensure that Your data is treated securely and in accordance with this Privacy Policy and no transfer of Your Personal Data will take place to an organization or a country unless there are adequate controls in place including the security of Your data and other personal information.
-Delete Your Personal Data
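+The APP will retain Your Personal Data only for as long as is necessary for the purposes set out in this Privacy Policy. We will retain and use Your Personal Data to the extent necessary to comply with our legal obligations (for example, if we are required to retain your data to comply with applicable laws), resolve disputes, and enforce our legal agreements and policies.
The APP will also retain Usage Data for internal analysis purposes. Usage Data is generally retained for a shorter period of time, except when this data is used to strengthen the security or to improve the functionality of Our Service, or We are legally obligated to retain this data for longer time periods.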
Transfer of Your Personal Data
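+Your information, including Personal Data, is processed at the APP's operating offices and in any other places where the parties involved in the processing are located. It means that this information may be transferred to — and maintained on — computers located outside of Your state, province, country or other governmental jurisdiction where the data protection laws may differ than those from Your jurisdiction.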
Your consent to this Privacy Policy followed by Your submission of such information represents Your agreement to that transfer.
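+The APP will take all steps reasonably necessary to ensure that Your data is treated securely and in accordance with this Privacy Policy and no transfer of Your Personal Data will take place to an organization or a country unless there are adequate controls in place including the security of Your data and other personal information.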
Delete Your Personal Data
You have the right to delete or request that We assist in deleting the Personal Data that We have collected about You.
Our Service may give You the ability to delete certain information about You from within the Service.
-You may update, amend, or delete Your information at any time by signing in to Your Account, if you have one, and visiting the account settings section that allows you to manage Your personal information. You may also contact Us to request access to, correct, or delete any personal information that You have provided to Us.
-Please note, however, that We may need to retain certain information when we have a legal obligation or lawful basis to do so.
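+You may update, amend, or delete Your information at any time by signing in to Your Account, if you have one, and visiting the account settings section that allows you to manage Your personal information. You may also contact Us to request access to, correct, or delete any personal information that You have provided to Us.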
Please note, however, that We may need to retain certain information when we have a legal obligation or lawful basis to do so.
Disclosure of Your Personal Data
APP Transactions
-If the APP is involved in a merger, acquisition or asset sale, Your Personal Data may be transferred. We will provide notice before Your Personal Data is transferred and becomes subject to a different Privacy Policy.
-Law enforcement
-Under certain circumstances, the APP may be required to disclose Your Personal Data if required to do so by law or in response to valid requests by public authorities (e.g. a court or a government agency).
-Other legal requirements
+If the APP is involved in a merger, acquisition or asset sale, Your Personal Data may be transferred. We will provide notice before Your Personal Data is transferred and becomes subject to a different Privacy Policy.
Law enforcement
+Under certain circumstances, the APP may be required to disclose Your Personal Data if required to do so by law or in response to valid requests by public authorities (e.g. a court or a government agency).
Other legal requirements
The APP may disclose Your Personal Data in the good faith belief that such action is necessary to:
- Comply with a legal obligation
@@ -132,10 +113,8 @@ Other legal requirements
- Protect against legal liability
Security of Your Personal Data
-The security of Your Personal Data is important to Us, but remember that no method of transmission over the Internet, or method of electronic storage is 100% secure. While We strive to use commercially acceptable means to protect Your Personal Data, We cannot guarantee its absolute security.
-Links to Other Websites
-Our Service may contain links to other websites that are not operated by Us. If You click on a third party link, You will be directed to that third party's site. We strongly advise You to review the Privacy Policy of every site You visit.
-We have no control over and assume no responsibility for the content, privacy policies or practices of any third party sites or services.
+The security of Your Personal Data is important to Us, but remember that no method of transmission over the Internet, or method of electronic storage is 100% secure. While We strive to use commercially acceptable means to protect Your Personal Data, We cannot guarantee its absolute security.
Links to Other Websites
+Our Service may contain links to other websites that are not operated by Us. If You click on a third party link, You will be directed to that third party's site. We strongly advise You to review the Privacy Policy of every site You visit.
We have no control over and assume no responsibility for the content, privacy policies or practices of any third party sites or services.
Changes to this Privacy Policy
We may update Our Privacy Policy from time to time. We will notify You of any changes by posting the new Privacy Policy on this page.
We will do so prior to the change becoming effective, and We will update the "Last updated" date at the top of this Privacy Policy.
@@ -144,4 +123,4 @@ Contact Us
If you have any questions about this Privacy Policy, You can contact us:
- By email: 0205miss@gmail.com
-
+
\ No newline at end of file
From af40f758903257058b03c79f481964ec1caffc78 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Sun, 18 Jan 2026 22:14:38 +0800
Subject: [PATCH 04/19] Revert "corrected crawler README.md MySQL link"
---
crawler/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crawler/README.md b/crawler/README.md
index fe4f211..679b064 100644
--- a/crawler/README.md
+++ b/crawler/README.md
@@ -61,7 +61,7 @@ npm start
## ⛏️ Built Using
-- [MYSQL](https://www.mysql.com/) - Database
+- [MYSQL](https://www.mysql.com/) - Database
- [NodeJs](https://nodejs.org/en/) - Server Environment
- [StellarSDK](https://github.com/stellar/js-stellar-sdk) - BlockchainTool
## ✍️ Authors
From dd7f06ec921a7ceb5a0158ec80458d1ec13cbe6e Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Sun, 18 Jan 2026 23:17:12 +0800
Subject: [PATCH 05/19] Create README.md
---
.github/workflows/README.md | 83 +++++++++++++++++++++++++++++++++++++
1 file changed, 83 insertions(+)
create mode 100644 .github/workflows/README.md
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
new file mode 100644
index 0000000..cc583f4
--- /dev/null
+++ b/.github/workflows/README.md
@@ -0,0 +1,83 @@
+# Crawler
+
+A web crawler for collecting and processing data from specified sources.
+
+## Table of Contents
+
+- [Installation](#installation)
+- [Configuration](#configuration)
+- [Usage](#usage)
+- [Database Setup](#database-setup)
+- [Contributing](#contributing)
+
+## Installation
+
+Install the required dependencies:
+
+```bash
+pip install -r requirements.txt
+```
+
+Ensure you have Python 3.8+ installed on your system.
+
+## Configuration
+
+### Environment Variables
+
+Create a `.env` file in the project root with the following variables:
+
+```
+DATABASE_HOST=localhost
+DATABASE_USER=crawler_user
+DATABASE_PASSWORD=your_password
+DATABASE_NAME=crawler_db
+```
+
+Update these values according to your local environment.
+
+## Usage
+
+Run the crawler with:
+
+```bash
+python crawler.py
+```
+
+Optional flags:
+- `--verbose`: Enable detailed logging output
+- `--limit N`: Limit crawling to N pages
+- `--timeout S`: Set request timeout to S seconds
+
+## Database Setup
+
+### MySQL Configuration
+
+The crawler uses MySQL to store collected data. Follow these steps to set up your database:
+
+1. **Install MySQL**: Download and install from [MySQL Official Website](https://dev.mysql.com/downloads/mysql/)
+
+2. **Create Database and User**:
+ ```sql
+ CREATE DATABASE crawler_db;
+ CREATE USER 'crawler_user'@'localhost' IDENTIFIED BY 'your_password';
+ GRANT ALL PRIVILEGES ON crawler_db.* TO 'crawler_user'@'localhost';
+ FLUSH PRIVILEGES;
+ ```
+
+3. **Initialize Tables**: Run the database migration script:
+ ```bash
+ python scripts/init_db.py
+ ```
+
+### Connection Details
+
+- **Host**: localhost (default)
+- **Port**: 3306 (default MySQL port)
+- **User**: crawler_user
+- **Database**: crawler_db
+
+Update the connection parameters in your `.env` file if using different settings.
+
+## Contributing
+
+Please read CONTRIBUTING.md for details on our code of conduct and the process for submitting pull requests.
\ No newline at end of file
From 453469bb3a53caccd68d01e9fbf2645eb0337fee Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Sun, 18 Jan 2026 23:24:43 +0800
Subject: [PATCH 06/19] Create CONTRIBUTING.md
Reporting Bugs
When reporting bugs, please include:
Description: What you were trying to do
Expected behavior: What should have happened
Actual behavior: What actually happened
Environment: Python version, OS, MySQL version
Steps to reproduce: Clear steps to replicate the issue
Error message: Full error traceback if available
Screenshots: If applicable
---
.github/workflows/CONTRIBUTING.md | 224 ++++++++++++++++++++++++++++++
1 file changed, 224 insertions(+)
create mode 100644 .github/workflows/CONTRIBUTING.md
diff --git a/.github/workflows/CONTRIBUTING.md b/.github/workflows/CONTRIBUTING.md
new file mode 100644
index 0000000..d652fba
--- /dev/null
+++ b/.github/workflows/CONTRIBUTING.md
@@ -0,0 +1,224 @@
+# Contributing to Crawler
+
+Thank you for your interest in contributing to the Crawler project! We welcome contributions from everyone. This document provides guidelines and instructions for contributing.
+
+## Code of Conduct
+
+We are committed to providing a welcoming and inspiring community for all. Please be respectful and constructive in all interactions. Harassment, discrimination, or disruptive behavior will not be tolerated.
+
+## How to Contribute
+
+There are many ways to contribute to this project:
+
+- **Report bugs** by opening an issue with detailed information
+- **Suggest features** with clear use cases and expected behavior
+- **Improve documentation** by fixing typos or clarifying confusing sections
+- **Submit code changes** by creating pull requests with meaningful improvements
+- **Review pull requests** and provide constructive feedback to other contributors
+
+## Getting Started
+
+### Prerequisites
+
+- Python 3.8 or higher
+- Git
+- A MySQL database for testing (optional but recommended)
+- A code editor or IDE of your choice
+
+### Setting Up Your Development Environment
+
+1. Fork the repository on GitHub
+2. Clone your fork locally:
+ ```bash
+ git clone https://github.com/your-username/crawler.git
+ cd crawler
+ ```
+3. Create a virtual environment:
+ ```bash
+ python -m venv venv
+ source venv/bin/activate # On Windows: venv\Scripts\activate
+ ```
+4. Install development dependencies:
+ ```bash
+ pip install -r requirements-dev.txt
+ ```
+5. Create a local `.env` file for testing:
+ ```bash
+ cp .env.example .env
+ ```
+
+## Making Changes
+
+### Branch Naming
+
+Create a descriptive branch name for your changes:
+- `feature/add-proxy-support`
+- `bugfix/fix-mysql-connection-timeout`
+- `docs/improve-readme`
+- `test/add-crawler-tests`
+
+```bash
+git checkout -b feature/your-feature-name
+```
+
+### Code Style
+
+Follow these guidelines to maintain consistent code quality:
+
+- Use PEP 8 style guide for Python code
+- Keep lines under 100 characters when possible
+- Use meaningful variable and function names
+- Add docstrings to functions and classes
+- Use type hints where applicable
+
+Example:
+```python
+def fetch_url(url: str, timeout: int = 10) -> str:
+ """
+ Fetch content from a given URL.
+
+ Args:
+ url: The URL to fetch
+ timeout: Request timeout in seconds (default: 10)
+
+ Returns:
+ The HTML content of the page
+
+ Raises:
+ requests.exceptions.RequestException: If the request fails
+ """
+ response = requests.get(url, timeout=timeout)
+ response.raise_for_status()
+ return response.text
+```
+
+### Testing
+
+Before submitting a pull request, ensure your code passes all tests:
+
+```bash
+# Run all tests
+pytest
+
+# Run tests with coverage
+pytest --cov=crawler
+
+# Run specific test file
+pytest tests/test_crawler.py
+```
+
+Write tests for new features:
+```python
+def test_fetch_url_success():
+ """Test that fetch_url returns content for valid URLs."""
+ result = fetch_url("https://example.com")
+ assert result is not None
+ assert len(result) > 0
+```
+
+### Commits
+
+Write clear, descriptive commit messages:
+
+```bash
+# Good
+git commit -m "Add proxy support to crawler
+
+- Add ProxyManager class to handle proxy rotation
+- Update fetch_url to accept proxy configuration
+- Add tests for proxy connection handling"
+
+# Avoid
+git commit -m "fix stuff"
+git commit -m "changes"
+```
+
+## Submitting Changes
+
+### Pull Request Process
+
+1. Ensure all tests pass and code is formatted correctly
+2. Push your branch to your fork:
+ ```bash
+ git push origin feature/your-feature-name
+ ```
+3. Open a pull request on GitHub with:
+ - A clear title describing the change
+ - A detailed description of what was changed and why
+ - Reference to any related issues (e.g., "Fixes #123")
+ - Screenshots or examples if applicable
+4. Address review comments and make requested changes
+5. Ensure the CI/CD pipeline passes
+6. Once approved, your PR will be merged
+
+### Pull Request Template
+
+```markdown
+## Description
+Brief explanation of what this PR does.
+
+## Changes Made
+- Change 1
+- Change 2
+- Change 3
+
+## Related Issues
+Fixes #123
+
+## Testing
+Describe how you tested these changes.
+
+## Checklist
+- [ ] Code follows style guidelines
+- [ ] Tests pass locally
+- [ ] Documentation is updated
+- [ ] No breaking changes (or documented in PR)
+```
+
+## Reporting Bugs
+
+When reporting bugs, please include:
+
+- **Description**: What you were trying to do
+- **Expected behavior**: What should have happened
+- **Actual behavior**: What actually happened
+- **Environment**: Python version, OS, MySQL version
+- **Steps to reproduce**: Clear steps to replicate the issue
+- **Error message**: Full error traceback if available
+- **Screenshots**: If applicable
+
+Example:
+```
+Title: Crawler fails with timeout on large datasets
+
+Description: When crawling more than 10,000 pages, the crawler
+consistently times out.
+
+Steps to reproduce:
+1. Configure crawler with 15,000 pages
+2. Run `python crawler.py`
+3. After ~8,000 pages, connection fails
+
+Expected: Crawler should complete all 15,000 pages
+Actual: Crawler crashes with timeout error
+
+Environment: Python 3.9, Ubuntu 20.04, MySQL 8.0
+```
+
+## Suggesting Features
+
+When suggesting features, explain:
+
+- **Use case**: Why this feature is needed
+- **Expected behavior**: How it should work
+- **Alternative approaches**: Other possible implementations
+- **Impact**: How it affects existing functionality
+
+## Documentation
+
+Help improve documentation by:
+
+- Fixing typos and grammatical errors
+- Adding missing sections or examples
+- Clarifying confusing explanations
+- Adding inline code comments for complex logic
From ea3c366a1d4168be3114f98ffa8948f26f2c96d7 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Mon, 19 Jan 2026 01:37:41 +0800
Subject: [PATCH 07/19] Create node.yml
---
.github/workflows/node.yml | 37 +++++++++++++++++++++++++++++++++++++
1 file changed, 37 insertions(+)
create mode 100644 .github/workflows/node.yml
diff --git a/.github/workflows/node.yml b/.github/workflows/node.yml
new file mode 100644
index 0000000..4016102
--- /dev/null
+++ b/.github/workflows/node.yml
@@ -0,0 +1,37 @@
+ node-version: ${{ env.NODE_VERSION }}
+ cache: 'npm'
+
+ - name: npm install, build, and test
+ run: |
+ npm install
+ npm run build --if-present
+ npm run test --if-present
+
+ - name: Upload artifact for deployment job
+ uses: actions/upload-artifact@v4
+ with:
+ name: node-app
+ path: .
+
+ deploy:
+ permissions:
+ contents: none
+ runs-on: ubuntu-latest
+ needs: build
+ environment:
+ name: 'Development'
+ url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
+
+ steps:
+ - name: Download artifact from build job
+ uses: actions/download-artifact@v4
+ with:
+ name: node-app
+
+ - name: 'Deploy to Azure WebApp'
+ id: deploy-to-webapp
+ uses: azure/webapps-deploy@v2
+ with:
+ app-name: ${{ env.AZURE_WEBAPP_NAME }}
+ publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }}
+ package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
\ No newline at end of file
From c81bf0c378c8dbb9b891edd4fcefad1690fdf2db Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Mon, 19 Jan 2026 08:02:46 +0800
Subject: [PATCH 08/19] Create CONTRIBUTING.md
---
crawler/CONTRIBUTING.md | 224 ++++++++++++++++++++++++++++++++++++++++
1 file changed, 224 insertions(+)
create mode 100644 crawler/CONTRIBUTING.md
diff --git a/crawler/CONTRIBUTING.md b/crawler/CONTRIBUTING.md
new file mode 100644
index 0000000..22f3b8c
--- /dev/null
+++ b/crawler/CONTRIBUTING.md
@@ -0,0 +1,224 @@
+# Contributing to Crawler
+
+Thank you for your interest in contributing to the Crawler project! We welcome contributions from everyone. This document provides guidelines and instructions for contributing.
+
+## Code of Conduct
+
+We are committed to providing a welcoming and inspiring community for all. Please be respectful and constructive in all interactions. Harassment, discrimination, or disruptive behavior will not be tolerated.
+
+## How to Contribute
+
+There are many ways to contribute to this project:
+
+- **Report bugs** by opening an issue with detailed information
+- **Suggest features** with clear use cases and expected behavior
+- **Improve documentation** by fixing typos or clarifying confusing sections
+- **Submit code changes** by creating pull requests with meaningful improvements
+- **Review pull requests** and provide constructive feedback to other contributors
+
+## Getting Started
+
+### Prerequisites
+
+- Python 3.8 or higher
+- Git
+- A MySQL database for testing (optional but recommended)
+- A code editor or IDE of your choice
+
+### Setting Up Your Development Environment
+
+1. Fork the repository on GitHub
+2. Clone your fork locally:
+ ```bash
+ git clone https://github.com/your-username/crawler.git
+ cd crawler
+ ```
+3. Create a virtual environment:
+ ```bash
+ python -m venv venv
+ source venv/bin/activate # On Windows: venv\Scripts\activate
+ ```
+4. Install development dependencies:
+ ```bash
+ pip install -r requirements-dev.txt
+ ```
+5. Create a local `.env` file for testing:
+ ```bash
+ cp .env.example .env
+ ```
+
+## Making Changes
+
+### Branch Naming
+
+Create a descriptive branch name for your changes:
+- `feature/add-proxy-support`
+- `bugfix/fix-mysql-connection-timeout`
+- `docs/improve-readme`
+- `test/add-crawler-tests`
+
+```bash
+git checkout -b feature/your-feature-name
+```
+
+### Code Style
+
+Follow these guidelines to maintain consistent code quality:
+
+- Use PEP 8 style guide for Python code
+- Keep lines under 100 characters when possible
+- Use meaningful variable and function names
+- Add docstrings to functions and classes
+- Use type hints where applicable
+
+Example:
+```python
+def fetch_url(url: str, timeout: int = 10) -> str:
+ """
+ Fetch content from a given URL.
+
+ Args:
+ url: The URL to fetch
+ timeout: Request timeout in seconds (default: 10)
+
+ Returns:
+ The HTML content of the page
+
+ Raises:
+ requests.exceptions.RequestException: If the request fails
+ """
+ response = requests.get(url, timeout=timeout)
+ response.raise_for_status()
+ return response.text
+```
+
+### Testing
+
+Before submitting a pull request, ensure your code passes all tests:
+
+```bash
+# Run all tests
+pytest
+
+# Run tests with coverage
+pytest --cov=crawler
+
+# Run specific test file
+pytest tests/test_crawler.py
+```
+
+Write tests for new features:
+```python
+def test_fetch_url_success():
+ """Test that fetch_url returns content for valid URLs."""
+ result = fetch_url("https://example.com")
+ assert result is not None
+ assert len(result) > 0
+```
+
+### Commits
+
+Write clear, descriptive commit messages:
+
+```bash
+# Good
+git commit -m "Add proxy support to crawler
+
+- Add ProxyManager class to handle proxy rotation
+- Update fetch_url to accept proxy configuration
+- Add tests for proxy connection handling"
+
+# Avoid
+git commit -m "fix stuff"
+git commit -m "changes"
+```
+
+## Submitting Changes
+
+### Pull Request Process
+
+1. Ensure all tests pass and code is formatted correctly
+2. Push your branch to your fork:
+ ```bash
+ git push origin feature/your-feature-name
+ ```
+3. Open a pull request on GitHub with:
+ - A clear title describing the change
+ - A detailed description of what was changed and why
+ - Reference to any related issues (e.g., "Fixes #123")
+ - Screenshots or examples if applicable
+4. Address review comments and make requested changes
+5. Ensure the CI/CD pipeline passes
+6. Once approved, your PR will be merged
+
+### Pull Request Template
+
+```markdown
+## Description
+Brief explanation of what this PR does.
+
+## Changes Made
+- Change 1
+- Change 2
+- Change 3
+
+## Related Issues
+Fixes #123
+
+## Testing
+Describe how you tested these changes.
+
+## Checklist
+- [ ] Code follows style guidelines
+- [ ] Tests pass locally
+- [ ] Documentation is updated
+- [ ] No breaking changes (or documented in PR)
+```
+
+## Reporting Bugs
+
+When reporting bugs, please include:
+
+- **Description**: What you were trying to do
+- **Expected behavior**: What should have happened
+- **Actual behavior**: What actually happened
+- **Environment**: Python version, OS, MySQL version
+- **Steps to reproduce**: Clear steps to replicate the issue
+- **Error message**: Full error traceback if available
+- **Screenshots**: If applicable
+
+Example:
+```
+Title: Crawler fails with timeout on large datasets
+
+Description: When crawling more than 10,000 pages, the crawler
+consistently times out.
+
+Steps to reproduce:
+1. Configure crawler with 15,000 pages
+2. Run `python crawler.py`
+3. After ~8,000 pages, connection fails
+
+Expected: Crawler should complete all 15,000 pages
+Actual: Crawler crashes with timeout error
+
+Environment: Python 3.9, Ubuntu 20.04, MySQL 8.0
+```
+
+## Suggesting Features
+
+When suggesting features, explain:
+
+- **Use case**: Why this feature is needed
+- **Expected behavior**: How it should work
+- **Alternative approaches**: Other possible implementations
+- **Impact**: How it affects existing functionality
+
+## Documentation
+
+Help improve documentation by:
+
+- Fixing typos and grammatical errors
+- Adding missing sections or examples
+- Clarifying confusing explanations
+- Adding inline code comments for complex logic
\ No newline at end of file
From bc3596690d1489f8520e06b76c342d21e77d87e6 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Mon, 19 Jan 2026 10:08:37 +0800
Subject: [PATCH 09/19] Create LICENSE
---
crawler/LICENSE | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
create mode 100644 crawler/LICENSE
diff --git a/crawler/LICENSE b/crawler/LICENSE
new file mode 100644
index 0000000..bc7a6e5
--- /dev/null
+++ b/crawler/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Crawler Project Contributors
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
From 0862923773172cbda5ad2d49ccf14b88e0cb497d Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:13:41 +0800
Subject: [PATCH 10/19] Add Python CI/CD workflow with JSON processing
This workflow sets up a comprehensive CI/CD pipeline for Python projects, including code quality checks, JSON validation, unit testing, JSON data processing, database operations, deployment, and security scanning.
---
.github/workflows/python-ci-cd.yml | 348 +++++++++++++++++++++++++++++
1 file changed, 348 insertions(+)
create mode 100644 .github/workflows/python-ci-cd.yml
diff --git a/.github/workflows/python-ci-cd.yml b/.github/workflows/python-ci-cd.yml
new file mode 100644
index 0000000..467dea9
--- /dev/null
+++ b/.github/workflows/python-ci-cd.yml
@@ -0,0 +1,348 @@
+name: Python CI/CD with JSON Data Processing
+
+on:
+ push:
+ branches: [ main, develop ]
+ pull_request:
+ branches: [ main, develop ]
+ workflow_dispatch:
+
+env:
+ PYTHON_VERSION: '3.11'
+ JSON_DATA_PATH: 'data'
+
+jobs:
+ # Job 1: Code Quality and Linting
+ code-quality:
+ runs-on: ubuntu-latest
+ name: Code Quality Check
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install flake8 pylint black isort mypy
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+ if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
+
+ - name: Lint with flake8
+ run: |
+ # Stop the build if there are Python syntax errors or undefined names
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+ # Exit-zero treats all errors as warnings
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+
+ - name: Check code formatting with black
+ run: |
+ black --check --diff .
+
+ - name: Check import sorting with isort
+ run: |
+ isort --check-only --diff .
+
+ - name: Type checking with mypy
+ run: |
+ mypy . --ignore-missing-imports || true
+
+ # Job 2: JSON Validation and Schema Check
+ json-validation:
+ runs-on: ubuntu-latest
+ name: Validate JSON Files
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install JSON validation tools
+ run: |
+ pip install jsonschema pyjson5 pyyaml
+
+ - name: Validate JSON syntax
+ run: |
+ echo "Validating JSON files..."
+ python -c "
+ import json
+ import os
+ import sys
+
+ errors = []
+ for root, dirs, files in os.walk('.'):
+ for file in files:
+ if file.endswith('.json'):
+ filepath = os.path.join(root, file)
+ try:
+ with open(filepath, 'r', encoding='utf-8') as f:
+ json.load(f)
+ print(f'✓ {filepath} is valid')
+ except json.JSONDecodeError as e:
+ errors.append(f'{filepath}: {str(e)}')
+ print(f'✗ {filepath} is invalid: {e}')
+
+ if errors:
+ print(f'\n{len(errors)} JSON file(s) failed validation')
+ sys.exit(1)
+ else:
+ print(f'\nAll JSON files are valid!')
+ "
+
+ - name: Upload JSON validation report
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: json-validation-report
+ path: |
+ **/*.json
+
+ # Job 3: Unit Tests
+ test:
+ runs-on: ubuntu-latest
+ needs: [code-quality, json-validation]
+ name: Run Tests
+ strategy:
+ matrix:
+ python-version: ['3.9', '3.10', '3.11', '3.12']
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+ cache: 'pip'
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install pytest pytest-cov pytest-mock pytest-asyncio
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+
+ - name: Run tests with pytest
+ run: |
+ pytest --cov=. --cov-report=xml --cov-report=html --cov-report=term -v
+
+ - name: Upload coverage reports
+ uses: codecov/codecov-action@v4
+ with:
+ file: ./coverage.xml
+ flags: unittests
+ name: codecov-umbrella
+ fail_ci_if_error: false
+
+ - name: Upload test results
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: test-results-${{ matrix.python-version }}
+ path: |
+ htmlcov/
+ coverage.xml
+
+ # Job 4: JSON Data Processing
+ process-json-data:
+ runs-on: ubuntu-latest
+ needs: [test]
+ name: Process JSON Data
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install data processing libraries
+ run: |
+ pip install pandas numpy jsonschema
+
+ - name: Process JSON data files
+ run: |
+ python -c "
+ import json
+ import os
+ from datetime import datetime
+
+ # Create processing report
+ report = {
+ 'timestamp': datetime.now().isoformat(),
+ 'files_processed': [],
+ 'total_files': 0,
+ 'status': 'success'
+ }
+
+ for root, dirs, files in os.walk('${{ env.JSON_DATA_PATH }}'):
+ for file in files:
+ if file.endswith('.json'):
+ filepath = os.path.join(root, file)
+ try:
+ with open(filepath, 'r') as f:
+ data = json.load(f)
+ report['files_processed'].append({
+ 'file': filepath,
+ 'size': os.path.getsize(filepath),
+ 'keys': list(data.keys()) if isinstance(data, dict) else 'array'
+ })
+ report['total_files'] += 1
+ except Exception as e:
+ report['status'] = 'failed'
+ print(f'Error processing {filepath}: {e}')
+
+ # Save report
+ os.makedirs('reports', exist_ok=True)
+ with open('reports/json_processing_report.json', 'w') as f:
+ json.dump(report, f, indent=2)
+
+ print(f'Processed {report[\"total_files\"]} JSON files')
+ " || echo "No JSON data files found in ${{ env.JSON_DATA_PATH }}"
+
+ - name: Upload processing report
+ uses: actions/upload-artifact@v4
+ with:
+ name: json-processing-report
+ path: reports/
+
+ # Job 5: Database Operations (if applicable)
+ database-operations:
+ runs-on: ubuntu-latest
+ needs: [process-json-data]
+ name: Database Sync
+ if: github.ref == 'refs/heads/main'
+
+ services:
+ postgres:
+ image: postgres:15
+ env:
+ POSTGRES_PASSWORD: postgres
+ POSTGRES_DB: testdb
+ options: >-
+ --health-cmd pg_isready
+ --health-interval 10s
+ --health-timeout 5s
+ --health-retries 5
+ ports:
+ - 5432:5432
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install database libraries
+ run: |
+ pip install psycopg2-binary sqlalchemy pandas
+
+ - name: JSON to Database migration
+ env:
+ DATABASE_URL: postgresql://postgres:postgres@localhost:5432/testdb
+ run: |
+ python -c "
+ import json
+ import os
+ from sqlalchemy import create_engine, text
+
+ engine = create_engine(os.environ['DATABASE_URL'])
+
+ with engine.connect() as conn:
+ # Create sample table
+ conn.execute(text('''
+ CREATE TABLE IF NOT EXISTS json_data (
+ id SERIAL PRIMARY KEY,
+ filename VARCHAR(255),
+ data JSONB,
+ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+ )
+ '''))
+ conn.commit()
+ print('Database table created successfully')
+
+ print('Database operations completed')
+ "
+
+ # Job 6: Build and Deploy
+ build-deploy:
+ runs-on: ubuntu-latest
+ needs: [test, process-json-data]
+ name: Build and Deploy
+ if: github.ref == 'refs/heads/main'
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Build package
+ run: |
+ pip install build setuptools wheel
+ python -m build
+
+ - name: Create deployment artifact
+ run: |
+ mkdir -p deployment
+ cp -r dist deployment/
+ cp -r ${{ env.JSON_DATA_PATH }} deployment/ || echo "No data directory"
+
+ - name: Upload deployment artifact
+ uses: actions/upload-artifact@v4
+ with:
+ name: deployment-package
+ path: deployment/
+ retention-days: 30
+
+ # Job 7: Security Scan
+ security-scan:
+ runs-on: ubuntu-latest
+ name: Security Scanning
+
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ env.PYTHON_VERSION }}
+
+ - name: Install security tools
+ run: |
+ pip install bandit safety
+
+ - name: Run Bandit security scan
+ run: |
+ bandit -r . -f json -o bandit-report.json || true
+
+ - name: Check dependencies for vulnerabilities
+ run: |
+ safety check --json || true
+
+ - name: Upload security reports
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: security-reports
+ path: |
+ bandit-report.json
From 21efb248b71427bf354297379a3b11ff22325794 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:15:32 +0800
Subject: [PATCH 11/19] Add JSON data processor and validator script
This script provides functionality for processing, validating, and managing JSON data, including reading, writing, merging, and generating statistics.
---
.github/workflows/json_processor.py | 372 ++++++++++++++++++++++++++++
1 file changed, 372 insertions(+)
create mode 100644 .github/workflows/json_processor.py
diff --git a/.github/workflows/json_processor.py b/.github/workflows/json_processor.py
new file mode 100644
index 0000000..cbb5943
--- /dev/null
+++ b/.github/workflows/json_processor.py
@@ -0,0 +1,372 @@
+#!/usr/bin/env python3
+"""
+JSON Data Processor and Validator
+Handles JSON file operations, validation, and data transformation
+"""
+
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+from datetime import datetime
+import logging
+
+# Configure logging
+# NOTE: basicConfig runs at import time, so importing this module configures
+# the root logger (INFO level, timestamped records) for the whole process.
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+# Module-level logger shared by every class and function in this file.
+logger = logging.getLogger(__name__)
+
+
+class JSONDataProcessor:
+    """Read, write, validate, merge and back up JSON files in one directory.
+
+    File names passed to the methods are resolved relative to ``data_dir``.
+    Most methods log the cause of a failure and signal it through their
+    return value (``False`` / ``None``) rather than raising.
+    """
+
+    def __init__(self, data_dir: str = "data"):
+        """
+        Initialize JSON Data Processor
+
+        Args:
+            data_dir: Directory containing JSON files. It is created,
+                including any missing parent directories, if absent.
+        """
+        self.data_dir = Path(data_dir)
+        # parents=True so a nested path such as "var/data" works on first use.
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+
+    def validate_json_file(self, filepath: Union[str, Path]) -> bool:
+        """
+        Validate a JSON file
+
+        Args:
+            filepath: Path to JSON file (used as given, not joined to data_dir)
+
+        Returns:
+            True if the file opens and parses as JSON, False otherwise
+        """
+        try:
+            with open(filepath, 'r', encoding='utf-8') as f:
+                json.load(f)
+            logger.info(f"✓ Valid JSON: {filepath}")
+            return True
+        except json.JSONDecodeError as e:
+            logger.error(f"✗ Invalid JSON in {filepath}: {e}")
+            return False
+        except Exception as e:
+            # Unreadable file (missing, permissions, bad encoding, ...).
+            logger.error(f"✗ Error reading {filepath}: {e}")
+            return False
+
+    def validate_all_files(self) -> Dict[str, Any]:
+        """
+        Validate all JSON files in the data directory
+
+        The search is recursive, so files in sub-directories (including the
+        "backups" folder written by create_backup) are counted as well.
+
+        Returns:
+            Dictionary with keys 'total_files', 'valid_files',
+            'invalid_files' and 'errors' (paths of the invalid files)
+        """
+        results = {
+            'total_files': 0,
+            'valid_files': 0,
+            'invalid_files': 0,
+            'errors': []
+        }
+
+        for json_file in self.data_dir.rglob('*.json'):
+            results['total_files'] += 1
+            if self.validate_json_file(json_file):
+                results['valid_files'] += 1
+            else:
+                results['invalid_files'] += 1
+                results['errors'].append(str(json_file))
+
+        logger.info(f"Validation complete: {results['valid_files']}/{results['total_files']} valid")
+        return results
+
+    def read_json(self, filename: str) -> Optional[Union[Dict, List]]:
+        """
+        Read and parse a JSON file
+
+        Args:
+            filename: Name of the JSON file, relative to the data directory
+
+        Returns:
+            Parsed JSON data or None if error
+        """
+        filepath = self.data_dir / filename
+        try:
+            with open(filepath, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+            logger.info(f"Successfully read {filename}")
+            return data
+        except Exception as e:
+            logger.error(f"Error reading {filename}: {e}")
+            return None
+
+    def write_json(self, filename: str, data: Union[Dict, List], indent: int = 2) -> bool:
+        """
+        Write data to a JSON file
+
+        Args:
+            filename: Name of the JSON file, relative to the data directory
+            data: Data to write
+            indent: JSON indentation level
+
+        Returns:
+            True if successful, False otherwise
+        """
+        filepath = self.data_dir / filename
+        try:
+            with open(filepath, 'w', encoding='utf-8') as f:
+                # ensure_ascii=False keeps non-ASCII text readable in the file.
+                json.dump(data, f, indent=indent, ensure_ascii=False)
+            logger.info(f"Successfully wrote {filename}")
+            return True
+        except Exception as e:
+            logger.error(f"Error writing {filename}: {e}")
+            return False
+
+    def merge_json_files(self, output_filename: str, pattern: str = "*.json") -> bool:
+        """
+        Merge multiple JSON files into one
+
+        Lists are concatenated; any other document is appended as a single
+        element. Files that cannot be read are skipped.
+
+        Args:
+            output_filename: Name of output file
+            pattern: File pattern to match
+
+        Returns:
+            True if successful, False otherwise
+        """
+        merged_data = []
+
+        # Sort the matches: glob order is filesystem-dependent, and the merged
+        # output should not change between runs on the same inputs.
+        for json_file in sorted(self.data_dir.glob(pattern)):
+            if json_file.name == output_filename:
+                # Never fold a previous merge result back into itself.
+                continue
+
+            data = self.read_json(json_file.name)
+            if data is not None:
+                if isinstance(data, list):
+                    merged_data.extend(data)
+                else:
+                    merged_data.append(data)
+
+        return self.write_json(output_filename, merged_data)
+
+    def transform_data(self, input_file: str, output_file: str,
+                       transformer: callable) -> bool:
+        """
+        Transform JSON data using a custom function
+
+        Args:
+            input_file: Input JSON file
+            output_file: Output JSON file
+            transformer: Function called with the parsed input; its return
+                value is written to the output file
+
+        Returns:
+            True if successful, False otherwise (unreadable input, failing
+            transformer, or failed write)
+        """
+        data = self.read_json(input_file)
+        if data is None:
+            return False
+
+        try:
+            transformed_data = transformer(data)
+            return self.write_json(output_file, transformed_data)
+        except Exception as e:
+            logger.error(f"Error transforming data: {e}")
+            return False
+
+    def generate_schema(self, data: Union[Dict, List]) -> Dict:
+        """
+        Generate a basic JSON schema from data
+
+        Nested objects are described recursively. Arrays are described by
+        their first element only; an empty array gets no "items" entry when
+        nested and an empty "items" schema at the top level.
+
+        Args:
+            data: JSON data
+
+        Returns:
+            JSON schema
+        """
+        def get_type(value):
+            # bool must be tested before int: bool is a subclass of int.
+            if isinstance(value, bool):
+                return "boolean"
+            elif isinstance(value, int):
+                return "integer"
+            elif isinstance(value, float):
+                return "number"
+            elif isinstance(value, str):
+                return "string"
+            elif isinstance(value, list):
+                return "array"
+            elif isinstance(value, dict):
+                return "object"
+            elif value is None:
+                return "null"
+            return "string"
+
+        if isinstance(data, dict):
+            schema = {
+                "type": "object",
+                "properties": {}
+            }
+            for key, value in data.items():
+                schema["properties"][key] = {
+                    "type": get_type(value)
+                }
+                if isinstance(value, dict):
+                    schema["properties"][key] = self.generate_schema(value)
+                elif isinstance(value, list) and value:
+                    schema["properties"][key]["items"] = {
+                        "type": get_type(value[0])
+                    }
+            return schema
+        elif isinstance(data, list):
+            return {
+                "type": "array",
+                "items": self.generate_schema(data[0]) if data else {}
+            }
+        else:
+            return {"type": get_type(data)}
+
+    def create_backup(self, filename: str) -> bool:
+        """
+        Create a backup of a JSON file
+
+        The file is copied byte-for-byte into "<data_dir>/backups" under the
+        name "<stem>_backup_<YYYYmmdd_HHMMSS>.json". Copying rather than
+        re-serialising means a file that no longer parses is still preserved
+        exactly instead of being replaced by ``null``.
+
+        Args:
+            filename: Name of file to backup
+
+        Returns:
+            True if successful, False otherwise
+        """
+        filepath = self.data_dir / filename
+        if not filepath.exists():
+            logger.error(f"File {filename} does not exist")
+            return False
+
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        backup_name = f"{filepath.stem}_backup_{timestamp}.json"
+        backup_path = self.data_dir / "backups" / backup_name
+
+        try:
+            import shutil  # local import: only this method needs it
+
+            backup_path.parent.mkdir(parents=True, exist_ok=True)
+            shutil.copy2(filepath, backup_path)
+            logger.info(f"Backup created: {backup_name}")
+            return True
+        except Exception as e:
+            logger.error(f"Error creating backup: {e}")
+            return False
+
+    def get_statistics(self) -> Dict[str, Any]:
+        """
+        Get statistics about JSON files in data directory
+
+        The search is recursive, like validate_all_files.
+
+        Returns:
+            Dictionary with 'total_files', 'total_size_bytes',
+            'total_size_kb', 'total_size_mb' and a 'files' list holding
+            name, relative path, size and modification time per file
+        """
+        stats = {
+            'total_files': 0,
+            'total_size_bytes': 0,
+            'files': []
+        }
+
+        for json_file in self.data_dir.rglob('*.json'):
+            # One stat() call per file keeps size and mtime consistent.
+            file_stat = json_file.stat()
+            file_size = file_stat.st_size
+            stats['total_files'] += 1
+            stats['total_size_bytes'] += file_size
+
+            stats['files'].append({
+                'name': json_file.name,
+                'path': str(json_file.relative_to(self.data_dir)),
+                'size_bytes': file_size,
+                'size_kb': round(file_size / 1024, 2),
+                'modified': datetime.fromtimestamp(
+                    file_stat.st_mtime
+                ).isoformat()
+            })
+
+        stats['total_size_kb'] = round(stats['total_size_bytes'] / 1024, 2)
+        stats['total_size_mb'] = round(stats['total_size_bytes'] / (1024 * 1024), 2)
+
+        return stats
+
+
+class JSONDatabaseSync:
+    """Placeholder bridge between JSON data and a database.
+
+    Neither method talks to a database yet: each one only logs its arguments
+    and reports success.
+    """
+
+    def __init__(self, database_url: Optional[str] = None):
+        """
+        Remember which database to talk to.
+
+        Args:
+            database_url: Database connection URL. When omitted or empty, the
+                DATABASE_URL environment variable is used instead.
+        """
+        if database_url:
+            self.database_url = database_url
+        else:
+            self.database_url = os.getenv('DATABASE_URL')
+
+    def sync_to_database(self, json_data: Dict, table_name: str) -> bool:
+        """
+        Sync JSON data to database (placeholder: only logs the request)
+
+        Args:
+            json_data: JSON data to sync
+            table_name: Target table name
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # This is a placeholder - implement actual database logic
+            logger.info(f"Syncing data to table: {table_name}")
+            key_names = list(json_data.keys())
+            logger.info(f"Data keys: {key_names}")
+        except Exception as exc:
+            logger.error(f"Error syncing to database: {exc}")
+            return False
+        else:
+            return True
+
+    def export_from_database(self, table_name: str, output_file: str) -> bool:
+        """
+        Export database table to JSON (placeholder: only logs the request)
+
+        Args:
+            table_name: Source table name
+            output_file: Output JSON file
+
+        Returns:
+            True if successful, False otherwise
+        """
+        try:
+            # This is a placeholder - implement actual database logic
+            logger.info(f"Exporting from table: {table_name}")
+            logger.info(f"Output file: {output_file}")
+        except Exception as exc:
+            logger.error(f"Error exporting from database: {exc}")
+            return False
+        else:
+            return True
+
+
+def main():
+    """CLI entry point: validate the default data directory and print a summary."""
+    processor = JSONDataProcessor()
+
+    # Banner
+    print("JSON Data Processor")
+    print("=" * 50)
+
+    # Validation pass over every JSON file
+    results = processor.validate_all_files()
+    print("\nValidation Results:")
+    validation_rows = (
+        ("Total files", 'total_files'),
+        ("Valid files", 'valid_files'),
+        ("Invalid files", 'invalid_files'),
+    )
+    for label, key in validation_rows:
+        print(f" {label}: {results[key]}")
+
+    # Size statistics
+    stats = processor.get_statistics()
+    print("\nStatistics:")
+    print(f" Total files: {stats['total_files']}")
+    print(f" Total size: {stats['total_size_kb']} KB")
+
+    # Per-file listing, only when there is something to list
+    file_entries = stats['files']
+    if file_entries:
+        print("\nFiles:")
+        for entry in file_entries:
+            print(f" - {entry['name']} ({entry['size_kb']} KB)")
+
+
+if __name__ == "__main__":
+    main()
From 59500120f67ba0aa30bb67305d3546b57ee6af9a Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:18:09 +0800
Subject: [PATCH 12/19] Add requirements.txt for project dependencies
Added core dependencies, data processing, JSON processing, database, testing, code quality, security, API, and utility libraries.
---
.github/workflows/requirements.txt | 43 ++++++++++++++++++++++++++++++
1 file changed, 43 insertions(+)
create mode 100644 .github/workflows/requirements.txt
diff --git a/.github/workflows/requirements.txt b/.github/workflows/requirements.txt
new file mode 100644
index 0000000..f84f20b
--- /dev/null
+++ b/.github/workflows/requirements.txt
@@ -0,0 +1,43 @@
+# Core dependencies
+python-dotenv==1.0.0
+requests==2.31.0
+
+# Data processing
+pandas==2.1.4
+numpy==1.26.3
+
+# JSON processing and validation
+jsonschema==4.20.0
+pyjson5==1.6.4
+pyyaml==6.0.1
+
+# Database
+psycopg2-binary==2.9.9
+sqlalchemy==2.0.25
+
+# Testing
+pytest==7.4.4
+pytest-cov==4.1.0
+pytest-mock==3.12.0
+pytest-asyncio==0.23.3
+
+# Code quality
+flake8==7.0.0
+pylint==3.0.3
+black==24.1.1
+isort==5.13.2
+mypy==1.8.0
+
+# Security
+bandit==1.7.6
+safety==3.0.1
+
+# API and web
+fastapi==0.109.0
+uvicorn==0.27.0
+httpx==0.26.0
+
+# Utilities
+click==8.1.7
+rich==13.7.0
+tqdm==4.66.1
From 9be26c569255dcaba16f0efde48ce97c3b8c3263 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:21:27 +0800
Subject: [PATCH 13/19] Revise README for ExplorePi project details
Updated README to reflect project name change and added detailed features, installation instructions, and usage examples.
---
.github/workflows/README.md | 421 +++++++++++++++++++++++++++++++-----
1 file changed, 372 insertions(+), 49 deletions(-)
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index cc583f4..6cb4cb9 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -1,83 +1,406 @@
-# Crawler
+# ExplorePi - JSON Data Management System
-A web crawler for collecting and processing data from specified sources.
+## 📋 Overview
-## Table of Contents
+A comprehensive Python-based system for managing, validating, processing, and synchronizing JSON data with integrated CI/CD workflows, API services, and database operations.
-- [Installation](#installation)
-- [Configuration](#configuration)
-- [Usage](#usage)
-- [Database Setup](#database-setup)
-- [Contributing](#contributing)
+## 🚀 Features
-## Installation
+- **JSON Validation & Schema Management** - Validate JSON files against custom schemas
+- **Data Processing Pipeline** - Automated data transformation and aggregation
+- **REST API Service** - FastAPI-based API for JSON operations
+- **Database Synchronization** - Sync JSON data with PostgreSQL
+- **GitHub Actions Workflows** - Automated CI/CD pipelines
+- **Backup & Recovery** - Automated backup system with versioning
+- **Data Analytics** - Statistics and reporting capabilities
-Install the required dependencies:
+## 📁 Project Structure
+```
+ExplorePi/
+├── .github/
+│ └── workflows/
+│ ├── python-ci-cd.yml # Main CI/CD workflow
+│ └── json-sync-workflow.yml # Data synchronization workflow
+├── data/ # JSON data files
+│ ├── users.json
+│ ├── products.json
+│ └── config.json
+├── schemas/ # JSON schema definitions
+│ ├── user_schema.json
+│ ├── product_schema.json
+│ └── config_schema.json
+├── backups/ # Backup archives
+├── processed/ # Processed data outputs
+├── reports/ # Generated reports
+├── json_processor.py # Core JSON processing library
+├── schema_validator.py # Schema validation utilities
+├── json_api.py # REST API service
+└── requirements.txt # Python dependencies
+```
+
+## 🔧 Installation
+
+### Prerequisites
+
+- Python 3.9 or higher
+- PostgreSQL 15+ (for database features)
+- Git
+- pip
+
+### Setup
+
+1. **Clone the repository**
+```bash
+git clone https://github.com/arifinahmad99-cloud/ExplorePi.git
+cd ExplorePi
+```
+
+2. **Create virtual environment**
+```bash
+python -m venv venv
+source venv/bin/activate # On Windows: venv\Scripts\activate
+```
+
+3. **Install dependencies**
```bash
pip install -r requirements.txt
```
-Ensure you have Python 3.8+ installed on your system.
+4. **Set up environment variables**
+```bash
+cp .env.example .env
+# Edit .env with your configuration
+```
+
+## 📚 Usage
+
+### JSON Processor
+
+Process and validate JSON files:
+
+```python
+from json_processor import JSONDataProcessor
+
+# Initialize processor
+processor = JSONDataProcessor(data_dir="data")
+
+# Validate all JSON files
+results = processor.validate_all_files()
+print(f"Valid files: {results['valid_files']}/{results['total_files']}")
+
+# Read JSON file
+data = processor.read_json("users.json")
+
+# Write JSON file
+processor.write_json("output.json", {"key": "value"})
+
+# Merge multiple files
+processor.merge_json_files("merged.json", "*.json")
+
+# Get statistics
+stats = processor.get_statistics()
+print(f"Total files: {stats['total_files']}")
+print(f"Total size: {stats['total_size_mb']} MB")
+```
+
+### Schema Validator
+
+Validate JSON data against schemas:
+
+```python
+from schema_validator import JSONSchemaValidator
+
+# Initialize validator
+validator = JSONSchemaValidator(schema_dir="schemas")
+
+# Validate data
+data = {"id": 1, "username": "john", "email": "john@example.com"}
+is_valid, error = validator.validate_data(data, "user_schema")
+
+if is_valid:
+ print("✓ Data is valid")
+else:
+ print(f"✗ Validation error: {error}")
+
+# Create new schema
+user_schema = {
+ "type": "object",
+ "required": ["id", "username", "email"],
+ "properties": {
+ "id": {"type": "integer"},
+ "username": {"type": "string"},
+ "email": {"type": "string", "format": "email"}
+ }
+}
+validator.create_schema("user_schema", user_schema)
+```
+
+### REST API Service
+
+Start the API server:
+
+```bash
+python json_api.py
+```
+
+Or with uvicorn:
+
+```bash
+uvicorn json_api:app --reload --host 0.0.0.0 --port 8000
+```
+
+**API Endpoints:**
+
+```bash
+# List all files
+GET http://localhost:8000/files
+
+# Get specific file
+GET http://localhost:8000/files/users.json
+
+# Create new file
+POST http://localhost:8000/files
+{
+ "filename": "new_data.json",
+ "data": {"key": "value"}
+}
+
+# Update file
+PUT http://localhost:8000/files/users.json
+{
+ "id": 1,
+ "username": "updated"
+}
+
+# Delete file
+DELETE http://localhost:8000/files/old_data.json
+
+# Upload file
+POST http://localhost:8000/upload
+# (multipart/form-data with file)
+
+# Validate data
+POST http://localhost:8000/validate
+{
+ "data": {"test": "data"},
+ "schema_name": "user_schema"
+}
+
+# Transform data
+POST http://localhost:8000/transform
+{
+ "input_filename": "input.json",
+ "output_filename": "output.json",
+ "operation": "filter",
+ "parameters": {
+ "key": "status",
+ "value": "active"
+ }
+}
+
+# Get statistics
+GET http://localhost:8000/stats
+
+# Search data
+GET http://localhost:8000/search?query=john&field=username
+
+# Health check
+GET http://localhost:8000/health
+```
+
+### Command Line Interface
-## Configuration
+Quick validation and processing:
-### Environment Variables
+```bash
+# Validate all JSON files
+python json_processor.py
-Create a `.env` file in the project root with the following variables:
+# Validate with schemas
+python schema_validator.py
+# Create example data and schemas
+python schema_validator.py
```
-DATABASE_HOST=localhost
-DATABASE_USER=crawler_user
-DATABASE_PASSWORD=your_password
-DATABASE_NAME=crawler_db
+
+## 🔄 GitHub Actions Workflows
+
+### Python CI/CD Workflow
+
+Automatically runs on push and pull requests:
+
+**Jobs:**
+1. **Code Quality** - Linting with flake8, black, isort, mypy
+2. **JSON Validation** - Validates all JSON files
+3. **Testing** - Runs pytest on Python 3.9-3.12
+4. **Data Processing** - Processes JSON files
+5. **Database Operations** - Syncs with PostgreSQL
+6. **Build & Deploy** - Creates deployment packages
+7. **Security Scan** - Runs bandit and safety checks
+
+### JSON Synchronization Workflow
+
+Runs daily at midnight UTC, on pushes that change JSON files under `data/` or `schemas/`, and on manual dispatch:
+
+**Jobs:**
+1. **Validate JSON** - Validates all data files
+2. **Backup Data** - Creates timestamped backups
+3. **Process Data** - Transforms and aggregates data
+4. **Generate Reports** - Creates HTML/JSON reports
+5. **Database Sync** - Syncs to PostgreSQL (optional)
+6. **Notification** - Summary of results
+
+**Manual Trigger:**
+```bash
+# Go to Actions tab in GitHub
+# Select "JSON Data Synchronization"
+# Click "Run workflow"
+# Choose sync type: full, incremental, or validate_only
```
-Update these values according to your local environment.
+## 🗄️ Database Integration
+
+### PostgreSQL Setup
+
+```sql
+-- Create database
+CREATE DATABASE json_data;
+
+-- Tables are auto-created by workflows
+-- json_records: Stores JSON data
+-- sync_history: Tracks synchronization history
+```
+
+### Sync Data to Database
+
+```python
+from json_processor import JSONDatabaseSync
-## Usage
+# Initialize sync
+db_sync = JSONDatabaseSync(database_url="postgresql://user:pass@localhost/json_data")
-Run the crawler with:
+# Sync to database
+data = {"id": 1, "name": "Example"}
+db_sync.sync_to_database(data, table_name="json_records")
+
+# Export from database
+db_sync.export_from_database("json_records", "exported.json")
+```
+
+## 📊 Data Transformations
+
+### Available Operations
+
+1. **Filter** - Filter data by field values
+```python
+request = TransformRequest(
+ input_filename="users.json",
+ output_filename="active_users.json",
+ operation="filter",
+ parameters={"key": "active", "value": True}
+)
+```
+
+2. **Map** - Rename/transform fields
+```python
+request = TransformRequest(
+ input_filename="users.json",
+ output_filename="transformed.json",
+ operation="map",
+ parameters={"field_map": {"old_name": "new_name"}}
+)
+```
+
+3. **Sort** - Sort list data
+```python
+request = TransformRequest(
+ input_filename="users.json",
+ output_filename="sorted.json",
+ operation="sort",
+ parameters={"key": "created_at", "reverse": True}
+)
+```
+
+## 🔐 Security
+
+- **Dependency Scanning** - Automated with Safety
+- **Code Analysis** - Bandit for security issues
+- **Secret Management** - Use GitHub Secrets for credentials
+- **Input Validation** - All API endpoints validate input
+- **SQL Injection Protection** - Parameterized queries only
+
+## 📈 Monitoring & Logging
+
+### Logging Configuration
+
+```python
+import logging
+
+logging.basicConfig(
+ level=logging.INFO,
+ format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+ handlers=[
+ logging.FileHandler('app.log'),
+ logging.StreamHandler()
+ ]
+)
+```
+
+### Metrics & Reports
+
+- **Validation Reports** - JSON validation results
+- **Processing Reports** - Data transformation statistics
+- **Sync History** - Database synchronization logs
+- **API Metrics** - Request/response logs
+
+## 🧪 Testing
+
+Run tests:
```bash
-python crawler.py
+# All tests
+pytest
+
+# With coverage
+pytest --cov=. --cov-report=html
+
+# Specific test file
+pytest tests/test_processor.py
+
+# Verbose mode
+pytest -v
```
-Optional flags:
-- `--verbose`: Enable detailed logging output
-- `--limit N`: Limit crawling to N pages
-- `--timeout S`: Set request timeout to S seconds
+## 🤝 Contributing
-## Database Setup
+1. Fork the repository
+2. Create feature branch (`git checkout -b feature/AmazingFeature`)
+3. Commit changes (`git commit -m 'Add AmazingFeature'`)
+4. Push to branch (`git push origin feature/AmazingFeature`)
+5. Open Pull Request
-### MySQL Configuration
+## 📝 License
-The crawler uses MySQL to store collected data. Follow these steps to set up your database:
+This project is licensed under the MIT License - see LICENSE file for details.
-1. **Install MySQL**: Download and install from [MySQL Official Website](https://dev.mysql.com/downloads/mysql/)
+## 👤 Author
-2. **Create Database and User**:
- ```sql
- CREATE DATABASE crawler_db;
- CREATE USER 'crawler_user'@'localhost' IDENTIFIED BY 'your_password';
- GRANT ALL PRIVILEGES ON crawler_db.* TO 'crawler_user'@'localhost';
- FLUSH PRIVILEGES;
- ```
+**Ahmad Arifin**
+- GitHub: [@arifinahmad99-cloud](https://github.com/arifinahmad99-cloud)
-3. **Initialize Tables**: Run the database migration script:
- ```bash
- python scripts/init_db.py
- ```
+## 🙏 Acknowledgments
-### Connection Details
+- FastAPI framework
+- PostgreSQL database
+- GitHub Actions
+- Python community
-- **Host**: localhost (default)
-- **Port**: 3306 (default MySQL port)
-- **User**: crawler_user
-- **Database**: crawler_db
+## 📞 Support
-Update the connection parameters in your `.env` file if using different settings.
+For support, please open an issue in the GitHub repository.
-## Contributing
+---
-Please read CONTRIBUTING.md for details on our code of conduct and the process for submitting pull requests.
\ No newline at end of file
+**Version:** 2.0.0
+**Last Updated:** February 2026
From d4a50ae04f653d51d02950c9a30208fbe082d702 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:27:48 +0800
Subject: [PATCH 14/19] Remove author section from README
Removed author information from the README.
---
.github/workflows/README.md | 5 -----
1 file changed, 5 deletions(-)
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
index 6cb4cb9..51a09b5 100644
--- a/.github/workflows/README.md
+++ b/.github/workflows/README.md
@@ -384,11 +384,6 @@ pytest -v
This project is licensed under the MIT License - see LICENSE file for details.
-## 👤 Author
-
-**Ahmad Arifin**
-- GitHub: [@arifinahmad99-cloud](https://github.com/arifinahmad99-cloud)
-
## 🙏 Acknowledgments
- FastAPI framework
From 69d51072d612f180274223b1bf8076ad37749de3 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:30:28 +0800
Subject: [PATCH 15/19] Add FastAPI JSON Data API service
This file implements a FastAPI-based REST API for managing JSON data, including endpoints for file operations, validation, and transformation.
---
.github/workflows/json_api.py | 401 ++++++++++++++++++++++++++++++++++
1 file changed, 401 insertions(+)
create mode 100644 .github/workflows/json_api.py
diff --git a/.github/workflows/json_api.py b/.github/workflows/json_api.py
new file mode 100644
index 0000000..ad0799d
--- /dev/null
+++ b/.github/workflows/json_api.py
@@ -0,0 +1,401 @@
+#!/usr/bin/env python3
+"""
+JSON Data API Service
+FastAPI-based REST API for JSON data operations
+"""
+
+import json
+import logging
+from datetime import datetime
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Union
+
+from fastapi import FastAPI, File, HTTPException, Query, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, JSONResponse
+from pydantic import BaseModel, Field
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Initialize FastAPI app
+app = FastAPI(
+    title="JSON Data API",
+    description="RESTful API for JSON data management and operations",
+    version="2.0.0"
+)
+
+# Add CORS middleware
+# Any origin may call the API, so credentialed requests stay disabled:
+# wildcard origins combined with credentials would let every website send
+# the user's cookies/auth headers. To enable credentials, list explicit
+# origins in allow_origins first.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Data directory (relative to the working directory; created on import)
+DATA_DIR = Path("data")
+DATA_DIR.mkdir(exist_ok=True)
+
+# Pydantic models
+class JSONDataModel(BaseModel):
+    """Request body for creating a file: target name plus its JSON content."""
+    filename: str = Field(..., description="Name of the JSON file")
+    # Union[...] rather than `X | Y`: the annotation is evaluated when the
+    # class is created, and `|` on typing generics needs Python 3.10+.
+    data: Union[Dict[str, Any], List[Any]] = Field(..., description="JSON data content")
+
+class ValidationRequest(BaseModel):
+    """Request body for /validate: the data to check and an optional schema name."""
+    # Union[...] rather than `X | Y`: the annotation is evaluated when the
+    # class is created, and `|` on typing generics needs Python 3.10+.
+    data: Union[Dict[str, Any], List[Any]] = Field(..., description="Data to validate")
+    schema_name: Optional[str] = Field(None, description="Schema name for validation")
+
+class TransformRequest(BaseModel):
+    """Request body for /transform: source file, destination file and operation."""
+    input_filename: str = Field(
+        default=...,
+        description="Input file name",
+    )
+    output_filename: str = Field(
+        default=...,
+        description="Output file name",
+    )
+    operation: str = Field(
+        default=...,
+        description="Transformation operation",
+    )
+    # Optional: clients may omit the field (empty dict) or send null.
+    parameters: Optional[Dict[str, Any]] = Field(
+        default={},
+        description="Operation parameters",
+    )
+
+class APIResponse(BaseModel):
+    """Envelope returned by most endpoints: status, message, payload, timestamp."""
+    status: str = Field(
+        default=...,
+        description="Response status",
+    )
+    message: str = Field(
+        default=...,
+        description="Response message",
+    )
+    data: Optional[Dict[str, Any]] = Field(
+        default=None,
+        description="Response data",
+    )
+    # Filled in per instance at creation time (local, naive datetime).
+    timestamp: datetime = Field(default_factory=datetime.now)
+
+
+# Utility functions
+def read_json_file(filename: str) -> Union[Dict, List]:
+    """Read and parse a JSON file from the data directory.
+
+    Args:
+        filename: Bare file name inside DATA_DIR (no directory components).
+
+    Returns:
+        The parsed JSON document.
+
+    Raises:
+        HTTPException: 400 if the name is empty, contains a path separator
+            or is "."/"..", or if the file is not valid JSON; 404 if no
+            such regular file exists.
+    """
+    # The name comes from request paths/bodies: refuse anything that could
+    # resolve outside DATA_DIR.
+    if filename in ("", ".", "..") or "/" in filename or "\\" in filename:
+        raise HTTPException(status_code=400, detail="Invalid filename")
+
+    filepath = DATA_DIR / filename
+    # is_file() rather than exists(): a directory is "not found", not a 500.
+    if not filepath.is_file():
+        raise HTTPException(status_code=404, detail=f"File {filename} not found")
+
+    try:
+        with open(filepath, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    except json.JSONDecodeError as e:
+        raise HTTPException(status_code=400, detail=f"Invalid JSON in file: {str(e)}")
+
+def write_json_file(filename: str, data: Union[Dict, List]) -> None:
+    """Serialize data as JSON into a file in the data directory.
+
+    Args:
+        filename: Bare file name inside DATA_DIR (no directory components).
+        data: JSON-serializable content; written with 2-space indentation
+            and non-ASCII characters left unescaped.
+
+    Raises:
+        HTTPException: 400 if the name is empty, contains a path separator
+            or is "."/".."; 500 if the file cannot be written.
+    """
+    # The name comes from request bodies/uploads: refuse anything that could
+    # resolve outside DATA_DIR.
+    if filename in ("", ".", "..") or "/" in filename or "\\" in filename:
+        raise HTTPException(status_code=400, detail="Invalid filename")
+
+    filepath = DATA_DIR / filename
+    try:
+        with open(filepath, 'w', encoding='utf-8') as f:
+            json.dump(data, f, indent=2, ensure_ascii=False)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"Error writing file: {str(e)}")
+
+
+# API Routes
+@app.get("/", response_model=APIResponse)
+async def root():
+    """Root endpoint - API version and a summary of every route."""
+    # Keep this listing in step with the routes registered in this module.
+    endpoints = {
+        "GET /health": "Health check",
+        "GET /files": "List all JSON files",
+        "GET /files/{filename}": "Get specific file content",
+        "POST /files": "Create new JSON file",
+        "PUT /files/{filename}": "Update existing file",
+        "DELETE /files/{filename}": "Delete file",
+        "POST /upload": "Upload a JSON file",
+        "POST /validate": "Validate JSON data",
+        "POST /transform": "Transform JSON data",
+        "GET /stats": "Get statistics",
+        "GET /search": "Search across all JSON files",
+    }
+    return APIResponse(
+        status="success",
+        message="JSON Data API is running",
+        data={
+            "version": "2.0.0",
+            "endpoints": endpoints,
+        }
+    )
+
+@app.get("/health")
+async def health_check():
+    """Liveness probe: report a healthy status with the current local time."""
+    checked_at = datetime.now()
+    return {
+        "status": "healthy",
+        "timestamp": checked_at.isoformat(),
+    }
+
+@app.get("/files", response_model=APIResponse)
+async def list_files():
+    """Return name, size and modification time of each top-level JSON file."""
+    try:
+        # Pair every path with a single stat() result, then describe it.
+        stat_pairs = ((path, path.stat()) for path in DATA_DIR.glob("*.json"))
+        entries = [
+            {
+                "filename": path.name,
+                "size_bytes": info.st_size,
+                "size_kb": round(info.st_size / 1024, 2),
+                "modified": datetime.fromtimestamp(info.st_mtime).isoformat(),
+            }
+            for path, info in stat_pairs
+        ]
+        count = len(entries)
+
+        return APIResponse(
+            status="success",
+            message=f"Found {count} JSON files",
+            data={"files": entries, "total": count}
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.get("/files/{filename}")
+async def get_file(filename: str):
+    """Return the parsed content of one JSON file from the data directory."""
+    try:
+        return JSONResponse(content=read_json_file(filename))
+    except HTTPException:
+        # Status codes chosen by read_json_file pass through untouched.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.post("/files", response_model=APIResponse)
+async def create_file(json_data: JSONDataModel):
+    """Create a new JSON file; responds 409 when the name is already taken."""
+    name = json_data.filename
+    try:
+        target = DATA_DIR / name
+        if target.exists():
+            raise HTTPException(status_code=409, detail="File already exists")
+
+        write_json_file(name, json_data.data)
+
+        return APIResponse(
+            status="success",
+            message=f"File {name} created successfully",
+            data={"filename": name}
+        )
+    except HTTPException:
+        # Deliberate HTTP errors (409, helper failures) pass through as-is.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.put("/files/{filename}", response_model=APIResponse)
+async def update_file(filename: str, json_data: Union[Dict, List]):
+    """Replace the content of an existing JSON file.
+
+    Args:
+        filename: Name of the file to overwrite (path parameter).
+        json_data: New content, taken from the request body. Annotated with
+            Union[...] because `Dict | List` is evaluated at definition time
+            and fails on Python 3.9.
+
+    Raises:
+        HTTPException: 404 if the file does not exist; errors raised by
+            write_json_file pass through; anything else becomes a 500.
+    """
+    try:
+        filepath = DATA_DIR / filename
+        if not filepath.exists():
+            raise HTTPException(status_code=404, detail="File not found")
+
+        write_json_file(filename, json_data)
+
+        return APIResponse(
+            status="success",
+            message=f"File {filename} updated successfully",
+            data={"filename": filename}
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.delete("/files/{filename}", response_model=APIResponse)
+async def delete_file(filename: str):
+    """Remove a file from the data directory; responds 404 when it is absent."""
+    try:
+        target = DATA_DIR / filename
+        if not target.exists():
+            raise HTTPException(status_code=404, detail="File not found")
+
+        target.unlink()
+
+        return APIResponse(
+            status="success",
+            message=f"File {filename} deleted successfully",
+            data={"filename": filename}
+        )
+    except HTTPException:
+        # The 404 above passes through unchanged.
+        raise
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.post("/upload")
+async def upload_file(file: UploadFile = File(...)):
+    """Upload a JSON file and store it in the data directory.
+
+    The stored name is the final path component of the client-supplied file
+    name, so an upload can never be written outside DATA_DIR.
+
+    Raises:
+        HTTPException: 400 if the name is missing or does not end in
+            ".json", or if the content is not UTF-8 encoded JSON; errors
+            raised by write_json_file pass through; anything else is a 500.
+    """
+    # The multipart file name is untrusted: it may be absent or carry
+    # directory components in either separator style.
+    raw_name = file.filename or ""
+    safe_name = Path(raw_name.replace("\\", "/")).name
+    if not safe_name.endswith('.json'):
+        raise HTTPException(status_code=400, detail="Only JSON files are allowed")
+
+    try:
+        content = await file.read()
+        data = json.loads(content.decode('utf-8'))
+
+        write_json_file(safe_name, data)
+
+        return APIResponse(
+            status="success",
+            message=f"File {safe_name} uploaded successfully",
+            data={"filename": safe_name, "size": len(content)}
+        )
+    except HTTPException:
+        # Keep the status code chosen by write_json_file.
+        raise
+    except (json.JSONDecodeError, UnicodeDecodeError):
+        # Bytes that are not UTF-8 are a client error, same as bad JSON.
+        raise HTTPException(status_code=400, detail="Invalid JSON file")
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.post("/validate", response_model=APIResponse)
+async def validate_data(request: ValidationRequest):
+    """Report that the submitted data is valid JSON.
+
+    Reaching this handler means the body already parsed, so the result is
+    always valid; schema_name is echoed back but not applied.
+    """
+    try:
+        # Basic validation (data is already parsed if we get here)
+        is_valid = True
+        message = "Data is valid JSON"
+
+        # Additional validation logic can be added here
+        # For example, schema validation if schema_name is provided
+
+        if is_valid:
+            outcome = "success"
+        else:
+            outcome = "error"
+
+        details = {
+            "valid": is_valid,
+            "data_type": type(request.data).__name__,
+            "schema": request.schema_name,
+        }
+        return APIResponse(status=outcome, message=message, data=details)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.post("/transform", response_model=APIResponse)
+async def transform_data(request: TransformRequest):
+    """Apply one transformation to a stored file and save the result.
+
+    Supported operations:
+        filter: keep list items that are objects whose parameters["key"]
+            equals parameters["value"]; non-list input is copied unchanged.
+        map: rename object fields according to parameters["field_map"];
+            items that are not objects are copied unchanged.
+        sort: order a list by parameters["key"] (missing keys sort as ""),
+            descending when parameters["reverse"] is truthy; non-list input
+            is copied unchanged.
+
+    Raises:
+        HTTPException: 400 for an unknown operation or data that cannot be
+            sorted by the given key; errors from reading or writing the
+            files pass through; anything else becomes a 500.
+    """
+    try:
+        input_data = read_json_file(request.input_filename)
+
+        # `parameters` is Optional: an explicit null must not break lookups.
+        params = request.parameters or {}
+
+        if request.operation == "filter":
+            key = params.get("key")
+            value = params.get("value")
+            if isinstance(input_data, list):
+                # Only objects can satisfy a key/value test.
+                transformed_data = [
+                    item for item in input_data
+                    if isinstance(item, dict) and item.get(key) == value
+                ]
+            else:
+                transformed_data = input_data
+
+        elif request.operation == "map":
+            field_map = params.get("field_map") or {}
+
+            def rename_fields(item):
+                # Non-object items have no fields to rename.
+                if not isinstance(item, dict):
+                    return item
+                return {field_map.get(k, k): v for k, v in item.items()}
+
+            if isinstance(input_data, list):
+                transformed_data = [rename_fields(item) for item in input_data]
+            else:
+                transformed_data = rename_fields(input_data)
+
+        elif request.operation == "sort":
+            if isinstance(input_data, list):
+                sort_key = params.get("key")
+                reverse = bool(params.get("reverse", False))
+                try:
+                    transformed_data = sorted(
+                        input_data,
+                        key=lambda x: x.get(sort_key, ""),
+                        reverse=reverse,
+                    )
+                except (AttributeError, TypeError) as e:
+                    # Non-object items or values of mixed, unorderable types.
+                    raise HTTPException(
+                        status_code=400,
+                        detail=f"Cannot sort data by {sort_key!r}: {e}",
+                    )
+            else:
+                transformed_data = input_data
+
+        else:
+            raise HTTPException(status_code=400, detail=f"Unknown operation: {request.operation}")
+
+        # Write transformed data
+        write_json_file(request.output_filename, transformed_data)
+
+        return APIResponse(
+            status="success",
+            message="Data transformed successfully",
+            data={
+                "input": request.input_filename,
+                "output": request.output_filename,
+                "operation": request.operation
+            }
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/stats", response_model=APIResponse)
+async def get_statistics():
+    """Summarize the top-level JSON files: count, total size and categories.
+
+    A file's category is the part of its stem before the first underscore;
+    stems without an underscore are counted under 'other'.
+    """
+    try:
+        file_count = 0
+        byte_total = 0
+        categories = {}
+
+        for path in DATA_DIR.glob("*.json"):
+            file_count += 1
+            byte_total += path.stat().st_size
+
+            prefix, underscore, _rest = path.stem.partition('_')
+            bucket = prefix if underscore else 'other'
+            categories[bucket] = categories.get(bucket, 0) + 1
+
+        summary = {
+            "total_files": file_count,
+            "total_size_bytes": byte_total,
+            "total_size_kb": round(byte_total / 1024, 2),
+            "total_size_mb": round(byte_total / (1024 * 1024), 2),
+            "file_categories": categories,
+        }
+        return APIResponse(
+            status="success",
+            message="Statistics retrieved successfully",
+            data=summary
+        )
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+@app.get("/search")
+async def search_data(
+    query: str = Query(..., description="Search query"),
+    field: Optional[str] = Query(None, description="Field to search in")
+):
+    """Case-insensitive substring search across all top-level JSON files.
+
+    A file holding an object is tested as one record; a file holding a list
+    is tested item by item, considering only items that are objects. With
+    `field`, only that field's value is compared; otherwise every top-level
+    value of the record is. Files that cannot be read or parsed are skipped
+    with a logged warning.
+
+    Returns:
+        JSON with the query, the field, the matching records (each tagged
+        with its file name) and their count.
+    """
+    needle = query.lower()  # lower-cased once, reused for every comparison
+
+    def is_match(record: dict) -> bool:
+        if field:
+            return field in record and needle in str(record[field]).lower()
+        return any(needle in str(v).lower() for v in record.values())
+
+    try:
+        results = []
+
+        for filepath in DATA_DIR.glob("*.json"):
+            try:
+                data = read_json_file(filepath.name)
+                records = data if isinstance(data, list) else [data]
+                results.extend(
+                    {"file": filepath.name, "data": record}
+                    for record in records
+                    if isinstance(record, dict) and is_match(record)
+                )
+            except Exception as e:
+                # One bad file must not fail the whole search, but say so.
+                logger.warning(f"Skipping {filepath.name} during search: {e}")
+                continue
+
+        return JSONResponse(content={
+            "query": query,
+            "field": field,
+            "results": results,
+            "total": len(results)
+        })
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# Run the application
+# Direct-execution entry point: serves `app` with uvicorn on all network
+# interfaces (0.0.0.0), port 8000. uvicorn is imported here so that merely
+# importing this module does not import it.
+if __name__ == "__main__":
+ import uvicorn
+ uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")
From fa5a1cf3fe2324dcbc3b8107aa6fda0f6384473e Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 10:59:30 +0800
Subject: [PATCH 16/19] Add JSON data synchronization workflow
---
.github/workflows/json-sync-workflow.yml | 492 +++++++++++++++++++++++
1 file changed, 492 insertions(+)
create mode 100644 .github/workflows/json-sync-workflow.yml
diff --git a/.github/workflows/json-sync-workflow.yml b/.github/workflows/json-sync-workflow.yml
new file mode 100644
index 0000000..f92270f
--- /dev/null
+++ b/.github/workflows/json-sync-workflow.yml
@@ -0,0 +1,492 @@
+name: JSON Data Synchronization
+
+on:
+  schedule:
+    # Daily at 00:00 UTC
+    - cron: "0 0 * * *"
+  push:
+    paths:
+      - "data/**/*.json"
+      - "schemas/**/*.json"
+  workflow_dispatch:
+    inputs:
+      sync_type:
+        description: "Type of synchronization"
+        required: true
+        type: choice
+        options:
+          - full
+          - incremental
+          - validate_only
+
+env:
+  DATA_DIR: data
+  SCHEMA_DIR: schemas
+  BACKUP_DIR: backups
+
+jobs:
+  # Job 1: Validate JSON Files
+  validate-json:
+    runs-on: ubuntu-latest
+    name: Validate JSON Files
+
+    outputs:
+      validation_status: ${{ steps.validate.outputs.status }}
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: pip install jsonschema pyyaml
+
+      - name: Validate JSON syntax and schemas
+        id: validate
+        run: |
+          python3 << 'EOF'
+          import json
+          import os
+          from datetime import datetime, timezone
+          from pathlib import Path
+
+          errors = []
+          warnings = []
+          validated = []
+
+          # Validate all JSON files
+          for root, dirs, files in os.walk('${{ env.DATA_DIR }}'):
+              for file in files:
+                  if file.endswith('.json'):
+                      filepath = Path(root) / file
+                      try:
+                          with open(filepath, 'r', encoding='utf-8') as f:
+                              json.load(f)
+                          validated.append(str(filepath))
+                          print(f'✓ {filepath} - Valid JSON')
+                      except json.JSONDecodeError as e:
+                          errors.append(f'{filepath}: {str(e)}')
+                          print(f'✗ {filepath} - Invalid JSON: {e}')
+                      except Exception as e:
+                          warnings.append(f'{filepath}: {str(e)}')
+                          print(f'⚠ {filepath} - Warning: {e}')
+
+          # Create validation report (stamped with the actual run time in UTC)
+          report = {
+              'timestamp': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'),
+              'total_files': len(validated) + len(errors) + len(warnings),
+              'valid_files': len(validated),
+              'invalid_files': len(errors),
+              'warnings': len(warnings),
+              'files': validated,
+              'errors': errors,
+              'warnings_list': warnings
+          }
+
+          os.makedirs('reports', exist_ok=True)
+          with open('reports/validation_report.json', 'w') as f:
+              json.dump(report, f, indent=2)
+
+          print(f'\n📊 Summary:')
+          print(f' Total files: {report["total_files"]}')
+          print(f' Valid: {report["valid_files"]}')
+          print(f' Invalid: {report["invalid_files"]}')
+          print(f' Warnings: {report["warnings"]}')
+
+          if errors:
+              print(f'\n❌ Validation failed with {len(errors)} error(s)')
+              raise SystemExit(1)
+          print(f'\n✅ All JSON files are valid')
+          # ::set-output is deprecated; step outputs go to the GITHUB_OUTPUT file
+          with open(os.environ['GITHUB_OUTPUT'], 'a', encoding='utf-8') as out:
+              out.write('status=success\n')
+          EOF
+
+      - name: Upload validation report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: validation-report
+          path: reports/validation_report.json
+
+  # Job 2: Backup JSON Data
+  backup-data:
+    runs-on: ubuntu-latest
+    needs: validate-json
+    if: needs.validate-json.outputs.validation_status == 'success'
+    name: Backup JSON Data
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Create timestamped backup
+        run: |
+          stamp=$(date +%Y%m%d_%H%M%S)
+          archive="json_backup_${stamp}"
+
+          mkdir -p "$BACKUP_DIR/$archive"
+
+          # Snapshot everything under the data directory, if it exists
+          if [ -d "$DATA_DIR" ]; then
+            cp -r "$DATA_DIR"/* "$BACKUP_DIR/$archive/" || echo "No data to backup"
+          fi
+
+          # Pack the snapshot into a tarball and drop the staging directory
+          cd "$BACKUP_DIR"
+          tar -czf "${archive}.tar.gz" "$archive"
+          rm -rf "$archive"
+
+          echo "✓ Backup created: ${archive}.tar.gz"
+          ls -lh "${archive}.tar.gz"
+
+      - name: Upload backup artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: json-backup
+          path: ${{ env.BACKUP_DIR }}/*.tar.gz
+          retention-days: 30
+
+  # Job 3: Transform and Process Data
+  process-data:
+    runs-on: ubuntu-latest
+    needs: validate-json
+    if: needs.validate-json.outputs.validation_status == 'success'
+    name: Process and Transform Data
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install processing libraries
+        run: |
+          pip install pandas numpy jsonschema
+
+      - name: Process JSON data
+        run: |
+          python3 << 'EOF'
+          import json
+          import os
+          from pathlib import Path
+          from datetime import datetime, timezone
+
+          def process_json_files(data_dir):
+              """Aggregate every JSON file under data_dir; return (records, stats)."""
+
+              all_data = []
+              stats = {
+                  'total_records': 0,
+                  'files_processed': 0,
+                  'data_types': {},
+                  'timestamp': datetime.now(timezone.utc).isoformat()
+              }
+
+              for json_file in Path(data_dir).rglob('*.json'):
+                  try:
+                      # Read as UTF-8, the encoding the validation job uses
+                      with open(json_file, 'r', encoding='utf-8') as f:
+                          data = json.load(f)
+                      # Collect statistics: list items count individually
+                      if isinstance(data, list):
+                          stats['total_records'] += len(data)
+                          all_data.extend(data)
+                      elif isinstance(data, dict):
+                          stats['total_records'] += 1
+                          all_data.append(data)
+
+                      stats['files_processed'] += 1
+
+                      # Track data types (keyed by file name without extension)
+                      file_type = json_file.stem
+                      stats['data_types'][file_type] = stats['data_types'].get(file_type, 0) + 1
+
+                      print(f'✓ Processed: {json_file.name}')
+
+                  except Exception as e:
+                      print(f'✗ Error processing {json_file}: {e}')
+
+              return all_data, stats
+
+          # Process data
+          data, statistics = process_json_files('${{ env.DATA_DIR }}')
+
+          # Save aggregated data
+          os.makedirs('processed', exist_ok=True)
+
+          with open('processed/aggregated_data.json', 'w') as f:
+              json.dump(data, f, indent=2)
+
+          with open('processed/statistics.json', 'w') as f:
+              json.dump(statistics, f, indent=2)
+
+          print(f'\n📊 Processing Statistics:')
+          print(f' Files processed: {statistics["files_processed"]}')
+          print(f' Total records: {statistics["total_records"]}')
+          print(f' Data types: {statistics["data_types"]}')
+          EOF
+
+      - name: Upload processed data
+        uses: actions/upload-artifact@v4
+        with:
+          name: processed-data
+          path: processed/
+
+  # Job 4: Generate Reports
+  generate-reports:
+    runs-on: ubuntu-latest
+    needs: [validate-json, process-data]
+    name: Generate Data Reports
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Download processed data
+        uses: actions/download-artifact@v4
+        with:
+          name: processed-data
+          path: processed/
+
+      - name: Generate comprehensive report
+        run: |
+          python3 << 'EOF'
+          import json
+          import os
+          from datetime import datetime, timezone
+          from html import escape
+
+          # Load statistics
+          with open('processed/statistics.json', 'r') as f:
+              stats = json.load(f)
+
+          # Create HTML report
+          # Timestamp in real UTC so the "UTC" label in the report is accurate
+          generated = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')
+          # Data type names come from statistics.json: escape before embedding
+          data_types = escape(', '.join(stats['data_types'].keys()))
+
+          # Literal CSS braces are doubled because the template is an f-string
+          html_report = f"""<!DOCTYPE html>
+          <html lang="en">
+          <head>
+            <meta charset="utf-8">
+            <title>JSON Data Synchronization Report</title>
+            <style>
+              body {{ font-family: sans-serif; margin: 2em; }}
+            </style>
+          </head>
+          <body>
+            <h1>JSON Data Synchronization Report</h1>
+            <p>Generated: {generated}</p>
+            <h2>Statistics</h2>
+            <ul>
+              <li>Files Processed: {stats['files_processed']}</li>
+              <li>Total Records: {stats['total_records']}</li>
+              <li>Data Types: {data_types}</li>
+            </ul>
+            <h2>Status</h2>
+            <p>✓ Synchronization completed successfully</p>
+          </body>
+          </html>
+          """
+
+          os.makedirs('reports', exist_ok=True)
+          with open('reports/sync_report.html', 'w', encoding='utf-8') as f:
+              f.write(html_report)
+
+          print('✓ HTML report generated')
+          EOF
+
+      - name: Upload reports
+        uses: actions/upload-artifact@v4
+        with:
+          name: sync-reports
+          path: reports/
+
+  # Job 5: Database Synchronization (if enabled)
+  sync-to-database:
+    runs-on: ubuntu-latest
+    needs: [validate-json, process-data]
+    if: github.event.inputs.sync_type == 'full' || github.event.inputs.sync_type == 'incremental'
+    name: Sync to Database
+
+    services:
+      postgres:
+        image: postgres:15
+        env:
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: json_data
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install database libraries
+        run: |
+          pip install psycopg2-binary sqlalchemy pandas
+
+      - name: Download processed data
+        uses: actions/download-artifact@v4
+        with:
+          name: processed-data
+          path: processed/
+
+      - name: Sync data to database
+        env:
+          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/json_data
+          SYNC_TYPE: ${{ github.event.inputs.sync_type || 'incremental' }}
+        run: |
+          python3 << 'EOF'
+          import json
+          import os
+          from sqlalchemy import create_engine, text
+
+          # Connect to database
+          engine = create_engine(os.environ['DATABASE_URL'])
+
+          # Create tables
+          with engine.connect() as conn:
+              # Main data table
+              conn.execute(text('''
+                  CREATE TABLE IF NOT EXISTS json_records (
+                      id SERIAL PRIMARY KEY,
+                      data_type VARCHAR(100),
+                      record_data JSONB,
+                      created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                      updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                  )
+              '''))
+
+              # Sync history table
+              conn.execute(text('''
+                  CREATE TABLE IF NOT EXISTS sync_history (
+                      id SERIAL PRIMARY KEY,
+                      sync_type VARCHAR(50),
+                      records_synced INTEGER,
+                      status VARCHAR(50),
+                      timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                  )
+              '''))
+
+              conn.commit()
+              print('✓ Database tables created/verified')
+
+          # Load and sync data
+          with open('processed/aggregated_data.json', 'r') as f:
+              data = json.load(f)
+
+          synced_count = 0
+          sync_type = os.environ.get('SYNC_TYPE', 'incremental')
+
+          with engine.connect() as conn:
+              if sync_type == 'full':
+                  # Clear existing data; committed atomically with the reload below
+                  conn.execute(text('TRUNCATE TABLE json_records'))
+                  print('✓ Cleared existing data for full sync')
+
+              # Insert data
+              for record in data:
+                  try:
+                      # One SAVEPOINT per record: otherwise a single failed INSERT
+                      # aborts the transaction and every later statement fails too
+                      with conn.begin_nested():
+                          conn.execute(text('''
+                              INSERT INTO json_records (data_type, record_data)
+                              VALUES (:data_type, CAST(:record_data AS JSONB))
+                          '''), {
+                              'data_type': record.get('type', 'unknown'),
+                              'record_data': json.dumps(record)
+                          })
+                      synced_count += 1
+                  except Exception as e:
+                      print(f'Warning: Could not sync record: {e}')
+
+              # Record sync history
+              conn.execute(text('''
+                  INSERT INTO sync_history (sync_type, records_synced, status)
+                  VALUES (:sync_type, :records_synced, :status)
+              '''), {
+                  'sync_type': sync_type,
+                  'records_synced': synced_count,
+                  'status': 'success' if synced_count == len(data) else 'partial'
+              })
+
+              conn.commit()
+              print(f'✓ Synced {synced_count} records to database')
+          EOF
+
+      - name: Verify database sync
+        env:
+          DATABASE_URL: postgresql://postgres:postgres@localhost:5432/json_data
+        run: |
+          python3 << 'EOF'
+          from sqlalchemy import create_engine, text
+          import os
+
+          engine = create_engine(os.environ['DATABASE_URL'])
+
+          with engine.connect() as conn:
+              result = conn.execute(text('SELECT COUNT(*) FROM json_records'))
+              count = result.scalar()
+              print(f'\n✓ Database contains {count} records')
+
+              result = conn.execute(text('''
+                  SELECT sync_type, records_synced, status, timestamp
+                  FROM sync_history
+                  ORDER BY timestamp DESC
+                  LIMIT 1
+              '''))
+              last_sync = result.fetchone()
+              if last_sync:
+                  print(f'✓ Last sync: {last_sync[0]} - {last_sync[1]} records - {last_sync[2]}')
+          EOF
+
+  # Job 6: Notification - writes the run summary from upstream results (no checkout needed)
+  notify-completion:
+    runs-on: ubuntu-latest
+    needs: [validate-json, backup-data, process-data, generate-reports]
+    if: always()
+    name: Send Notification
+
+    steps:
+      - name: Create notification summary
+        env:
+          ANY_FAILED: ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
+        run: |
+          if [ "$ANY_FAILED" = "true" ]; then STATUS="❌ Failed"; else STATUS="✅ Success"; fi
+          echo "# JSON Data Synchronization Complete" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "## Status: $STATUS" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Validation: ${{ needs.validate-json.result }}" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Backup: ${{ needs.backup-data.result }}" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Processing: ${{ needs.process-data.result }}" >> "$GITHUB_STEP_SUMMARY"
+          echo "- Reports: ${{ needs.generate-reports.result }}" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "**Workflow completed at:** $(date -u)" >> "$GITHUB_STEP_SUMMARY"
From 2abeb6519130a40b77b058e4734ab8dd760d81b2 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:13:05 +0800
Subject: [PATCH 17/19] Add .gitignore for Python and general project files
---
.github/workflows/.gitignore | 127 +++++++++++++++++++++++++++++++++++
1 file changed, 127 insertions(+)
create mode 100644 .github/workflows/.gitignore
diff --git a/.github/workflows/.gitignore b/.github/workflows/.gitignore
new file mode 100644
index 0000000..dd3b8ce
--- /dev/null
+++ b/.github/workflows/.gitignore
@@ -0,0 +1,127 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+*.manifest
+*.spec
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Virtual Environment
+venv/
+ENV/
+env/
+.venv
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+.DS_Store
+
+# Testing
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.log
+.hypothesis/
+.pytest_cache/
+htmlcov/
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Documentation
+docs/_build/
+.sphinx/
+
+# Environment
+.env
+.env.local
+.env.*.local
+*.env
+
+# Database
+*.db
+*.sqlite
+*.sqlite3
+
+# Backups
+backups/
+*.tar.gz
+*.zip
+*.bak
+
+# Logs
+logs/
+*.log
+npm-debug.log*
+yarn-debug.log*
+yarn-error.log*
+
+# OS
+.DS_Store
+Thumbs.db
+desktop.ini
+
+# Temporary
+tmp/
+temp/
+*.tmp
+
+# Reports
+reports/*.html
+reports/*.json
+!reports/.gitkeep
+
+# Processed data (keep structure, ignore content)
+processed/*
+!processed/.gitkeep
+
+# Security
+*.pem
+*.key
+*.cert
+secrets/
+
+# Node (if using any JS tools)
+node_modules/
+package-lock.json
+
+# Jupyter Notebook
+.ipynb_checkpoints
+*.ipynb
+
+# Profiling
+*.prof
+*.lprof
+.profiling/
+
+# Docker
+docker-compose.override.yml
From 35b88ffcd93868ef2da75f941135bf22c06987c4 Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Wed, 11 Feb 2026 11:55:01 +0800
Subject: [PATCH 18/19] Update and rename azure-webapps-node.yml to
docker-compose.yml
---
.github/workflows/azure-webapps-node.yml | 78 ---------------
.github/workflows/docker-compose.yml | 122 +++++++++++++++++++++++
2 files changed, 122 insertions(+), 78 deletions(-)
delete mode 100644 .github/workflows/azure-webapps-node.yml
create mode 100644 .github/workflows/docker-compose.yml
diff --git a/.github/workflows/azure-webapps-node.yml b/.github/workflows/azure-webapps-node.yml
deleted file mode 100644
index 2ebbac2..0000000
--- a/.github/workflows/azure-webapps-node.yml
+++ /dev/null
@@ -1,78 +0,0 @@
-# This workflow will build and push a node.js application to an Azure Web App when a commit is pushed to your default branch.
-#
-# This workflow assumes you have already created the target Azure App Service web app.
-# For instructions see https://docs.microsoft.com/en-us/azure/app-service/quickstart-nodejs?tabs=linux&pivots=development-environment-cli
-#
-# To configure this workflow:
-#
-# 1. Download the Publish Profile for your Azure Web App. You can download this file from the Overview page of your Web App in the Azure Portal.
-# For more information: https://docs.microsoft.com/en-us/azure/app-service/deploy-github-actions?tabs=applevel#generate-deployment-credentials
-#
-# 2. Create a secret in your repository named AZURE_WEBAPP_PUBLISH_PROFILE, paste the publish profile contents as the value of the secret.
-# For instructions on obtaining the publish profile see: https://docs.microsoft.com/azure/app-service/deploy-github-actions#configure-the-github-secret
-#
-# 3. Change the value for the AZURE_WEBAPP_NAME. Optionally, change the AZURE_WEBAPP_PACKAGE_PATH and NODE_VERSION environment variables below.
-#
-# For more information on GitHub Actions for Azure: https://github.com/Azure/Actions
-# For more information on the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy
-# For more samples to get started with GitHub Action workflows to deploy to Azure: https://github.com/Azure/actions-workflow-samples
-
-on:
- push:
- branches: [ "main" ]
- workflow_dispatch:
-
-env:
- AZURE_WEBAPP_NAME: your-app-name # set this to your application's name
- AZURE_WEBAPP_PACKAGE_PATH: '.' # set this to the path to your web app project, defaults to the repository root
- NODE_VERSION: '20.x' # set this to the node version to use
-
-permissions:
- contents: read
-
-jobs:
- build:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/checkout@v4
-
- - name: Set up Node.js
- uses: actions/setup-node@v4
- with:
- node-version: ${{ env.NODE_VERSION }}
- cache: 'npm'
-
- - name: npm install, build, and test
- run: |
- npm install
- npm run build --if-present
- npm run test --if-present
-
- - name: Upload artifact for deployment job
- uses: actions/upload-artifact@v4
- with:
- name: node-app
- path: .
-
- deploy:
- permissions:
- contents: none
- runs-on: ubuntu-latest
- needs: build
- environment:
- name: 'Development'
- url: ${{ steps.deploy-to-webapp.outputs.webapp-url }}
-
- steps:
- - name: Download artifact from build job
- uses: actions/download-artifact@v4
- with:
- name: node-app
-
- - name: 'Deploy to Azure WebApp'
- id: deploy-to-webapp
- uses: azure/webapps-deploy@v2
- with:
- app-name: ${{ env.AZURE_WEBAPP_NAME }}
- publish-profile: ${{ secrets.AZURE_WEBAPP_PUBLISH_PROFILE }}
- package: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }}
diff --git a/.github/workflows/docker-compose.yml b/.github/workflows/docker-compose.yml
new file mode 100644
index 0000000..95f498b
--- /dev/null
+++ b/.github/workflows/docker-compose.yml
@@ -0,0 +1,122 @@
+version: '3.8'
+
+services:
+  # PostgreSQL Database (health check probes the configured user and database)
+  postgres:
+    image: postgres:15-alpine
+    container_name: explorepi-postgres
+    environment:
+      POSTGRES_USER: ${POSTGRES_USER:-postgres}
+      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres}
+      POSTGRES_DB: ${POSTGRES_DB:-json_data}
+      PGDATA: /var/lib/postgresql/data/pgdata
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-postgres} -d ${POSTGRES_DB:-json_data}"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+  # Redis Cache (optional)
+  redis:
+    image: redis:7-alpine
+    container_name: explorepi-redis
+    ports:
+      - "6379:6379"
+    volumes:
+      - redis_data:/data
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 3s
+      retries: 5
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+  # ExplorePi API Application (DB URL follows the same variables as the postgres service)
+  api:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    container_name: explorepi-api
+    environment:
+      - DATABASE_URL=postgresql://${POSTGRES_USER:-postgres}:${POSTGRES_PASSWORD:-postgres}@postgres:5432/${POSTGRES_DB:-json_data}
+      - REDIS_URL=redis://redis:6379/0
+      - ENVIRONMENT=production
+      - API_HOST=0.0.0.0
+      - API_PORT=8000
+    volumes:
+      - ./data:/app/data
+      - ./schemas:/app/schemas
+      - ./backups:/app/backups
+      - ./processed:/app/processed
+      - ./reports:/app/reports
+      - ./logs:/app/logs
+    ports:
+      - "8000:8000"
+    depends_on:
+      postgres:
+        condition: service_healthy
+      redis:
+        condition: service_healthy
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+
+  # pgAdmin (Database Management UI)
+  pgadmin:
+    image: dpage/pgadmin4:latest
+    container_name: explorepi-pgadmin
+    environment:
+      PGADMIN_DEFAULT_EMAIL: ${PGADMIN_EMAIL:-admin@explorepi.com}
+      PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_PASSWORD:-admin}
+      PGADMIN_CONFIG_SERVER_MODE: 'False'
+    volumes:
+      - pgadmin_data:/var/lib/pgadmin
+    ports:
+      - "5050:80"
+    depends_on:
+      - postgres
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+  # Nginx Reverse Proxy (optional)
+  nginx:
+    image: nginx:alpine
+    container_name: explorepi-nginx
+    volumes:
+      - ./nginx.conf:/etc/nginx/nginx.conf:ro
+    ports:
+      - "80:80"
+      - "443:443"
+    depends_on:
+      - api
+    networks:
+      - explorepi-network
+    restart: unless-stopped
+
+volumes:
+  postgres_data:
+    driver: local
+  redis_data:
+    driver: local
+  pgadmin_data:
+    driver: local
+
+networks:
+  explorepi-network:
+    driver: bridge
From 8e8020e51543577fa555c3d76c1bbb53d8e9994e Mon Sep 17 00:00:00 2001
From: Marfin <248203147+arifinahmad99-cloud@users.noreply.github.com>
Date: Sat, 14 Feb 2026 15:20:00 +0800
Subject: [PATCH 19/19] Add initial devcontainer configuration
---
.devcontainer/devcontainer.json | 4 ++++
1 file changed, 4 insertions(+)
create mode 100644 .devcontainer/devcontainer.json
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..39bbd26
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,4 @@
+{
+ "image": "mcr.microsoft.com/devcontainers/universal:2",
+ "features": {}
+}