From 182ed12b053e3442038a38ef365b193d0de1a810 Mon Sep 17 00:00:00 2001 From: nikki28 Date: Mon, 13 Jan 2025 18:15:34 +0530 Subject: [PATCH 1/3] adding conv_static_temporal_experiments --- nikki_exp_conv_temporal_static/demo.ipynb | 1230 +++++++++++++++++++++ nikki_exp_conv_temporal_static/main.py | 223 ++++ nikki_exp_conv_temporal_static/utils.py | 84 ++ 3 files changed, 1537 insertions(+) create mode 100644 nikki_exp_conv_temporal_static/demo.ipynb create mode 100644 nikki_exp_conv_temporal_static/main.py create mode 100644 nikki_exp_conv_temporal_static/utils.py diff --git a/nikki_exp_conv_temporal_static/demo.ipynb b/nikki_exp_conv_temporal_static/demo.ipynb new file mode 100644 index 0000000..b28347c --- /dev/null +++ b/nikki_exp_conv_temporal_static/demo.ipynb @@ -0,0 +1,1230 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import rasterio\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "from typing import Any, Dict, Optional, List\n", + "import utils\n", + "import main\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1.346652" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# save_prediction" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[ 0.09771803]]\n", + "\n", + " [[-1.16380941]]]\n", + "[[[-1.44437387]]\n", + "\n", + " [[ 0.30727828]]]\n", + "[[[-1.34665584]]\n", + "\n", + " [[-0.85653113]]]\n" + ] + } + ], + "source": [ + "a=np.random.randn(2,1,1)\n", + "b=np.random.randn(2,1,1)\n", + "print(a)\n", + "print(b)\n", + "l=a+b\n", + "print(l)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Not useful " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# steps in test time:\n", + " # two 3D np arrays: pred_feat and pred_label\n", + " # add padding in case they have incompatible shape\n", + " # concatenate to get a single 3D array\n", + " # get a valid_mask: A 2d array which has nan if any of the bands value is nan for every pixel.\n", + " # filtered_stacked_data: nan for all bands if any band is nan otherwise the actual values \n", + " # pred_feat is the 9 bands \n", + " # pred label is the 2D array(1st band)\n", + " # we compute pred_feat_mask and pred_label_mask which is useless\n", + " # \n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Done with scale, resolution and zoom level.\n", + "# scale is basivally how \n", + "# Each band of an image can have diff scale and it can change depending on the image, the band is present in.\n", + "# Earth engine looks at the scale specified y the output and then finds out the approprate level of image pyramid to use as input.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# when we ingect an image in an earth engine:\n", + " # It is ingested at native resolution(the lowest level of image pyramid)\n", + " # the image data are also aggregated to create higher pyramid levels.\n", + " # The image data is aggregated until it fits within a 256*256 pixel tile.\n", + " # " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Scale==pixel resolution\n", + "# Zoom level\n", + "# Image assests exists at multiple scales, in image pyramid\n", + "# The lowest level of the image is the image at native resolution. During ingestion the data are aggregated to create high pyramid levels. The two ways of aggregration are:\n", + " # For continous values images: As we level up in pyramid, we take mean of 2*2 pixels \n", + " # For discrete values images: As we level up in pyramid, we take a sample (usually the top left pixel) of pixels at the next lower level\n", + "# We do the agregration till the entire image fits within 256*256 pixel tile.\n", + "# Each of this undivided squares which is divided into 4 squares is called a tile.\n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'f.read() reads the entire contents of the file into memory.\\n BytesIO(f.read()) wraps the file bytes into a BytesIO object so it can be treated like a file object.\\n\\nLoad the TIFF File:\\n\\n rasterio_open(file_bytes) is used to open the file using the rasterio library, which is designed for working with geospatial raster data (e.g., GeoTIFF files).\\n This function converts the in-memory BytesIO object into a raster data object.\\n\\nReturn the TIFF Object:\\n\\n The TIFF file object is returned for further processing'" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# we have an instance(object) to the GCSfilesystem, we can reda , write, manage files, open files directly from Google Cloud Storage without downloading them locally.\n", + "# Now we can open the files with this opject and read it in binary mode.\n", + "# Read File Data:\n", + "\"\"\"f.read() reads the entire contents of the file into memory.\n", + " BytesIO(f.read()) wraps the file bytes into a BytesIO object so it can be treated like a file object.\n", + "\n", + "Load the TIFF File:\n", + "\n", + " rasterio_open(file_bytes) is used to open the file using the rasterio library, which is designed for working with geospatial raster data (e.g., GeoTIFF files).\n", + " This function converts the in-memory BytesIO object into a raster data object.\n", + "\n", + "Return the TIFF Object:\n", + "\n", + " The TIFF file object is returned for further processing\"\"\"\n", + "# Google cloud has multiple projects\n", + "# each project has its won resources(buckets, databases and compute instances)\n", + "# storage.Client(project=PROJECT_ID) creates an object that allows you to interact with resources belonging to a specific project identified by PROJECT_ID.\n", + "# Buckets are the top-level containers in Google Cloud Storage (like a drive or main folder).\n", + "# Inside a bucket, you can organize files using folder-like paths (e.g., folder1/subfolder/file.txt).\n", + "# Use the storage.Client to get a reference to the bucket with client.get_bucket(bucket_name)\n", + "\n", + "\n", + "# gcs_path is the path of the folder relative to your google cloud storage(has gs://bucket_name/path_to_folder)\n", + "# function that retrieves the bucket name file name on which the tiff is stored\n", + "# client.get_bucket(bucket_name) return an object that refers to that particular bucket\n", + "# In the end we need to close the connection to google cloud storage \n", + "\n", + "# Ideally, we get two tiff files(features and labels), the number of data points are m*n where each data point has b bands\n", + "# Removing the points which has -1 values (corresponsiding label)\n", + "# divioding the grid into train test split \n", + "# " + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "# test_features\n", + "# test_features_padded\n", + "# stacked_data\n", + "# valid_mask a 2D which is nan if any of the bands is nan or label is -1\n", + "# filtered_stacked_data is 3D on the basis of valid_mask (all 10 bands(9+label) is nan if valid_mask is nan otherwise org feature +label) \n", + "# pred_feat is the 9 bands features\n", + "# pred_label is the firts channel\n", + "# nan_pred_feat and nan_pred_label are the \n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 3. -1. -1. 3.]\n", + " [ 3. 1. 1. 3.]\n", + " [ 3. -1. -1. 3.]\n", + " [ 3. -1. -1. 2.]\n", + " [ 3. -1. -1. 3.]\n", + " [ 3. 1. 1. 3.]\n", + " [ 3. -1. -1. 3.]\n", + " [ 3. -1. -1. 2.]]\n" + ] + } + ], + "source": [ + "# even if it ignores nan, if all values are nan, it gives nan.\n", + "\n", + "import numpy as np\n", + "label_np=np.array([[3., -1, -1., 3.],\n", + " [3., 1., 1., 3.],[3., -1, -1., 3.],\n", + " [3., -1., -1., 2.],[3., -1, -1., 3.],\n", + " [3., 1., 1., 3.],[3., -1, -1., 3.],\n", + " [3., -1., -1., 2.]])\n", + "print(label_np)\n", + "\n", + "import numpy as np\n", + "\n", + "label_np=np.where(label_np==-1, np.nan, label_np)\n", + "print(label_np)\n", + "data=[]\n", + "for i in range(1,6):\n", + " for j in range(1,3):\n", + " temp_data=label_np[i-1:i+1,j-1:j+1]\n", + " print(temp_data)\n", + " print(np.nanmean(temp_data,axis=(0,1)))\n", + "\n", + " data.append(np.nanmean(temp_data,axis=(0,1)))\n", + " # label.append(data[0,i,j])\n", + " \n", + "print(data)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[nan nan]\n", + " [nan nan]]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_18005/3690321154.py:3: RuntimeWarning: Mean of empty slice\n", + " np.nanmean(temp,axis=1)\n" + ] + }, + { + "data": { + "text/plain": [ + "array([nan, nan])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp=label_np[1,:2,1:3]\n", + "print(temp)\n", + "np.nanmean(temp,axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[ 3. nan nan 3.]\n", + " [ 3. 1. 1. 3.]]\n", + "\n", + " [[ 3. nan nan 3.]\n", + " [ 3. nan nan 2.]]]\n", + "[[ True False False True]\n", + " [ True False False True]]\n", + "[[3. 3. 3. 3.]\n", + " [3. 3. 3. 2.]]\n", + "[[[3. 6. 7. 3.]\n", + " [3. 1. 1. 3.]]\n", + "\n", + " [[3. 1. 2. 3.]\n", + " [3. 4. 1. 2.]]]\n", + "[3.25 2.75]\n", + "(2, 4, 1, 1)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "label_np=np.array([[[3., -1, -1., 3.],\n", + " [3., 1., 1., 3.]],[[3., -1, -1., 3.],\n", + " [3., -1., -1., 2.]]])\n", + "label_np=np.where(label_np==-1, np.nan, label_np)\n", + "print(label_np)\n", + "mask=~np.isnan(label_np).any(axis=0)\n", + "print(mask)\n", + "masked_train_labels =label_np[:,mask]\n", + "print(masked_train_labels)\n", + "data=np.array([[[3., 6, 7., 3.],\n", + " [3., 1., 1., 3.]],[[3., 1, 2., 3.],\n", + " [3., 4., 1., 2.]]])\n", + "print(data)\n", + "d=data[:,0:2,0:2]\n", + "print(d.mean(axis=(1,2)))\n", + "a=[]\n", + "a.append(np.random.randn(4,1,1))\n", + "a.append(np.random.randn(4,1,1))\n", + "b=np.array(a)\n", + "print(b.shape)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[False True True False]\n", + " [False False False False]]\n", + "\n", + " [[False True True False]\n", + " [False True True False]]]\n", + "[[False True True False]\n", + " [False True True False]]\n", + "last one [[False True True False]\n", + " [False True True False]]\n" + ] + } + ], + "source": [ + "label_np=np.array([[[3., -1, -1., 3.],\n", + " [3., 1., 1., 3.]],[[3., -1, -1., 3.],\n", + " [3., -1., -1., 3.]]])\n", + "label_np=np.where(label_np==-1,np.nan,label_np)\n", + "nan_label=np.isnan(label_np)\n", + "print(nan_label)\n", + "nan_label_2=np.any(np.isnan(label_np),axis=0)\n", + "print(nan_label_2)\n", + "nan_label_3= np.isnan(label_np).any(axis=0)\n", + "print(\"last one\",nan_label_3)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "block size 4\n", + "0 0\n", + "0 4\n", + "4 0\n", + "4 4\n" + ] + } + ], + "source": [ + "import rasterio\n", + "from rasterio.windows import Window\n", + "\n", + "height=8\n", + "width=8\n", + "block_coverage=1\n", + "total_blocks=4\n", + "total_pixels = height*width\n", + "pixels_covered = total_pixels*block_coverage\n", + "block_size = int(np.sqrt(pixels_covered/total_blocks))\n", + "print(\"block size\",block_size)\n", + "\n", + "# Generate windows for each grid\n", + "windows = []\n", + "for i in range(0, height-block_size+1, block_size):\n", + " for j in range(0, width-block_size+1, block_size):\n", + " print(i,j)\n", + "\n", + " windows.append(Window(j, i, block_size, block_size))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Arguments to be passed" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "train_feat_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2010-01-01_urban_feat.tif\"\n", + "train_label_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2010-01-01_urban_label.tif\"\n", + "test_feat_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2015-01-01_urban_feat.tiff\"\n", + "test_label_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2015-01-01_urban_label.tiff\"\n", + "output_path=\"output_file_conv_static.tiff\"\n", + "\n", + "block_coverage=0.3\n", + "total_blocks=100\n", + "test_size=0.3" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the input files " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "train_features=utils.rasterio_open(train_feat_file)\n", + "train_labels=utils.rasterio_open(train_label_file)\n", + "test_features=utils.rasterio_open(test_feat_file)\n", + "test_label=utils.rasterio_open(test_label_file)\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2238\n", + "2211\n" + ] + }, + { + "data": { + "text/plain": [ + "(9, 2211, 2238)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(train_features.metadata['width'])\n", + "print(train_features.metadata['height'])\n", + "train_features.data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('pdsi',\n", + " 'pr',\n", + " 'pr_1',\n", + " 'tmmn',\n", + " 'tmmx',\n", + " 'NDVI',\n", + " 'EVI',\n", + " 'LC_Type1',\n", + " 'population_density')" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_features.bands" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "train_feature=np.concatenate((train_features.data[0,:,:].reshape(1,train_features.metadata['height'],train_features.metadata['width'] ), train_features.data[2:,:,:]), axis=0)\n", + "test_feature= np.concatenate((test_features.data[0,:,:].reshape(1,test_features.metadata['height'],test_features.metadata['width']),test_features.data[2:,:,:]),axis=0)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(8, 2211, 2238)\n", + "(8, 2211, 2238)\n" + ] + } + ], + "source": [ + "print(train_feature.shape)\n", + "print(test_feature.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Pre-processing" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "block size 121\n", + "block size 121\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " 0%| | 0/226 [00:00" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "plt.figure(figsize=(12, 8))\n", + "sns.heatmap(coerr, annot=True, xticklabels=['Palmer Drought Severity Index','Precipitation accumulation',\n", + "'min temp','max temp','16-day NDVI avg','16-day EVI avg','LC_Type1','population_density','urban'], yticklabels=['Palmer Drought Severity Index','Precipitation accumulation',\n", + "'min temp','max temp','16-day NDVI avg','16-day EVI avg','LC_Type1','population_density','urban'], cmap='coolwarm', linewidths=0.5)\n", + "plt.title('Correlation Heatmap')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "y_train= y_train.astype(np.int32)\n", + "x_train=x_train.astype(np.float64)\n", + "x_val=x_val.astype(np.float64)\n", + "y_val=y_val.astype(np.int32)\n", + "x_test=x_test.astype(np.float64)\n", + "y_test=y_test.astype(np.int32)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/preprocessing/_label.py:93: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n", + "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/preprocessing/_label.py:129: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)\n", + "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.019087 seconds.\n", + "You can set `force_row_wise=true` to remove the overhead.\n", + "And if memory is not enough, you can set `force_col_wise=true`.\n", + "[LightGBM] [Info] Total Bins 3553\n", + "[LightGBM] [Info] Number of data points in the train set: 2124782, number of used features: 15\n", + "[LightGBM] [Info] Start training from score -5.972067\n", + "[LightGBM] [Info] Start training from score -7.375494\n", + "[LightGBM] [Info] Start training from score -6.251169\n", + "[LightGBM] [Info] Start training from score -0.005117\n" + ] + } + ], + "source": [ + "import lightgbm as lgbm\n", + "model = lgbm.LGBMClassifier(objective=\"multiclass\", num_class=4)\n", + "main.train(model,x_train,y_train,x_val,y_val)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Testing" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "test_pred,cm,report=main.predict(model,x_train,y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0 0.21 0.09 0.13 5416\n", + " 1 0.11 0.05 0.07 1331\n", + " 2 0.12 0.02 0.03 4097\n", + " 3 1.00 1.00 1.00 2113938\n", + "\n", + " accuracy 0.99 2124782\n", + " macro avg 0.36 0.29 0.31 2124782\n", + "weighted avg 0.99 0.99 0.99 2124782\n", + "\n" + ] + } + ], + "source": [ + "print(report)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[ 484 8 1 4923]\n", + " [ 22 63 2 1244]\n", + " [ 25 2 71 3999]\n", + " [ 1734 478 530 2111196]]\n" + ] + } + ], + "source": [ + "print(cm)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Saving" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2211, 2239)\n", + "(4950429,)\n", + "4950429\n", + "4950429 (4950429,) 3003316\n", + "| 0.01, 0.00, 67.81|\n", + "| 0.00,-0.01, 37.58|\n", + "| 0.00, 0.00, 1.00|\n" + ] + }, + { + "ename": "ValueError", + "evalue": "NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[16], line 17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# def save_prediction(test_profile, pred_label_mask,test_pred,output_path):\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# shape=pred_label_mask.shape\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# print(shape)\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# with rasterio.open(output_path,\"w\",driver=\"GTiff\",height=shape[0],width=shape[1],count=1, dtype=pred_array.dtype,crs=test_profile['crs'],transform=transform) as dst:\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# dst.write(pred_array, 1)\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[43mmain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_prediction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_features\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprofile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_mask\u001b[49m\u001b[43m,\u001b[49m\u001b[43mtest_pred\u001b[49m\u001b[43m,\u001b[49m\u001b[43moutput_path\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/Downloads/urbanan_google_earth/main.py:187\u001b[0m, in \u001b[0;36msave_prediction\u001b[0;34m(test_profile, pred_label_mask, test_pred, output_path)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(valid_pred_mask),pred_array\u001b[38;5;241m.\u001b[39mshape, \u001b[38;5;28mlen\u001b[39m(test_pred))\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28mprint\u001b[39m(transform)\n\u001b[0;32m--> 187\u001b[0m \u001b[43mpred_array\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvalid_pred_mask\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m test_pred\n\u001b[1;32m 188\u001b[0m pred_array \u001b[38;5;241m=\u001b[39m pred_array\u001b[38;5;241m.\u001b[39mreshape((shape[\u001b[38;5;241m0\u001b[39m], shape[\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m rasterio\u001b[38;5;241m.\u001b[39mopen(output_path,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m,driver\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGTiff\u001b[39m\u001b[38;5;124m\"\u001b[39m,height\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m0\u001b[39m],width\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m1\u001b[39m],count\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, dtype\u001b[38;5;241m=\u001b[39mpred_array\u001b[38;5;241m.\u001b[39mdtype,crs\u001b[38;5;241m=\u001b[39mtest_profile[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcrs\u001b[39m\u001b[38;5;124m'\u001b[39m],transform\u001b[38;5;241m=\u001b[39mtransform) \u001b[38;5;28;01mas\u001b[39;00m dst:\n", + "\u001b[0;31mValueError\u001b[0m: NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true" + ] + } + ], + "source": [ + "def save_prediction(test_profile, test_pred,output_path):\n", + " shape=(2211,2239)\n", + " print(shape)\n", + " valid_pred_mask=pred_label_mask.flatten()\n", + " pred_array=np.full((shape[0]*shape[1]),np.nan)\n", + " print(pred_array.shape)\n", + " print(len(valid_pred_mask))\n", + " transform=test_profile['transform']\n", + " print(len(valid_pred_mask),pred_array.shape, len(test_pred))\n", + " print(transform)\n", + " \n", + "\n", + " pred_array[valid_pred_mask] = test_pred\n", + " pred_array = pred_array.reshape((shape[0], shape[1]))\n", + " with rasterio.open(output_path,\"w\",driver=\"GTiff\",height=shape[0],width=shape[1],count=1, dtype=pred_array.dtype,crs=test_profile['crs'],transform=transform) as dst:\n", + " dst.write(pred_array, 1)\n", + "main.save_prediction(test_features.profile,test_pred,output_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3003316,)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_pred.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(3003316,)" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "4950429" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "2211*2239" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2211, 2239)" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_mask.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(2211, 2239)\n", + "(4950429,)\n", + "4950429\n", + "4950429 (4950429,) 3003316\n", + "| 0.01, 0.00, 67.81|\n", + "| 0.00,-0.01, 37.58|\n", + "| 0.00, 0.00, 1.00|\n" + ] + }, + { + "ename": "ValueError", + "evalue": "NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 13\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(valid_pred_mask),pred_array\u001b[38;5;241m.\u001b[39mshape, \u001b[38;5;28mlen\u001b[39m(test_pred))\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(transform)\n\u001b[0;32m---> 13\u001b[0m \u001b[43mpred_array\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvalid_pred_mask\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m test_pred\n\u001b[1;32m 14\u001b[0m pred_array \u001b[38;5;241m=\u001b[39m pred_array\u001b[38;5;241m.\u001b[39mreshape((shape[\u001b[38;5;241m0\u001b[39m], shape[\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m rasterio\u001b[38;5;241m.\u001b[39mopen(output_path,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m,driver\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGTiff\u001b[39m\u001b[38;5;124m\"\u001b[39m,height\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m0\u001b[39m],width\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m1\u001b[39m],count\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, dtype\u001b[38;5;241m=\u001b[39mpred_array\u001b[38;5;241m.\u001b[39mdtype,crs\u001b[38;5;241m=\u001b[39mtest_profile[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcrs\u001b[39m\u001b[38;5;124m'\u001b[39m],transform\u001b[38;5;241m=\u001b[39mtransform) \u001b[38;5;28;01mas\u001b[39;00m dst:\n", + "\u001b[0;31mValueError\u001b[0m: NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true" + ] + } + ], + "source": [ + "test_profile=test_features.profile\n", + "shape=(2211,2239)\n", + "print(shape)\n", + "valid_pred_mask=test_mask.flatten()\n", + "pred_array=np.full((shape[0]*shape[1]),np.nan)\n", + "print(pred_array.shape)\n", + "print(len(valid_pred_mask))\n", + "transform=test_profile['transform']\n", + "print(len(valid_pred_mask),pred_array.shape, len(test_pred))\n", + "print(transform)\n", + "\n", + "\n", + "pred_array[valid_pred_mask] = test_pred\n", + "pred_array = pred_array.reshape((shape[0], shape[1]))\n", + "with rasterio.open(output_path,\"w\",driver=\"GTiff\",height=shape[0],width=shape[1],count=1, dtype=pred_array.dtype,crs=test_profile['crs'],transform=transform) as dst:\n", + " dst.write(pred_array, 1)\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "CRS.from_wkt('GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"Spheroid\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]')" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_profile['crs']" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'driver': 'GTiff', 'dtype': 'float64', 'nodata': None, 'width': 2239, 'height': 2211, 'count': 1, 'crs': CRS.from_wkt('GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"Spheroid\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]'), 'transform': Affine(0.013474729261792823, 0.0, 67.81374585363181,\n", + " 0.0, -0.013474729261792823, 37.58330317162592), 'blockxsize': 2239, 'blockysize': 1, 'tiled': False, 'interleave': 'band'}\n" + ] + } + ], + "source": [ + "out=utils.rasterio_open(\"output_file_1.tiff\")\n", + "print(out.profile)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'driver': 'GTiff', 'dtype': 'float32', 'nodata': None, 'width': 2238, 'height': 2211, 'count': 9, 'crs': CRS.from_wkt('GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"Spheroid\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]'), 'transform': Affine(0.013474729261792823, 0.0, 67.81374585363181,\n", + " 0.0, -0.013474729261792823, 37.58330317162592), 'blockxsize': 256, 'blockysize': 256, 'tiled': True, 'compress': 'lzw', 'interleave': 'pixel'}\n" + ] + } + ], + "source": [ + "print(test_profile)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nikki_exp_conv_temporal_static/main.py b/nikki_exp_conv_temporal_static/main.py new file mode 100644 index 0000000..fdc7cb9 --- /dev/null +++ b/nikki_exp_conv_temporal_static/main.py @@ -0,0 +1,223 @@ +import rasterio +import numpy as np +from rasterio.windows import Window +from sklearn.model_selection import train_test_split +from tqdm import tqdm as tqdm +from sklearn.metrics import classification_report, confusion_matrix + +PROJECT_ID = "ai-sandbox-399505" + + + + +def ndarray_tiff(data, src_profile, output_file): + src_profile.update( + dtype=rasterio.int32, # Adjust the data type if needed + count=1, # Number of bands + # compress='lzw' # Optional: Add compression + ) + + + with rasterio.open(output_file, 'w', src_profile) as dst: + dst.write(data, 1) # Write + +def padding(features,labels): + axis_padwidth={} + for i in range(1,len(features.shape)): + if(features.shape[i] 1 else "" + + return bucket_name, file_path + +def rasterio_open(file): + with rasterio.open(file) as src: + tiff_data = src.read() + metadata = src.meta + bands = src.descriptions + profile = src.profile + return TIFF(tiff_data, metadata, bands, profile) + + +def load_tiff(gsc_path): + gcs = gcsfs_init() + with gcs.open(gsc_path, 'rb') as f: + file_bytes = BytesIO(f.read()) + + tif = rasterio_open(file_bytes) + return tif + +def load_tif_data(gcs_tiff_path): + + tif = load_tiff(gcs_tiff_path) + + return tif.data + +def files_in_dir(bucket_path, file_extension): + + all_files = [] + client = get_storage_client() + bucket_name, file_path = get_bucket_name(bucket_path) + bucket = client.get_bucket(bucket_name) + for blob in bucket.list_blobs(prefix = file_path): + all_files.append(blob.name) + file_prefix = "gs://" + bucket_name + "/" + if file_extension: + fileterd_files = [file_prefix+ file for file in all_files if file.endswith(file_extension)] + + client.close() + + return fileterd_files + + + + From e3ad29a2fe540ffb7b6e7660a5609f4da8217de3 Mon Sep 17 00:00:00 2001 From: nikki28 Date: Mon, 13 Jan 2025 18:37:00 +0530 Subject: [PATCH 2/3] adding_experiments --- nikki_exp_conv_temporal_static/demo.ipynb | 1056 ++------------------- nikki_exp_conv_temporal_static/main.py | 97 +- nikki_exp_conv_temporal_static/utils.py | 150 ++- 3 files changed, 275 insertions(+), 1028 deletions(-) diff --git a/nikki_exp_conv_temporal_static/demo.ipynb b/nikki_exp_conv_temporal_static/demo.ipynb index b28347c..0f7879e 100644 --- a/nikki_exp_conv_temporal_static/demo.ipynb +++ b/nikki_exp_conv_temporal_static/demo.ipynb @@ -9,407 +9,17 @@ "import rasterio\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", + "import lightgbm as lgbm\n", "\n", "from typing import Any, Dict, Optional, List\n", + "from tqdm import tqdm as tqdm\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", "import utils\n", "import main\n", "import sys" ] }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "-1.346652" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# save_prediction" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[[ 0.09771803]]\n", - "\n", - " [[-1.16380941]]]\n", - "[[[-1.44437387]]\n", - "\n", - " [[ 0.30727828]]]\n", - "[[[-1.34665584]]\n", - "\n", - " [[-0.85653113]]]\n" - ] - } - ], - "source": [ - "a=np.random.randn(2,1,1)\n", - "b=np.random.randn(2,1,1)\n", - "print(a)\n", - "print(b)\n", - "l=a+b\n", - "print(l)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Not useful " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# steps in test time:\n", - " # two 3D np arrays: pred_feat and pred_label\n", - " # add padding in case they have incompatible shape\n", - " # concatenate to get a single 3D array\n", - " # get a valid_mask: A 2d array which has nan if any of the bands value is nan for every pixel.\n", - " # filtered_stacked_data: nan for all bands if any band is nan otherwise the actual values \n", - " # pred_feat is the 9 bands \n", - " # pred label is the 2D array(1st band)\n", - " # we compute pred_feat_mask and pred_label_mask which is useless\n", - " # \n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Done with scale, resolution and zoom level.\n", - "# scale is basivally how \n", - "# Each band of an image can have diff scale and it can change depending on the image, the band is present in.\n", - "# Earth engine looks at the scale specified y the output and then finds out the approprate level of image pyramid to use as input.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# when we ingect an image in an earth engine:\n", - " # It is ingested at native resolution(the lowest level of image pyramid)\n", - " # the image data are also aggregated to create higher pyramid levels.\n", - " # The image data is aggregated until it fits within a 256*256 pixel tile.\n", - " # " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Scale==pixel resolution\n", - "# Zoom level\n", - "# Image assests exists at multiple scales, in image pyramid\n", - "# The lowest level of the image is the image at native resolution. During ingestion the data are aggregated to create high pyramid levels. The two ways of aggregration are:\n", - " # For continous values images: As we level up in pyramid, we take mean of 2*2 pixels \n", - " # For discrete values images: As we level up in pyramid, we take a sample (usually the top left pixel) of pixels at the next lower level\n", - "# We do the agregration till the entire image fits within 256*256 pixel tile.\n", - "# Each of this undivided squares which is divided into 4 squares is called a tile.\n", - "# " - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'f.read() reads the entire contents of the file into memory.\\n BytesIO(f.read()) wraps the file bytes into a BytesIO object so it can be treated like a file object.\\n\\nLoad the TIFF File:\\n\\n rasterio_open(file_bytes) is used to open the file using the rasterio library, which is designed for working with geospatial raster data (e.g., GeoTIFF files).\\n This function converts the in-memory BytesIO object into a raster data object.\\n\\nReturn the TIFF Object:\\n\\n The TIFF file object is returned for further processing'" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# we have an instance(object) to the GCSfilesystem, we can reda , write, manage files, open files directly from Google Cloud Storage without downloading them locally.\n", - "# Now we can open the files with this opject and read it in binary mode.\n", - "# Read File Data:\n", - "\"\"\"f.read() reads the entire contents of the file into memory.\n", - " BytesIO(f.read()) wraps the file bytes into a BytesIO object so it can be treated like a file object.\n", - "\n", - "Load the TIFF File:\n", - "\n", - " rasterio_open(file_bytes) is used to open the file using the rasterio library, which is designed for working with geospatial raster data (e.g., GeoTIFF files).\n", - " This function converts the in-memory BytesIO object into a raster data object.\n", - "\n", - "Return the TIFF Object:\n", - "\n", - " The TIFF file object is returned for further processing\"\"\"\n", - "# Google cloud has multiple projects\n", - "# each project has its won resources(buckets, databases and compute instances)\n", - "# storage.Client(project=PROJECT_ID) creates an object that allows you to interact with resources belonging to a specific project identified by PROJECT_ID.\n", - "# Buckets are the top-level containers in Google Cloud Storage (like a drive or main folder).\n", - "# Inside a bucket, you can organize files using folder-like paths (e.g., folder1/subfolder/file.txt).\n", - "# Use the storage.Client to get a reference to the bucket with client.get_bucket(bucket_name)\n", - "\n", - "\n", - "# gcs_path is the path of the folder relative to your google cloud storage(has gs://bucket_name/path_to_folder)\n", - "# function that retrieves the bucket name file name on which the tiff is stored\n", - "# client.get_bucket(bucket_name) return an object that refers to that particular bucket\n", - "# In the end we need to close the connection to google cloud storage \n", - "\n", - "# Ideally, we get two tiff files(features and labels), the number of data points are m*n where each data point has b bands\n", - "# Removing the points which has -1 values (corresponsiding label)\n", - "# divioding the grid into train test split \n", - "# " - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# test_features\n", - "# test_features_padded\n", - "# stacked_data\n", - "# valid_mask a 2D which is nan if any of the bands is nan or label is -1\n", - "# filtered_stacked_data is 3D on the basis of valid_mask (all 10 bands(9+label) is nan if valid_mask is nan otherwise org feature +label) \n", - "# pred_feat is the 9 bands features\n", - "# pred_label is the firts channel\n", - "# nan_pred_feat and nan_pred_label are the \n" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[ 3. -1. -1. 3.]\n", - " [ 3. 1. 1. 3.]\n", - " [ 3. -1. -1. 3.]\n", - " [ 3. -1. -1. 2.]\n", - " [ 3. -1. -1. 3.]\n", - " [ 3. 1. 1. 3.]\n", - " [ 3. -1. -1. 3.]\n", - " [ 3. -1. -1. 2.]]\n" - ] - } - ], - "source": [ - "# even if it ignores nan, if all values are nan, it gives nan.\n", - "\n", - "import numpy as np\n", - "label_np=np.array([[3., -1, -1., 3.],\n", - " [3., 1., 1., 3.],[3., -1, -1., 3.],\n", - " [3., -1., -1., 2.],[3., -1, -1., 3.],\n", - " [3., 1., 1., 3.],[3., -1, -1., 3.],\n", - " [3., -1., -1., 2.]])\n", - "print(label_np)\n", - "\n", - "import numpy as np\n", - "\n", - "label_np=np.where(label_np==-1, np.nan, label_np)\n", - "print(label_np)\n", - "data=[]\n", - "for i in range(1,6):\n", - " for j in range(1,3):\n", - " temp_data=label_np[i-1:i+1,j-1:j+1]\n", - " print(temp_data)\n", - " print(np.nanmean(temp_data,axis=(0,1)))\n", - "\n", - " data.append(np.nanmean(temp_data,axis=(0,1)))\n", - " # label.append(data[0,i,j])\n", - " \n", - "print(data)" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[nan nan]\n", - " [nan nan]]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_18005/3690321154.py:3: RuntimeWarning: Mean of empty slice\n", - " np.nanmean(temp,axis=1)\n" - ] - }, - { - "data": { - "text/plain": [ - "array([nan, nan])" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "temp=label_np[1,:2,1:3]\n", - "print(temp)\n", - "np.nanmean(temp,axis=1)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[[ 3. nan nan 3.]\n", - " [ 3. 1. 1. 3.]]\n", - "\n", - " [[ 3. nan nan 3.]\n", - " [ 3. nan nan 2.]]]\n", - "[[ True False False True]\n", - " [ True False False True]]\n", - "[[3. 3. 3. 3.]\n", - " [3. 3. 3. 2.]]\n", - "[[[3. 6. 7. 3.]\n", - " [3. 1. 1. 3.]]\n", - "\n", - " [[3. 1. 2. 3.]\n", - " [3. 4. 1. 2.]]]\n", - "[3.25 2.75]\n", - "(2, 4, 1, 1)\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "label_np=np.array([[[3., -1, -1., 3.],\n", - " [3., 1., 1., 3.]],[[3., -1, -1., 3.],\n", - " [3., -1., -1., 2.]]])\n", - "label_np=np.where(label_np==-1, np.nan, label_np)\n", - "print(label_np)\n", - "mask=~np.isnan(label_np).any(axis=0)\n", - "print(mask)\n", - "masked_train_labels =label_np[:,mask]\n", - "print(masked_train_labels)\n", - "data=np.array([[[3., 6, 7., 3.],\n", - " [3., 1., 1., 3.]],[[3., 1, 2., 3.],\n", - " [3., 4., 1., 2.]]])\n", - "print(data)\n", - "d=data[:,0:2,0:2]\n", - "print(d.mean(axis=(1,2)))\n", - "a=[]\n", - "a.append(np.random.randn(4,1,1))\n", - "a.append(np.random.randn(4,1,1))\n", - "b=np.array(a)\n", - "print(b.shape)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[[False True True False]\n", - " [False False False False]]\n", - "\n", - " [[False True True False]\n", - " [False True True False]]]\n", - "[[False True True False]\n", - " [False True True False]]\n", - "last one [[False True True False]\n", - " [False True True False]]\n" - ] - } - ], - "source": [ - "label_np=np.array([[[3., -1, -1., 3.],\n", - " [3., 1., 1., 3.]],[[3., -1, -1., 3.],\n", - " [3., -1., -1., 3.]]])\n", - "label_np=np.where(label_np==-1,np.nan,label_np)\n", - "nan_label=np.isnan(label_np)\n", - "print(nan_label)\n", - "nan_label_2=np.any(np.isnan(label_np),axis=0)\n", - "print(nan_label_2)\n", - "nan_label_3= np.isnan(label_np).any(axis=0)\n", - "print(\"last one\",nan_label_3)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "block size 4\n", - "0 0\n", - "0 4\n", - "4 0\n", - "4 4\n" - ] - } - ], - "source": [ - "import rasterio\n", - "from rasterio.windows import Window\n", - "\n", - "height=8\n", - "width=8\n", - "block_coverage=1\n", - "total_blocks=4\n", - "total_pixels = height*width\n", - "pixels_covered = total_pixels*block_coverage\n", - "block_size = int(np.sqrt(pixels_covered/total_blocks))\n", - "print(\"block size\",block_size)\n", - "\n", - "# Generate windows for each grid\n", - "windows = []\n", - "for i in range(0, height-block_size+1, block_size):\n", - " for j in range(0, width-block_size+1, block_size):\n", - " print(i,j)\n", - "\n", - " windows.append(Window(j, i, block_size, block_size))" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -423,12 +33,10 @@ "metadata": {}, "outputs": [], "source": [ - "train_feat_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2010-01-01_urban_feat.tif\"\n", - "train_label_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2010-01-01_urban_label.tif\"\n", - "test_feat_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2015-01-01_urban_feat.tiff\"\n", - "test_label_file=\"/home/nikki/Downloads/urbanization_data_export_22122024_2015-01-01_urban_label.tiff\"\n", - "output_path=\"output_file_conv_static.tiff\"\n", "\n", + "data_dir = \"gs://earth-engine-seminar/urbanization/data/export_22122024\"\n", + "output_path=\"prediction_tiff.tiff\"\n", + "filter_size=5\n", "block_coverage=0.3\n", "total_blocks=100\n", "test_size=0.3" @@ -448,80 +56,39 @@ "outputs": [], "source": [ "\n", - "train_features=utils.rasterio_open(train_feat_file)\n", - "train_labels=utils.rasterio_open(train_label_file)\n", - "test_features=utils.rasterio_open(test_feat_file)\n", - "test_label=utils.rasterio_open(test_label_file)\n", + "labels = utils.files_in_dir(data_dir, \"label.tif\")\n", + "features = utils.files_in_dir(data_dir, \"feat.tif\")\n", + "train_labels = utils.files_in_dir(data_dir, \"label.tif\")\n", + "train_features = utils.files_in_dir(data_dir, \"feat.tif\")\n", + "test_labels = utils.load_tif_data(labels[0])\n", + "test_features = utils.load_tif_data(features[0])\n", "\n", "\n" ] }, { - "cell_type": "code", - "execution_count": 9, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2238\n", - "2211\n" - ] - }, - { - "data": { - "text/plain": [ - "(9, 2211, 2238)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "print(train_features.metadata['width'])\n", - "print(train_features.metadata['height'])\n", - "train_features.data.shape" + "## Pre-processing" ] }, { - "cell_type": "code", - "execution_count": 29, + "cell_type": "markdown", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "('pdsi',\n", - " 'pr',\n", - " 'pr_1',\n", - " 'tmmn',\n", - " 'tmmx',\n", - " 'NDVI',\n", - " 'EVI',\n", - " 'LC_Type1',\n", - " 'population_density')" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "train_features.bands" + "### Removing dublicate features from train and test set of features" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "\n", "train_feature=np.concatenate((train_features.data[0,:,:].reshape(1,train_features.metadata['height'],train_features.metadata['width'] ), train_features.data[2:,:,:]), axis=0)\n", - "test_feature= np.concatenate((test_features.data[0,:,:].reshape(1,test_features.metadata['height'],test_features.metadata['width']),test_features.data[2:,:,:]),axis=0)\n" + "test_feature= np.concatenate((test_features.data[0,:,:].reshape(1,test_features.metadata['height'],test_features.metadata['width']),test_features.data[2:,:,:]),axis=0)" ] }, { @@ -533,33 +100,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "(8, 2211, 2238)\n", - "(8, 2211, 2238)\n" - ] - } - ], - "source": [ - "print(train_feature.shape)\n", - "print(test_feature.shape)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Pre-processing" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "block size 121\n", "block size 121\n" ] }, @@ -567,251 +107,57 @@ "name": "stderr", "output_type": "stream", "text": [ - " 0%| | 0/226 [00:00" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", + "print(np.std(train_normalized,axis=0))\n", + "coerr=np.corrcoef(train, rowvar=False)\n", + "\n", + "\n", "\n", "plt.figure(figsize=(12, 8))\n", "sns.heatmap(coerr, annot=True, xticklabels=['Palmer Drought Severity Index','Precipitation accumulation',\n", @@ -830,31 +176,13 @@ }, { "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "y_train= y_train.astype(np.int32)\n", - "x_train=x_train.astype(np.float64)\n", - "x_val=x_val.astype(np.float64)\n", - "y_val=y_val.astype(np.int32)\n", - "x_test=x_test.astype(np.float64)\n", - "y_test=y_test.astype(np.int32)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, + "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/preprocessing/_label.py:93: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, warn=True)\n", - "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/preprocessing/_label.py:129: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", - " y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)\n", "/home/nikki/Downloads/venv/lib/python3.12/site-packages/sklearn/utils/deprecation.py:151: FutureWarning: 'force_all_finite' was renamed to 'ensure_all_finite' in 1.6 and will be removed in 1.8.\n", " warnings.warn(\n" ] @@ -863,20 +191,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.019087 seconds.\n", + "[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000569 seconds.\n", "You can set `force_row_wise=true` to remove the overhead.\n", "And if memory is not enough, you can set `force_col_wise=true`.\n", - "[LightGBM] [Info] Total Bins 3553\n", - "[LightGBM] [Info] Number of data points in the train set: 2124782, number of used features: 15\n", - "[LightGBM] [Info] Start training from score -5.972067\n", - "[LightGBM] [Info] Start training from score -7.375494\n", - "[LightGBM] [Info] Start training from score -6.251169\n", - "[LightGBM] [Info] Start training from score -0.005117\n" + "[LightGBM] [Info] Total Bins 3483\n", + "[LightGBM] [Info] Number of data points in the train set: 18200, number of used features: 15\n", + "[LightGBM] [Info] Start training from score -6.551080\n", + "[LightGBM] [Info] Start training from score -7.611952\n", + "[LightGBM] [Info] Start training from score -6.198259\n", + "[LightGBM] [Info] Start training from score -0.003964\n", + "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n", + "[LightGBM] [Warning] No further splits with positive gain, best gain: -inf\n" ] } ], "source": [ - "import lightgbm as lgbm\n", + "\n", "model = lgbm.LGBMClassifier(objective=\"multiclass\", num_class=4)\n", "main.train(model,x_train,y_train,x_val,y_val)\n" ] @@ -890,7 +220,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -903,12 +233,19 @@ } ], "source": [ - "test_pred,cm,report=main.predict(model,x_train,y_train)" + "test_pred,cm,report=main.predict(model,x_test,y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Results" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -917,14 +254,14 @@ "text": [ " precision recall f1-score support\n", "\n", - " 0 0.21 0.09 0.13 5416\n", - " 1 0.11 0.05 0.07 1331\n", - " 2 0.12 0.02 0.03 4097\n", - " 3 1.00 1.00 1.00 2113938\n", + " 0 0.01 0.04 0.02 8586\n", + " 1 0.01 0.06 0.01 2260\n", + " 2 0.02 0.11 0.04 6524\n", + " 3 1.00 0.98 0.99 3286990\n", "\n", - " accuracy 0.99 2124782\n", - " macro avg 0.36 0.29 0.31 2124782\n", - "weighted avg 0.99 0.99 0.99 2124782\n", + " accuracy 0.98 3304360\n", + " macro avg 0.26 0.30 0.27 3304360\n", + "weighted avg 0.99 0.98 0.98 3304360\n", "\n" ] } @@ -935,21 +272,22 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[[ 484 8 1 4923]\n", - " [ 22 63 2 1244]\n", - " [ 25 2 71 3999]\n", - " [ 1734 478 530 2111196]]\n" + "[[ 318 489 152 7627]\n", + " [ 127 143 358 1632]\n", + " [ 213 115 717 5479]\n", + " [ 20568 16523 28307 3221592]]\n" ] } ], "source": [ + "# Confusion matrix\n", "print(cm)" ] }, @@ -960,250 +298,14 @@ "## Saving" ] }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(2211, 2239)\n", - "(4950429,)\n", - "4950429\n", - "4950429 (4950429,) 3003316\n", - "| 0.01, 0.00, 67.81|\n", - "| 0.00,-0.01, 37.58|\n", - "| 0.00, 0.00, 1.00|\n" - ] - }, - { - "ename": "ValueError", - "evalue": "NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[16], line 17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# def save_prediction(test_profile, pred_label_mask,test_pred,output_path):\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# shape=pred_label_mask.shape\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m# print(shape)\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# with rasterio.open(output_path,\"w\",driver=\"GTiff\",height=shape[0],width=shape[1],count=1, dtype=pred_array.dtype,crs=test_profile['crs'],transform=transform) as dst:\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# dst.write(pred_array, 1)\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[43mmain\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msave_prediction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtest_features\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprofile\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest_mask\u001b[49m\u001b[43m,\u001b[49m\u001b[43mtest_pred\u001b[49m\u001b[43m,\u001b[49m\u001b[43moutput_path\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/Downloads/urbanan_google_earth/main.py:187\u001b[0m, in \u001b[0;36msave_prediction\u001b[0;34m(test_profile, pred_label_mask, test_pred, output_path)\u001b[0m\n\u001b[1;32m 183\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(valid_pred_mask),pred_array\u001b[38;5;241m.\u001b[39mshape, \u001b[38;5;28mlen\u001b[39m(test_pred))\n\u001b[1;32m 184\u001b[0m \u001b[38;5;28mprint\u001b[39m(transform)\n\u001b[0;32m--> 187\u001b[0m \u001b[43mpred_array\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvalid_pred_mask\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m test_pred\n\u001b[1;32m 188\u001b[0m pred_array \u001b[38;5;241m=\u001b[39m pred_array\u001b[38;5;241m.\u001b[39mreshape((shape[\u001b[38;5;241m0\u001b[39m], shape[\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 189\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m rasterio\u001b[38;5;241m.\u001b[39mopen(output_path,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m,driver\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGTiff\u001b[39m\u001b[38;5;124m\"\u001b[39m,height\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m0\u001b[39m],width\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m1\u001b[39m],count\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, dtype\u001b[38;5;241m=\u001b[39mpred_array\u001b[38;5;241m.\u001b[39mdtype,crs\u001b[38;5;241m=\u001b[39mtest_profile[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcrs\u001b[39m\u001b[38;5;124m'\u001b[39m],transform\u001b[38;5;241m=\u001b[39mtransform) \u001b[38;5;28;01mas\u001b[39;00m dst:\n", - "\u001b[0;31mValueError\u001b[0m: NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true" - ] - } - ], - "source": [ - "def save_prediction(test_profile, test_pred,output_path):\n", - " shape=(2211,2239)\n", - " print(shape)\n", - " valid_pred_mask=pred_label_mask.flatten()\n", - " pred_array=np.full((shape[0]*shape[1]),np.nan)\n", - " print(pred_array.shape)\n", - " print(len(valid_pred_mask))\n", - " transform=test_profile['transform']\n", - " print(len(valid_pred_mask),pred_array.shape, len(test_pred))\n", - " print(transform)\n", - " \n", - "\n", - " pred_array[valid_pred_mask] = test_pred\n", - " pred_array = pred_array.reshape((shape[0], shape[1]))\n", - " with rasterio.open(output_path,\"w\",driver=\"GTiff\",height=shape[0],width=shape[1],count=1, dtype=pred_array.dtype,crs=test_profile['crs'],transform=transform) as dst:\n", - " dst.write(pred_array, 1)\n", - "main.save_prediction(test_features.profile,test_pred,output_path)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(3003316,)" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_pred.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(3003316,)" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "y_test.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "4950429" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "2211*2239" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(2211, 2239)" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_mask.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(2211, 2239)\n", - "(4950429,)\n", - "4950429\n", - "4950429 (4950429,) 3003316\n", - "| 0.01, 0.00, 67.81|\n", - "| 0.00,-0.01, 37.58|\n", - "| 0.00, 0.00, 1.00|\n" - ] - }, - { - "ename": "ValueError", - "evalue": "NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 13\u001b[0m\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;28mlen\u001b[39m(valid_pred_mask),pred_array\u001b[38;5;241m.\u001b[39mshape, \u001b[38;5;28mlen\u001b[39m(test_pred))\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28mprint\u001b[39m(transform)\n\u001b[0;32m---> 13\u001b[0m \u001b[43mpred_array\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvalid_pred_mask\u001b[49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m test_pred\n\u001b[1;32m 14\u001b[0m pred_array \u001b[38;5;241m=\u001b[39m pred_array\u001b[38;5;241m.\u001b[39mreshape((shape[\u001b[38;5;241m0\u001b[39m], shape[\u001b[38;5;241m1\u001b[39m]))\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m rasterio\u001b[38;5;241m.\u001b[39mopen(output_path,\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m\"\u001b[39m,driver\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGTiff\u001b[39m\u001b[38;5;124m\"\u001b[39m,height\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m0\u001b[39m],width\u001b[38;5;241m=\u001b[39mshape[\u001b[38;5;241m1\u001b[39m],count\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, dtype\u001b[38;5;241m=\u001b[39mpred_array\u001b[38;5;241m.\u001b[39mdtype,crs\u001b[38;5;241m=\u001b[39mtest_profile[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcrs\u001b[39m\u001b[38;5;124m'\u001b[39m],transform\u001b[38;5;241m=\u001b[39mtransform) \u001b[38;5;28;01mas\u001b[39;00m dst:\n", - "\u001b[0;31mValueError\u001b[0m: NumPy boolean array indexing assignment cannot assign 3003316 input values to the 3312817 output values where the mask is true" - ] - } - ], - "source": [ - "test_profile=test_features.profile\n", - "shape=(2211,2239)\n", - "print(shape)\n", - "valid_pred_mask=test_mask.flatten()\n", - "pred_array=np.full((shape[0]*shape[1]),np.nan)\n", - "print(pred_array.shape)\n", - "print(len(valid_pred_mask))\n", - "transform=test_profile['transform']\n", - "print(len(valid_pred_mask),pred_array.shape, len(test_pred))\n", - "print(transform)\n", - "\n", - "\n", - "pred_array[valid_pred_mask] = test_pred\n", - "pred_array = pred_array.reshape((shape[0], shape[1]))\n", - "with rasterio.open(output_path,\"w\",driver=\"GTiff\",height=shape[0],width=shape[1],count=1, dtype=pred_array.dtype,crs=test_profile['crs'],transform=transform) as dst:\n", - " dst.write(pred_array, 1)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "CRS.from_wkt('GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"Spheroid\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]')" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "test_profile['crs']" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'driver': 'GTiff', 'dtype': 'float64', 'nodata': None, 'width': 2239, 'height': 2211, 'count': 1, 'crs': CRS.from_wkt('GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"Spheroid\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]'), 'transform': Affine(0.013474729261792823, 0.0, 67.81374585363181,\n", - " 0.0, -0.013474729261792823, 37.58330317162592), 'blockxsize': 2239, 'blockysize': 1, 'tiled': False, 'interleave': 'band'}\n" - ] - } - ], - "source": [ - "out=utils.rasterio_open(\"output_file_1.tiff\")\n", - "print(out.profile)" - ] - }, { "cell_type": "code", "execution_count": 14, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'driver': 'GTiff', 'dtype': 'float32', 'nodata': None, 'width': 2238, 'height': 2211, 'count': 9, 'crs': CRS.from_wkt('GEOGCS[\"unknown\",DATUM[\"unknown\",SPHEROID[\"Spheroid\",6378137,298.257223563]],PRIMEM[\"Greenwich\",0],UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9122\"]],AXIS[\"Latitude\",NORTH],AXIS[\"Longitude\",EAST]]'), 'transform': Affine(0.013474729261792823, 0.0, 67.81374585363181,\n", - " 0.0, -0.013474729261792823, 37.58330317162592), 'blockxsize': 256, 'blockysize': 256, 'tiled': True, 'compress': 'lzw', 'interleave': 'pixel'}\n" - ] - } - ], + "outputs": [], "source": [ - "print(test_profile)" + "main.save_prediction(test_labels,filter_size,test_mask,test_pred,output_path)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/nikki_exp_conv_temporal_static/main.py b/nikki_exp_conv_temporal_static/main.py index fdc7cb9..dfb6fa0 100644 --- a/nikki_exp_conv_temporal_static/main.py +++ b/nikki_exp_conv_temporal_static/main.py @@ -69,16 +69,6 @@ def get_sampled_data(data, windows): -def train(model,x_train,y_train,x_val,y_val): - - model=model.fit(x_train,y_train,eval_set=[(x_val,y_val)]) - - -def predict(model,x_test,y_test): - y_pred=model.predict(x_test) - cm = confusion_matrix(y_test, y_pred, labels=[0,1,2,3]) - report = classification_report(y_test, y_pred, labels=[0,1,2,3]) - return y_pred,cm,report def get_sampled_test(test_data): new_data=[] @@ -94,6 +84,47 @@ def get_sampled_test(test_data): return new_data, new_label + +def get_sampled_spatial_convolve_data(data,windows): + new_data=[] + new_label=[] + + for window in tqdm(windows): + row_start = int(window.row_off) + row_stop = int(window.row_off + window.height) + col_start = int(window.col_off) + col_stop = int(window.col_off + window.width) + + for i in range(row_start+2,row_stop-2): + for j in range(col_start+2, col_stop-2): + mean_data=np.concatenate((data[1:7,i-2:i+3,j-2:j+3], data[8:9,i-2:i+3,j-2:j+3]),axis=0) + static_data=data[1:,i,j] + new_data.append(np.concatenate(((np.nanmean(mean_data,axis=(1,2))),static_data),axis=0 )) + new_label.append(data[0,i,j]) + + return new_data,new_label + + +def get_sampled_spatial_temporal_convolve_data(data,windows): + + new_data=[] + for window in tqdm(windows): + row_start = int(window.row_off) + row_stop = int(window.row_off + window.height) + col_start = int(window.col_off) + col_stop = int(window.col_off + window.width) + for i in range(row_start+2,row_stop-2): + for j in range(col_start+2, col_stop-2): + temp_data=data[:,i-2:i+2,j-2:j+2] + + print(temp_data) + + new_data.append(np.nanmean(temp_data,axis=(0,1,2))) + + return new_data + + + def save_prediction(test_labels, filter_size,pred_label_mask,test_pred,output_path): test_profile=test_labels.profile @@ -169,51 +200,19 @@ def pre_process(train_features, train_labels,test_features,test_labels, block_co return x_train,y_train,x_test,y_test,x_val,y_val,train_mask, test_mask, val_mask -def get_sampled_spatial_convolve_data(data,windows): - new_data=[] - new_label=[] - - for window in tqdm(windows): - row_start = int(window.row_off) - row_stop = int(window.row_off + window.height) - col_start = int(window.col_off) - col_stop = int(window.col_off + window.width) - - for i in range(row_start+2,row_stop-2): - for j in range(col_start+2, col_stop-2): - mean_data=np.concatenate((data[1:7,i-2:i+3,j-2:j+3], data[8:9,i-2:i+3,j-2:j+3]),axis=0) - static_data=data[1:,i,j] - new_data.append(np.concatenate(((np.nanmean(mean_data,axis=(1,2))),static_data),axis=0 )) - new_label.append(data[0,i,j]) - - return new_data,new_label - - -def get_sampled_spatial_temporal_convolve_data(data,windows): - - new_data=[] - for window in tqdm(windows): - row_start = int(window.row_off) - row_stop = int(window.row_off + window.height) - col_start = int(window.col_off) - col_stop = int(window.col_off + window.width) - for i in range(row_start+2,row_stop-2): - for j in range(col_start+2, col_stop-2): - temp_data=data[:,i-2:i+2,j-2:j+2] - - print(temp_data) - - new_data.append(np.nanmean(temp_data,axis=(0,1,2))) - - return new_data - - +def train(model,x_train,y_train,x_val,y_val): + model=model.fit(x_train,y_train,eval_set=[(x_val,y_val)]) +def predict(model,x_test,y_test): + y_pred=model.predict(x_test) + cm = confusion_matrix(y_test, y_pred, labels=[0,1,2,3]) + report = classification_report(y_test, y_pred, labels=[0,1,2,3]) + return y_pred,cm,report diff --git a/nikki_exp_conv_temporal_static/utils.py b/nikki_exp_conv_temporal_static/utils.py index b0804aa..190f2a0 100644 --- a/nikki_exp_conv_temporal_static/utils.py +++ b/nikki_exp_conv_temporal_static/utils.py @@ -1,8 +1,8 @@ import numpy as np import rasterio -# import gcsfs +import gcsfs from io import BytesIO -# from google.cloud import storage +from google.cloud import storage from dataclasses import dataclass from typing import Any, Dict, Optional, List @@ -82,3 +82,149 @@ def files_in_dir(bucket_path, file_extension): + + + + + + + + + + + + + + + + + + + + +def pre_process(train_features, train_labels,test_features,test_labels, block_coverage, total_blocks,test_size): + + train_labels=np.where(train_labels==-1, np.nan, train_labels) + test_labels=np.where(test_labels==-1, np.nan,test_labels) + train_axis_pad=padding(train_features,train_labels) + test_axis_pad=padding(test_features,test_labels) + train_features=np.pad(train_features, ((0,0),(0,train_axis_pad[1]),(0,train_axis_pad[2])),mode='constant',constant_values=np.nan) + # print("after padding",train_features) + test_features=np.pad(test_features,((0,0),(0,test_axis_pad[1]),(0,test_axis_pad[2])),mode='constant',constant_values=np.nan) + train_stacked=np.concatenate((train_labels,train_features),axis=0) + test_stacked=np.concatenate((test_labels,test_features),axis=0) + train_valid_mask=~np.isnan(train_stacked).any(axis=0) + test_valid_mask=~np.isnan(test_stacked).any(axis=0) + train_stacked=np.where(train_valid_mask,train_stacked,np.nan) + test_data=np.where(test_valid_mask,test_stacked,np.nan) + height, width = train_stacked.shape[1], train_stacked.shape[2] + windows=window(block_coverage,total_blocks,height,width) + train_windows, val_windows = train_test_split(windows, test_size=test_size, random_state=42) + train_final_data,train_final_label= get_sampled_spatial_convolve_data(train_stacked, train_windows) + + val_final_data,val_final_label = get_sampled_spatial_convolve_data(train_stacked, val_windows) + train_final_data,train_final_label=np.array(train_final_data), np.array(train_final_label) + val_final_data,val_final_label=np.array(val_final_data), np.array(val_final_label) + print(train_final_data.shape, train_final_label.shape) + print(val_final_data.shape, val_final_label.shape) + + # train_data,train_final_data= get_sampled_spatial_temporal_convolve_data(train_stacked, train_windows) + + # val_data,val_final_data = get_sampled_spatial_temporal_convolve_data(train_stacked, val_windows) + # print(train_final_data.shape) + # print(val_final_data.shape) + # train_final_data=np.array(train_final_data) + # val_final_data=np.array(val_final_data) + # print(train_final_data.shape) + # print(val_final_data.shape) + nan_train_feat = ~np.any(np.isnan(train_final_data),axis=1) + nan_val_feat=~np.any(np.isnan(val_final_data),axis=1) + train_feat_=train_final_data[nan_train_feat] + val_feat_=val_final_data[nan_val_feat] + train_label_=train_final_label[nan_train_feat] + val_label_=val_final_label[nan_val_feat] + # train_feat_=train_final_data[:,1:] + # train_label_=train_final_data[:,0] + # val_feat_=val_final_data[:,1:] + # val_label_=val_final_data[:,0] + # print(train_feat_.shape) + # print(train_label_.shape) + # print(val_feat_.shape) + # print(val_label_.shape) + + + # train_feat =train_data[1:,:,:] + # train_label=train_data[0,:,:] + # val_feat=val_data[1:,:,:] + # val_label=val_data[0,:,:] + test_feat=test_data[1:,:,:] + test_label=test_data[0,:,:] + test_feat=test_feat[:,test_valid_mask].transpose() + test_label=test_label[test_valid_mask] + + + return train_feat_,train_label_,val_feat_,val_label_,test_feat,test_label,test_valid_mask + + # return x_train, y_train,x_val,y_val,x_test, y_test,test_label_mask,train_feat_, val_feat_, train_label_,val_label_ +def get_sampled_spatial_convolve_data(data,windows): + new_data=[] + new_label=[] + # 4 window size + # data=np.pad(data,((0,0),(4,4),(4,4)),mode='constant',constant_values=np.nan) + for window in tqdm(windows): + row_start = int(window.row_off) + row_stop = int(window.row_off + window.height) + col_start = int(window.col_off) + col_stop = int(window.col_off + window.width) + # row_start=max(row_start,4) + # col_start=max(col_start,4) + for i in range(row_start+2,row_stop-2): + for j in range(col_start+2, col_stop-2): + temp_data=data[1:,i-2:i+3,j-2:j+3] + print(temp_data) + # print(temp_data) + # temp_data=np.where(temp_data==np.nan,0,temp_data) + # print(temp_data) + # print(temp_data.mean(axis=(1,2))) + print(np.nanmean(temp_data,axis=(1,2))) + print(data[0,i,j]) + new_data.append(np.nanmean(temp_data,axis=(1,2))) + new_label.append(data[0,i,j]) + + # if ~np.isnan(data[0,i,j]): + + + + # else: + + # print("nan value",data[:,i,j]) + # if data[0,row_start,col_start]!=np.nan: + # new_data.append(data[:,row_start:row_stop,col_start:col_stop].mean(axis=(1,2))) + return new_data,new_label +def get_sampled_spatial_temporal_convolve_data(data,windows): + + new_data=[] + for window in tqdm(windows): + row_start = int(window.row_off) + row_stop = int(window.row_off + window.height) + col_start = int(window.col_off) + col_stop = int(window.col_off + window.width) + for i in range(row_start+2,row_stop-2): + for j in range(col_start+2, col_stop-2): + temp_data=data[:,i-2:i+2,j-2:j+2] + + print(temp_data) + # print(temp_data) + # temp_data=np.where(temp_data==np.nan,0,temp_data) + # print(temp_data) + # print(temp_data.mean(axis=(1,2))) + new_data.append(np.nanmean(temp_data,axis=(0,1,2))) + + # if data[0,row_start,col_start]!=np.nan: + # new_data.append(data[:,row_start:row_stop,col_start:col_stop].mean(axis=(0,1,2))) + return new_data + + + + + From f78b0893953633230c3a4080ecbe15accb544f59 Mon Sep 17 00:00:00 2001 From: nikki28 Date: Fri, 31 Jan 2025 18:15:34 +0530 Subject: [PATCH 3/3] added conversion to gif --- vis_to_gif.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 vis_to_gif.py diff --git a/vis_to_gif.py b/vis_to_gif.py new file mode 100644 index 0000000..e69de29