From 9549dbb6d3fc588d26ff6ef680f7553397c871dd Mon Sep 17 00:00:00 2001 From: Fan Zhang Date: Mon, 11 May 2026 09:26:20 -0700 Subject: [PATCH] Removed numpy pin and added transformer pin --- ...trained_clip_large_inference_on_inf2.ipynb | 359 +++++++++--------- 1 file changed, 177 insertions(+), 182 deletions(-) diff --git a/torch-neuronx/inference/hf_pretrained_clip_large_inference_on_inf2.ipynb b/torch-neuronx/inference/hf_pretrained_clip_large_inference_on_inf2.ipynb index d46feda..90d8f95 100644 --- a/torch-neuronx/inference/hf_pretrained_clip_large_inference_on_inf2.ipynb +++ b/torch-neuronx/inference/hf_pretrained_clip_large_inference_on_inf2.ipynb @@ -1,183 +1,178 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "7e9a2b5b", - "metadata": {}, - "source": [ - "# HuggingFace Pretrained CLIP Inference on Inf2" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "id": "2f4b19d5", - "metadata": {}, - "source": [ - "## Introduction\n", - "\n", - "This notebook demonstrates how to compile and run a HuggingFace 🤗 Contrastive Language-Image Pretraining (CLIP) model for accelerated inference on Neuron. This notebook will use the [`openai/clip-vit-large-patch14`](https://huggingface.co/openai/clip-vit-large-patch14) model.\n", - "\n", - "This Jupyter notebook should be run on an Inf2 or Trn1 instance, of size Inf2.8xlarge or Trn1.2xlarge or larger.\n", - "\n", - "Note: for deployment, it is recommended to pre-compile the model on a compute instance using `torch_neuronx.trace()`, save the compiled model as a `.pt` file, and then distribute the `.pt` to Inf2.xlarge instances for inference." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Verify that this Jupyter notebook is running the Python kernel environment that was set up according to the [PyTorch Installation Guide](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/setup/torch-neuronx.html#setup-torch-neuronx). You can select the kernel from the 'Kernel -> Change Kernel' option on the top of this Jupyter notebook page." - ] - }, - { - "cell_type": "markdown", - "id": "23575b62", - "metadata": {}, - "source": [ - "## Install Dependencies\n", - "This tutorial requires the following pip packages:\n", - "\n", - "- `torch-neuronx`\n", - "- `neuronx-cc`\n", - "- `transformers`\n", - "\n", - "Most of these packages will be installed when configuring your environment using the Trn1 setup guide. The additional dependencies must be installed here:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4df33ddc", - "metadata": {}, - "outputs": [], - "source": [ - "%env TOKENIZERS_PARALLELISM=True #Supresses tokenizer warnings making errors easier to detect\n", - "# torchvision version pinned to avoid pulling in torch 2.0\n", - "%env HF_HUB_DISABLE_PROGRESS_BARS=1 # Avoids xet progress bar model download error\n", - "!pip install -U transformers numpy==1.26.4 opencv-python Pillow" - ] - }, - { - "cell_type": "markdown", - "id": "9dc8c2b1", - "metadata": {}, - "source": [ - "## Compile the model into an AWS Neuron optimized TorchScript" - ] - }, - { - "cell_type": "markdown", - "id": "5ca0f1b2", - "metadata": {}, - "source": [ - "In the following section, we load the model, and input preprocessor, get a sample input, run inference on CPU, compile the model for Neuron using `torch_neuronx.trace()`, and save the optimized model as `TorchScript`.\n", - "\n", - "`torch_neuronx.trace()` expects a tensor or tuple of tensor inputs to use for tracing, so we unpack the input preprocessor's output. Additionally, the input shape that's used during compilation must match the input shape that's used during inference." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ce06089", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import torch\n", - "import torch_neuronx\n", - "from transformers import CLIPProcessor, CLIPModel\n", - "from torchvision.datasets import CIFAR100\n", - "\n", - "model_name = 'openai/clip-vit-large-patch14'\n", - "\n", - "# Create the input preprocessor and model\n", - "processor = CLIPProcessor.from_pretrained(model_name)\n", - "model = CLIPModel.from_pretrained(model_name, return_dict=True)\n", - "model.eval()\n", - "\n", - "# Get text captions for the model to classify the image against\n", - "cifar100 = CIFAR100(root=os.path.expanduser(\"~/.cache\"), download=True, train=False)\n", - "text = []\n", - "# Classify the image against the first 100 classes of CIFAR100\n", - "for i in range(0, 100):\n", - " text.append(f'a photo of a {cifar100.classes[i]}')\n", - "\n", - "# Get an example input\n", - "image = cifar100[0][0]\n", - "\n", - "inputs = processor(text=text, images=image, return_tensors=\"pt\", padding=True)\n", - "\n", - "example = (inputs['input_ids'], inputs['pixel_values'])\n", - "\n", - "# Run inference on CPU\n", - "output_cpu = model(*example)\n", - "\n", - "# Compile the model\n", - "model_neuron = torch_neuronx.trace(model, example, compiler_args='--enable-saturate-infinity')\n", - "\n", - "# Save the TorchScript for inference deployment\n", - "filename = 'model.pt'\n", - "torch.jit.save(model_neuron, filename)" - ] - }, - { - "cell_type": "markdown", - "id": "509f20ef", - "metadata": {}, - "source": [ - "## Run inference and compare results\n", - "\n", - "In this section we load the compiled model, run inference on Neuron, and compare the CPU and Neuron outputs." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c1832297", - "metadata": {}, - "outputs": [], - "source": [ - "# Load the TorchScript compiled model\n", - "model_neuron = torch.jit.load(filename)\n", - "\n", - "# Run inference using the Neuron model\n", - "output_neuron = model_neuron(*example)\n", - "\n", - "# Compare the results\n", - "cpu_top5 = output_cpu.logits_per_image[0].softmax(dim=-1).topk(5)\n", - "neuron_top5 = output_neuron[0][0].softmax(dim=-1).topk(5)\n", - "\n", - "print('CPU top 5 classifications')\n", - "for value, index in zip(cpu_top5[0], cpu_top5[1]):\n", - " print(f\"{cifar100.classes[index.item()]:>16s}: {100 * value.item():.2f}%\")\n", - "\n", - "print('Neuron top 5 classifications')\n", - "for value, index in zip(neuron_top5[0], neuron_top5[1]):\n", - " print(f\"{cifar100.classes[index.item()]:>16s}: {100 * value.item():.2f}%\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python (Neuron PyTorch)", - "language": "python", - "name": "pytorch_venv" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.10" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "cells": [ + { + "cell_type": "markdown", + "id": "7e9a2b5b", + "metadata": {}, + "source": [ + "# HuggingFace Pretrained CLIP Inference on Inf2" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "id": "2f4b19d5", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This notebook demonstrates how to compile and run a HuggingFace 🤗 Contrastive Language-Image Pretraining (CLIP) model for accelerated inference on Neuron. This notebook will use the [`openai/clip-vit-large-patch14`](https://huggingface.co/openai/clip-vit-large-patch14) model.\n", + "\n", + "This Jupyter notebook should be run on an Inf2 or Trn1 instance, of size Inf2.8xlarge or Trn1.2xlarge or larger.\n", + "\n", + "Note: for deployment, it is recommended to pre-compile the model on a compute instance using `torch_neuronx.trace()`, save the compiled model as a `.pt` file, and then distribute the `.pt` to Inf2.xlarge instances for inference." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Verify that this Jupyter notebook is running the Python kernel environment that was set up according to the [PyTorch Installation Guide](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/general/setup/torch-neuronx.html#setup-torch-neuronx). You can select the kernel from the 'Kernel -> Change Kernel' option on the top of this Jupyter notebook page." + ] + }, + { + "cell_type": "markdown", + "id": "23575b62", + "metadata": {}, + "source": [ + "## Install Dependencies\n", + "This tutorial requires the following pip packages:\n", + "\n", + "- `torch-neuronx`\n", + "- `neuronx-cc`\n", + "- `transformers`\n", + "\n", + "Most of these packages will be installed when configuring your environment using the Trn1 setup guide. The additional dependencies must be installed here:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4df33ddc", + "metadata": {}, + "outputs": [], + "source": "%env TOKENIZERS_PARALLELISM=True #Supresses tokenizer warnings making errors easier to detect\n# torchvision version pinned to avoid pulling in torch 2.0\n%env HF_HUB_DISABLE_PROGRESS_BARS=1 # Avoids xet progress bar model download error\n!pip install -U transformers==4.57.6 opencv-python Pillow" + }, + { + "cell_type": "markdown", + "id": "9dc8c2b1", + "metadata": {}, + "source": [ + "## Compile the model into an AWS Neuron optimized TorchScript" + ] + }, + { + "cell_type": "markdown", + "id": "5ca0f1b2", + "metadata": {}, + "source": [ + "In the following section, we load the model, and input preprocessor, get a sample input, run inference on CPU, compile the model for Neuron using `torch_neuronx.trace()`, and save the optimized model as `TorchScript`.\n", + "\n", + "`torch_neuronx.trace()` expects a tensor or tuple of tensor inputs to use for tracing, so we unpack the input preprocessor's output. Additionally, the input shape that's used during compilation must match the input shape that's used during inference." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ce06089", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "\n", + "import torch\n", + "import torch_neuronx\n", + "from transformers import CLIPProcessor, CLIPModel\n", + "from torchvision.datasets import CIFAR100\n", + "\n", + "model_name = 'openai/clip-vit-large-patch14'\n", + "\n", + "# Create the input preprocessor and model\n", + "processor = CLIPProcessor.from_pretrained(model_name)\n", + "model = CLIPModel.from_pretrained(model_name, return_dict=True)\n", + "model.eval()\n", + "\n", + "# Get text captions for the model to classify the image against\n", + "cifar100 = CIFAR100(root=os.path.expanduser(\"~/.cache\"), download=True, train=False)\n", + "text = []\n", + "# Classify the image against the first 100 classes of CIFAR100\n", + "for i in range(0, 100):\n", + " text.append(f'a photo of a {cifar100.classes[i]}')\n", + "\n", + "# Get an example input\n", + "image = cifar100[0][0]\n", + "\n", + "inputs = processor(text=text, images=image, return_tensors=\"pt\", padding=True)\n", + "\n", + "example = (inputs['input_ids'], inputs['pixel_values'])\n", + "\n", + "# Run inference on CPU\n", + "output_cpu = model(*example)\n", + "\n", + "# Compile the model\n", + "model_neuron = torch_neuronx.trace(model, example, compiler_args='--enable-saturate-infinity')\n", + "\n", + "# Save the TorchScript for inference deployment\n", + "filename = 'model.pt'\n", + "torch.jit.save(model_neuron, filename)" + ] + }, + { + "cell_type": "markdown", + "id": "509f20ef", + "metadata": {}, + "source": [ + "## Run inference and compare results\n", + "\n", + "In this section we load the compiled model, run inference on Neuron, and compare the CPU and Neuron outputs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1832297", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the TorchScript compiled model\n", + "model_neuron = torch.jit.load(filename)\n", + "\n", + "# Run inference using the Neuron model\n", + "output_neuron = model_neuron(*example)\n", + "\n", + "# Compare the results\n", + "cpu_top5 = output_cpu.logits_per_image[0].softmax(dim=-1).topk(5)\n", + "neuron_top5 = output_neuron[0][0].softmax(dim=-1).topk(5)\n", + "\n", + "print('CPU top 5 classifications')\n", + "for value, index in zip(cpu_top5[0], cpu_top5[1]):\n", + " print(f\"{cifar100.classes[index.item()]:>16s}: {100 * value.item():.2f}%\")\n", + "\n", + "print('Neuron top 5 classifications')\n", + "for value, index in zip(neuron_top5[0], neuron_top5[1]):\n", + " print(f\"{cifar100.classes[index.item()]:>16s}: {100 * value.item():.2f}%\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python (Neuron PyTorch)", + "language": "python", + "name": "pytorch_venv" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file