awslabs · github-actions · Jul 18, 2025 · Aug 11, 2025 · Aug 13, 2025 · Aug 18, 2025
diff --git a/.github/workflows/code.smoke-test.yml b/.github/workflows/code.smoke-test.yml
@@ -1,5 +1,8 @@
 name: Smoke Tests
 
+permissions:
+  contents: read
+
 # Trigger on every push to any branch
 on:
   push:
@@ -22,9 +25,6 @@ jobs:
       - name: Install base dependencies
         run: npm ci
 
-      - name: Install Cypress deps
-        run: npm ci --prefix cypress
-
       # ────────────────────────────────────────────────────
       # Start LISA UI in background & wait until it’s ready
       # ────────────────────────────────────────────────────

diff --git a/.github/workflows/issues.alert.yml b/.github/workflows/issues.alert.yml
@@ -1,4 +1,5 @@
 name: Alert on Issue Creation
+permissions: {}
 on:
   issues:
     types: [opened, reopened]

diff --git a/Makefile b/Makefile
@@ -177,36 +177,40 @@ dockerCheck:
 
 ## Check if models are uploaded
 modelCheck:
-	@$(foreach MODEL_ID,$(MODEL_IDS), \
-		$(PROJECT_DIR)/scripts/check-for-models.sh -m $(MODEL_ID) -s $(MODEL_BUCKET); \
-		if \
-			[ $$? != 0 ]; \
-			then \
-				localModelDir="./models"; \
-				if \
-					[ ! -d "$localModelDir" ]; \
-					then \
-						mkdir "$localModelDir"; \
-				fi; \
-				echo; \
-				echo "Preparing to download, convert, and upload safetensors for model: $(MODEL_ID)"; \
-				echo "Local directory: '$$localModelDir' will be used to store downloaded and converted model weights"; \
-				echo "Note: sudo privileges required to remove model dir due to docker mount using root"; \
-				echo "Would you like to continue? [y/N] "; \
-				read confirm_download; \
-				if \
-					[ $${confirm_download:-'N'} = 'y' ]; \
-					then \
-						mkdir -p $$localModelDir; \
+# For each model in MODEL_IDS that check-for-models.sh rejects (non-zero exit), offer to convert and upload it.
+# The HuggingFace token is taken from the HUGGINGFACE_TOKEN env var, else .hf_token_cache, else a prompt.
+# A prompted token is cached in .hf_token_cache, created owner-only (umask 077); cleanMisc removes the file.
+	@access_token=""; \
+	for MODEL_ID in $(MODEL_IDS); do \
+		$(PROJECT_DIR)/scripts/check-for-models.sh -m "$$MODEL_ID" -s $(MODEL_BUCKET); \
+		if [ $$? != 0 ]; then \
+			localModelDir="./models"; \
+			mkdir -p "$$localModelDir"; \
+			echo; \
+			echo "Preparing to download, convert, and upload safetensors for model: $$MODEL_ID"; \
+			echo "Local directory: '$$localModelDir' will be used to store downloaded and converted model weights"; \
+			echo "Note: sudo privileges required to remove model dir due to docker mount using root"; \
+			echo "Would you like to continue? [y/N] "; \
+			read confirm_download; \
+			if [ "$${confirm_download:-N}" = 'y' ]; then \
+				if [ -z "$$access_token" ]; then \
+					if [ -n "$$HUGGINGFACE_TOKEN" ]; then \
+						access_token="$$HUGGINGFACE_TOKEN"; \
+					elif [ -f ".hf_token_cache" ]; then \
+						access_token=$$(cat .hf_token_cache); \
+					else \
 						echo "What is your huggingface access token? "; \
 						read access_token; \
-						echo "Converting and uploading safetensors for model: $(MODEL_ID)"; \
-						tgiImage=$$(yq -r '[.ecsModels[] | select(.inferenceContainer == "tgi") | .baseImage] | first' $(PROJECT_DIR)/config-custom.yaml); \
-						echo $$tgiImage; \
-						$(PROJECT_DIR)/scripts/convert-and-upload-model.sh -m $(MODEL_ID) -s $(MODEL_BUCKET) -a $$access_token -t $$tgiImage -d $$localModelDir; \
+						(umask 077; printf '%s\n' "$$access_token" > .hf_token_cache); \
+					fi; \
 				fi; \
+				echo "Converting and uploading safetensors for model: $$MODEL_ID"; \
+				tgiImage=$$(yq -r '[.ecsModels[] | select(.inferenceContainer == "tgi") | .baseImage] | first' $(PROJECT_DIR)/config-custom.yaml); \
+				echo "$$tgiImage"; \
+				$(PROJECT_DIR)/scripts/convert-and-upload-model.sh -m "$$MODEL_ID" -s $(MODEL_BUCKET) -a "$$access_token" -t "$$tgiImage" -d "$$localModelDir"; \
+			fi; \
 		fi; \
-	)
+	done
 
 ## Run all clean commands
 clean: cleanTypeScript cleanPython cleanCfn cleanMisc
@@ -243,6 +247,7 @@ cleanCfn:
 ## Delete all misc files
 cleanMisc:
 	@find . -type f -name "*.DS_Store" -delete
+	@rm -f .hf_token_cache  # drop the HuggingFace token cached by modelCheck
 
 
 ## Login Docker CLI to Amazon Elastic Container Registry
@@ -284,7 +289,7 @@ endef
 ## Deploy all infrastructure
 deploy: installPythonRequirements dockerCheck dockerLogin cleanMisc modelCheck buildNpmModules
 	$(call print_config)
-ifneq (,$(findstring true, $(HEADLESS)))
+ifeq ($(HEADLESS),true)
 	npx cdk deploy ${STACK} $(if $(PROFILE),--profile ${PROFILE}) --require-approval never -c ${ENV}='$(shell echo '${${ENV}}')';
 else
 	@printf "Is the configuration correct? [y/N]  "\
@@ -298,7 +303,7 @@ endif
 ## Tear down all infrastructure
 destroy: cleanMisc
 	$(call print_config)
-ifneq (,$(findstring true, $(HEADLESS)))
+ifeq ($(HEADLESS),true)
 	npx cdk destroy ${STACK} --force $(if $(PROFILE),--profile ${PROFILE});
 else
 	@printf "Is the configuration correct? [y/N]  "\

diff --git a/README.md b/README.md
@@ -1,82 +1,34 @@
 # LLM Inference Solution for Amazon Dedicated Cloud (LISA)
-
 [![Full Documentation](https://img.shields.io/badge/Full%20Documentation-blue?style=for-the-badge&logo=Vite&logoColor=white)](https://awslabs.github.io/LISA/)
-
 ## What is LISA?
-
-LISA is an infrastructure-as-code solution providing scalable, low latency access to customers’ generative LLMs and
-embedding language models. LISA accelerates and supports customers’ GenAI experimentation and adoption, particularly in
-regions where Amazon Bedrock is not available. LISA allows customers to move quickly rather than independently solve the
-undifferentiated heavy lifting of hosting and inference architecture. Customers deploy LISA into a single AWS account
-and integrate it with an identity provider. Customers bring their own models to LISA for self-hosting and inference
-supported by Amazon Elastic Container Service (ECS). Model configuration is managed through LISA’s model management
-APIs.
-
-As use cases and model requirements grow, customers can configure LISA with external model providers. Through OpenAI's
-API spec via the LiteLLM proxy, LISA is compatible with 100+ models from various providers, including Amazon Bedrock and
-Amazon Jumpstart. LISA customers can centralize communication across many model providers via LiteLLM, leveraging LISA
-for model orchestration. Using LISA as a model orchestration layer allows customers to standardize integrations with
-externally hosted models in a single place. Without an orchestration layer, customers must individually manage unique
-API integrations with each provider.
-
+Our large language model (LLM) inference solution for the Amazon Dedicated Cloud (ADC), LISA, is an open source infrastructure-as-code solution. Customers deploy LISA directly into an Amazon Web Services (AWS) account. While specially designed for ADC regions that support government customers' most sensitive workloads, LISA is also compatible with commercial regions. LISA supports model self-hosting via Amazon Elastic Container Service (ECS). LISA's LiteLLM support also makes it compatible with 100+ models hosted by external model providers, including Amazon Bedrock. LISA further complements Amazon Bedrock by accelerating GenAI adoption. LISA's optional chat assistant user interface (UI) supports model management, model prompting, document summarization, chat session management, prompt libraries, retrieval augmented generation (RAG), automated document ingestion pipelines, and other advanced features. Customers can choose to integrate custom UIs directly with LISA, relying on LISA for centralized model orchestration, chat session management, and RAG. LISA is scalable and ready to support production use cases. The roadmap is customer-driven, with new capabilities launching monthly.
 ## Key Features
-
-* **Self Host Models:** Bring your own text generation and embedding models to LISA for hosting and inference.
-* **Model Orchestration:** Centralize and standardize configuration with 100+ models from model providers via LiteLLM,
-  including Amazon Bedrock models.
-* **Chatbot User Interface:** Through the chatbot user interface, users can prompt LLMs, receive responses, modify prompt
-  templates, change model arguments, and manage their session history. Administrators can control available features via
-  the configuration page.
-* **Retrieval-augmented generation (RAG):** RAG reduces the need for fine-tuning, an expensive and time-consuming
-  undertaking, and delivers more contextually relevant outputs. LISA offers RAG through Amazon OpenSearch or
-  PostgreSQL’s PGVector extension on Amazon RDS.
-* **Non-RAG Model Context:** Users can upload documents to their chat sessions to enhance responses or support use cases
-  like document summarization.
-* **Model Management:** Administrators can add, remove, and update models configured with LISA through the model management
-  configuration page or APIs.
-* **OpenAI API spec:** LISA can be configured with compatible tooling. For example, customers can configure LISA as the
-  model provider for the [Continue](https://www.continue.dev/) plugin, an open-source AI code assistance for JetBrains and Visual Studio Code
-  integrated development environments (IDEs). This allows users to select from any LISA-configured model to support LLM
-  prompting directly in their IDE.
-* **Libraries:** If your workflow includes libraries such as [LangChain](https://python.langchain.com/)
-  or [OpenAI](https://github.com/openai/openai-python), then you can place LISA in your
-  application by changing only the endpoint and headers for the client objects.
-* **FedRAMP:** The AWS services that LISA leverages are FedRAMP High compliant.
-* **Ongoing Releases:** We offer on-going release with new functionality. LISA’s roadmap is customer driven.
-
+* **Open source**: No subscription or licensing fees. LISA costs are based on service usage. The roadmap is customer-driven with monthly releases. LISA is backed by a software development team.
+* **Model Flexibility**: Bring your own models for self-hosting, or quickly configure LISA with 100+ models supported by third-party model providers, including Amazon Bedrock.
+* **Model Orchestration**: Centralize and standardize unique API calls to third-party model providers automatically with LISA via LiteLLM. LISA standardizes the unique API calls into the OpenAI format automatically. All that is required is an API key, model name, and API endpoint.
+* **Modular Components**: Accelerate GenAI adoption with secure, scalable software. LISA supports various use cases through configurable components: model serving and orchestration, chat user interface with advanced capabilities, authentication, retrieval augmented generation (RAG), Anthropic’s Model Context Protocol (MCP), and APIs.
+* **CodeGen**: Supports OpenAI’s API specification, making LISA easily configurable with compatible solutions like the Continue plugin for VSCode and JetBrains integrated development environments (IDEs). This allows users to select from any LISA-configured model to support LLM prompting directly in their IDE.
+* **FedRAMP**: Leverages FedRAMP High compliant services.
 ## Deployment Prerequisites
-
 ### Pre-Deployment Steps
-
-* Set up and have access to an AWS account with appropriate permissions
-    * All the resource creation that happens as part of CDK deployments expects Administrator or Administrator-like
-      permissions with resource creation and mutation permissions. Installation will not succeed if this profile does
-      not have permissions to create and edit arbitrary resources for the system. Note: This level of permissions is not
-      required for the runtime of LISA. This is only necessary for deployment and subsequent updates.
-* Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles
-* Optional: If using the chat UI, Have your Identity Provider (IdP) information and access
-* Optional: Have your VPC information available, if you are using an existing one for your deployment
-* Note: CDK and Model Management both leverage AWS Systems Manager Agent (SSM) parameter store. Confirm that SSM is approved for use by your organization before beginning.
-
+* Set up or have access to an AWS account.
+* Ensure that your AWS account has the appropriate permissions. Resource creation during the AWS CDK deployment expects Administrator or Administrator-like permissions, including resource creation and mutation permissions. Installation will not succeed if the profile used for deployment does not have permissions to create and edit arbitrary resources for the system. This level of permissions is not required for the runtime of LISA. This is only necessary for deployment and subsequent updates.
+* If using the chat UI, have your Identity Provider (IdP) information and access available.
+* If using an existing VPC, have its information available.
+* Familiarity with AWS Cloud Development Kit (CDK) and infrastructure-as-code principles is a plus.
+* AWS CDK and Model Management both leverage the AWS Systems Manager (SSM) Parameter Store. Confirm that SSM is approved for use by your organization before beginning. If you're new to CDK, review the [AWS CDK Documentation](https://docs.aws.amazon.com/cdk/v2/guide/home.html) and consult with your AWS support team.
 ### Software
-
 * AWS CLI installed and configured
 * Python 3.9 or later
 * Node.js 14 or later
 * Docker installed and running
 * Sufficient disk space for model downloads and conversions
-
-
 ## Getting Started
-
 For detailed instructions on setting up, configuring, and deploying LISA, please refer to our separate documentation on
 installation and usage.
-
 - [Deployment Guide](lib/docs/admin/getting-started.md)
 - [Configuration](lib/docs/config/configuration.md)
-
 ## License
-
 Although this repository is released under the Apache 2.0 license, when configured to use PGVector as a RAG store it
 uses
 the third party `psycopg2-binary` library. The `psycopg2-binary` project's licensing includes

diff --git a/VERSION b/VERSION
@@ -1 +1 @@
-5.0.0
+5.0.1