diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..b490243 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,27 @@ +name: CUDA CMake CI + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + build: + runs-on: ubuntu-latest + container: + image: leonardeyer/cuda-builder:latest + steps: + - name: Checkout source + uses: actions/checkout@v4 + + - name: Configure with CMake + run: cmake -B build -S . + + - name: Build + run: cmake --build build --parallel + + - uses: actions/upload-artifact@v4 + with: + name: mps + path: build/mps diff --git a/.gitignore b/.gitignore index cb385db..c4b8c6f 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ *.ptx *.cubin *.fatbin +.* +*__pycache__* \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b82b19..5bbc13c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,18 +35,7 @@ CPMAddPackage("gh:gabime/spdlog@1.8.2") include_directories(include) # mps -add_executable(mps - src/mps.cu - - include/tensor.h - include/decomposition.h - include/permutation.h - include/Operators.hpp - include/mps.h - include/tensordot.h - include/cutensor_utils.h - include/reduction.h -) +add_executable(mps src/mps.cu) target_link_libraries(mps PRIVATE cuTENSOR::cuTENSOR @@ -70,8 +59,7 @@ target_compile_options(cuda_compile_options INTERFACE target_link_libraries(mps PRIVATE cuda_compile_options) -set_target_properties(mps PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) +set_target_properties(mps PROPERTIES CUDA_SEPARABLE_COMPILATION ON) # test_mps @@ -86,61 +74,4 @@ target_link_libraries(test_mps PRIVATE target_compile_options(test_mps PRIVATE $<$:--extended-lambda>) -set_target_properties(test_mps PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) - -# Add the test executable -add_executable(test_tensordot tests/test_tensordot.cu) - -# Link against CUTENSOR and CUDA libraries. -target_link_libraries(test_tensordot - cuTENSOR::cuTENSOR - CUDA::cudart - CUDA::cublas - spdlog::spdlog -) - -set_target_properties(test_tensordot PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) - - -add_executable(test_einsum tests/test_einsum.cu) - -# Link against CUTENSOR and CUDA libraries. -target_link_libraries(test_einsum - cuTENSOR::cuTENSOR - CUDA::cudart - CUDA::cublas - spdlog::spdlog -) - -set_target_properties(test_einsum PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) - -add_executable(test_permutation tests/test_permutation.cu) - -# Link against CUTENSOR and CUDA libraries. -target_link_libraries(test_permutation - cuTENSOR::cuTENSOR - CUDA::cudart - CUDA::cublas - spdlog::spdlog -) - -set_target_properties(test_permutation PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) - - -add_executable(test_truncation tests/test_reduction.cu) - -# Link against CUTENSOR and CUDA libraries. 
-target_link_libraries(test_truncation - cuTENSOR::cuTENSOR - CUDA::cudart - CUDA::cublas - spdlog::spdlog - cuda_compile_options -) - -set_target_properties(test_truncation PROPERTIES - CUDA_SEPARABLE_COMPILATION ON) +set_target_properties(test_mps PROPERTIES CUDA_SEPARABLE_COMPILATION ON) \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..5e6dd37 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,20 @@ +# Use the NVIDIA CUDA 12.8.0 development image with Ubuntu 24.04 +FROM nvidia/cuda:12.8.0-devel-ubuntu24.04 + +# Set non-interactive mode for apt +ENV DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y \ build-essential \ cmake \ git \ nvtop \ python3 \ python3-pip \ python3-dev \ libcutensor2 \ && rm -rf /var/lib/apt/lists/* + +# Install CuPy for CUDA 12.x +RUN python3 -m pip install cupy-cuda12x --break-system-packages diff --git a/README.md b/README.md index 8c92515..e0057be 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ The main computational task involves decomposing a tensor of shape $(D_1, d, d, Since exact SVD decompositions do not parallelize well on GPUs ([ref](https://arxiv.org/pdf/2212.09782)) I opted for the existing one-sided Jacobi based implementation `cusolverDngesvdj` from [cuSolver](https://docs.nvidia.com/cuda/cusolver/index.html#cusolverdn-t-gesvdj). -The implementation focused mainly on avoiding copying data to the host when possible and preallocating all the required memory / workspaces up front to aid getting the data ready for the decomposition. This involved using scractch tensors and custom out-of-place kernels to allow directly writing to the destination tensor memory. The truncation and normalization procedure was fused into a single kernel making use of cooperative groups to synchronize across blocks. +The implementation focuses mainly on avoiding copying data to the host when possible and pre-allocating all the required memory / workspaces up front to streamline getting the data ready for the decomposition. This involves using scratch tensors and custom out-of-place kernels that write directly to the destination tensor memory.
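For reference, the two-pass truncation-and-normalization scheme that replaces the fused cooperative-groups kernel (see the `include/reduction.h` hunk further down in this diff) reduces to the following logic. This is a minimal NumPy sketch, not code from the repository; the function name is illustrative, and `tol` plays the same role as in `truncation_normalize_count`:

```python
import numpy as np

def truncate_and_normalize(s, tol):
    """Sketch of the two device passes in include/reduction.h.

    s   : singular values, sorted in descending order
    tol : cutoff applied to the squared values
    (Assumes at least one squared value survives the cutoff.)
    """
    sq = s * s
    # Pass 1 (truncation_normalize_count): count the kept values and
    # accumulate their squared norm; the scan can stop at the first
    # squared value below tol because the input is sorted.
    count = int(np.sum(sq >= tol))
    norm_sq = np.sum(sq[:count])
    # Pass 2 (truncation_normalize_divide): rescale so the kept
    # spectrum has unit L2 norm.
    return count, s / np.sqrt(norm_sq)
```

Splitting the work into two launches avoids the grid-wide synchronization that the cooperative-groups version needed between counting and dividing.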
--- ##### cuTensorNet diff --git a/include/MPSSimulator.cuh b/include/MPSSimulator.cuh deleted file mode 100644 index e02f2e3..0000000 --- a/include/MPSSimulator.cuh +++ /dev/null @@ -1,362 +0,0 @@ -#pragma once - -#include "Operators.hpp" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include // To extend fmt functionality for custom types -#include - - -#define HANDLE_CUDA_ERROR(x) \ -{ const auto err = x; \ - if( err != cudaSuccess ) \ - { printf("CUDA error %s in line %d\n", cudaGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \ -}; - -#define HANDLE_CUTN_ERROR(x) \ -{ const auto err = x; \ - if( err != CUTENSORNET_STATUS_SUCCESS ) \ - { printf("cuTensorNet error %s in line %d\n", cutensornetGetErrorString(err), __LINE__); fflush(stdout); std::abort(); } \ -}; - - - - -namespace mps { - using Scalar = float; - static auto dataType = CUDA_C_32F; - - static std::size_t fpsize = sizeof(Scalar); - - struct Extents { - std::vector > extents; - std::vector extentsPtr; - std::vector d_mpsTensors; - - explicit Extents(int32_t numQubits, int64_t maxExtent) : extentsPtr(numQubits), - d_mpsTensors(numQubits, nullptr) { - for (int32_t i = 0; i < numQubits; i++) { - if (i == 0) { - // left boundary MPS tensor - extents.push_back({2, maxExtent}); - HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fpsize)); - } else if (i == numQubits - 1) { - // right boundary MPS tensor - extents.push_back({maxExtent, 2}); - HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * 2 * fpsize)); - } else { - // middle MPS tensors - extents.push_back({maxExtent, 2, maxExtent}); - HANDLE_CUDA_ERROR(cudaMalloc(&d_mpsTensors[i], 2 * maxExtent * maxExtent * 2 * fpsize)); - } - extentsPtr[i] = extents[i].data(); - } - } - - ~Extents() { - for (auto &d_mpsTensor: d_mpsTensors) { - HANDLE_CUDA_ERROR(cudaFree(d_mpsTensor)); - } - } - }; - - struct Simulator { - cutensornetHandle_t cutnHandle{nullptr}; - cutensornetState_t state{nullptr}; - - void *d_scratch{nullptr}; - std::size_t scratchSize{0}; - - cutensornetWorkspaceDescriptor_t workDesc{nullptr}; - - cutensornetStateExpectation_t expectation{nullptr}; - - int32_t numQubits; - std::vector qubitDims; - Extents extents; // Stores tensor dimensions for MPS representation. - - // Constructor initializes everything. - Simulator(int deviceId, int numQubits, int64_t maxExtent) : numQubits(numQubits), qubitDims(numQubits, 2), - extents(numQubits, maxExtent) { - - // Initialize the cuTensorNet library - HANDLE_CUDA_ERROR(cudaSetDevice(deviceId)); - HANDLE_CUTN_ERROR(cutensornetCreate(&cutnHandle)); - - // Initialize the quantum state and MPS representation - { - HANDLE_CUTN_ERROR(cutensornetCreateState(cutnHandle, - CUTENSORNET_STATE_PURITY_PURE, - numQubits, - std::vector(numQubits, 2).data(), - dataType, - &state)); - - HANDLE_CUTN_ERROR(cutensornetStateFinalizeMPS(cutnHandle, state, - CUTENSORNET_BOUNDARY_CONDITION_OPEN, - extents.extentsPtr.data(), /*strides=*/nullptr)); - } - - // Allocate scratch memory - { - // Query the free memory on Device - std::size_t freeSize{0}, totalSize{0}; - HANDLE_CUDA_ERROR(cudaMemGetInfo(&freeSize, &totalSize)); - scratchSize = (freeSize - (freeSize % 4096)) / 2; // use half of available memory with alignment - HANDLE_CUDA_ERROR(cudaMalloc(&d_scratch, scratchSize)); - } - // Setup workspace - { - HANDLE_CUTN_ERROR(cutensornetCreateWorkspaceDescriptor(cutnHandle, &workDesc)); - } - - // Set up the SVD method for truncation. 
- { - // Optional, set up the SVD method for truncation. - cutensornetTensorSVDAlgo_t algo = CUTENSORNET_TENSOR_SVD_ALGO_GESVDJ; - HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, state, - CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO, &algo, sizeof(algo))); - - double absCutoff = 1e-12; - HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, state, - CUTENSORNET_STATE_CONFIG_MPS_SVD_ABS_CUTOFF, &absCutoff, sizeof(absCutoff))); - - /*double relCutoff = 1e-12; - HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, state, - CUTENSORNET_STATE_CONFIG_MPS_SVD_REL_CUTOFF, &relCutoff, sizeof(relCutoff)));*/ - - // CUTENSORNET_STATE_CONFIG_MPS_SVD_ALGO_PARAMS - // cutensornetGesvdrParams_t algoParams = CUTENSORNET_TENSOR_SVD_ALGO_PARAMS_DEFAULT; - - cutensornetTensorSVDNormalization_t normalization = CUTENSORNET_TENSOR_SVD_NORMALIZATION_L2; - HANDLE_CUTN_ERROR(cutensornetStateConfigure(cutnHandle, state, - CUTENSORNET_STATE_CONFIG_MPS_SVD_S_NORMALIZATION, &normalization, sizeof(normalization))); - } - } - - int64_t apply_operator(operators::DeviceOperator &op, const std::vector &qubits, bool unitary = true) { - int64_t id; - HANDLE_CUTN_ERROR( - cutensornetStateApplyTensorOperator(cutnHandle, state, - qubits.size(), - qubits.data(), - op.ptr(), - nullptr, 1, 0, unitary, &id)); - return id; - } - - void mps_factorization() { - // Prepare the MPS factorization & setup workspace memory requirements - { - spdlog::debug("Preparing MPS factorization..."); - HANDLE_CUTN_ERROR(cutensornetStatePrepare(cutnHandle, state, scratchSize, workDesc, 0x0)); - - double flops {0.0}; - HANDLE_CUTN_ERROR(cutensornetStateGetInfo(cutnHandle, state, - CUTENSORNET_STATE_INFO_FLOPS, &flops, sizeof(flops))); - - - spdlog::debug("Total flop count = {} GFlop", (flops / 1e9)); - if(flops < 0.0) { - spdlog::error("ERROR: Negative Flop count!"); - std::abort(); - } - - int64_t workSize = 0; - // Attach the workspace buffer - HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle, - workDesc, - CUTENSORNET_WORKSIZE_PREF_MAX, - CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_SCRATCH, - &workSize)); - spdlog::debug("Required scratch GPU workspace size (bytes) = {}", workSize); - if (workSize <= scratchSize) { - HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, - workSize)); - } else { - spdlog::error("ERROR: Insufficient workspace size on Device!"); - std::abort(); - } - - int64_t requiredWorkspaceSizeCache = 0; - HANDLE_CUTN_ERROR( cutensornetWorkspaceGetMemorySize(cutnHandle, - workDesc, - CUTENSORNET_WORKSIZE_PREF_MIN, - CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_CACHE, - - &requiredWorkspaceSizeCache) ); - - spdlog::debug("Required cache GPU workspace size (bytes) = {}", requiredWorkspaceSizeCache); - - - void* workCache = nullptr; - HANDLE_CUDA_ERROR( cudaMalloc(&workCache, requiredWorkspaceSizeCache) ); - HANDLE_CUTN_ERROR( cutensornetWorkspaceSetMemory(cutnHandle, - workDesc, - CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_CACHE, - workCache, - requiredWorkspaceSizeCache) ); - } - - // Set the workspace buffer - auto &extentsPtr = extents.extentsPtr; - auto &d_mpsTensors = extents.d_mpsTensors; - - HANDLE_CUTN_ERROR(cutensornetStateCompute(cutnHandle, state, - workDesc, extentsPtr.data(), /*strides=*/nullptr, - d_mpsTensors.data(), - 0x0)); - - /* - { - // cutensornetGetOutputStateDetails needs to be called 3 times to get all the details - auto numTensorsOut = 0; - 
HANDLE_CUTN_ERROR(cutensornetGetOutputStateDetails(cutnHandle, state, &numTensorsOut, nullptr, nullptr, nullptr)); - spdlog::debug("Number of output tensors: {}", numTensorsOut); - - auto numModesOut = std::vector(numTensorsOut); - HANDLE_CUTN_ERROR(cutensornetGetOutputStateDetails(cutnHandle, state, &numTensorsOut, numModesOut.data(), nullptr, nullptr)); - spdlog::debug("Number of modes for each tensor: [{}]", fmt::join(numModesOut, ", ")); - - auto extentsOutVec = std::vector>(numTensorsOut); - auto extentsOutPtr = std::vector(numTensorsOut); - for (int i = 0; i < numTensorsOut; i++) { - auto nModes = numModesOut[i]; - if (nModes <= 0) { - spdlog::error("Invalid number of modes for tensor {}: {}", i, nModes); - throw std::runtime_error("Invalid number of modes"); - } - extentsOutVec[i].resize(nModes); - extentsOutPtr[i] = extentsOutVec[i].data(); - } - - HANDLE_CUTN_ERROR(cutensornetGetOutputStateDetails(cutnHandle, state, &numTensorsOut, nullptr, extentsOutPtr.data(), nullptr)); - - spdlog::debug("MPS factorization computed successfully!"); - for (int i = 0; i < numTensorsOut; i++) { - spdlog::debug("Tensor {} extents: [{}]", i, fmt::join(extentsOutVec[i], ", ")); - } - } - */ - - } - - - Scalar expectation_value(int32_t index, const operators::DeviceOperator &op) { - - int64_t id; - // Create an empty tensor network operator (projector 00 - cutensornetNetworkOperator_t hamiltonian; - HANDLE_CUTN_ERROR( - cutensornetCreateNetworkOperator(cutnHandle, numQubits, qubitDims.data(), dataType, &hamiltonian)); - // Append component P0 (projector 00) to the tensor network operator - { - const int32_t numModes[] = {1}; - const int32_t modes[] = {index}; - const int32_t *stateModes[] = {modes}; - const void *gateData[] = {op.ptr()}; // GPU pointers to gate data - HANDLE_CUTN_ERROR( - cutensornetNetworkOperatorAppendProduct(cutnHandle, hamiltonian, cuDoubleComplex{1.0, 0.0}, - 1, numModes, stateModes, NULL, gateData, &id)); - } - - - HANDLE_CUTN_ERROR(cutensornetCreateExpectation(cutnHandle, state, hamiltonian, &expectation)); - - int32_t numHyperSamples = 8; - HANDLE_CUTN_ERROR(cutensornetExpectationConfigure(cutnHandle, expectation, - CUTENSORNET_EXPECTATION_CONFIG_NUM_HYPER_SAMPLES, - &numHyperSamples, sizeof(numHyperSamples))); - - // Prepare the specified quantum circuit expectation value for computation - HANDLE_CUTN_ERROR( - cutensornetExpectationPrepare(cutnHandle, expectation, scratchSize, workDesc, - 0x0)); - - spdlog::debug("Prepared the specified quantum circuit expectation value"); - auto flops = 0.0; - HANDLE_CUTN_ERROR(cutensornetExpectationGetInfo(cutnHandle, expectation, - CUTENSORNET_EXPECTATION_INFO_FLOPS, &flops, sizeof(flops))); - spdlog::debug("Total flop count = {} GFlop", (flops / 1e9)); - if (flops <= 0.0) { - spdlog::error("ERROR: Invalid Flop count!"); - std::abort(); - } - - { - int64_t workSize = 0; - // Attach the workspace buffer - HANDLE_CUTN_ERROR(cutensornetWorkspaceGetMemorySize(cutnHandle, - workDesc, - CUTENSORNET_WORKSIZE_PREF_MAX, - CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_SCRATCH, - &workSize)); - spdlog::debug("Required scratch GPU workspace size (bytes) = {}", workSize); - if (workSize <= scratchSize) { - HANDLE_CUTN_ERROR(cutensornetWorkspaceSetMemory(cutnHandle, workDesc, CUTENSORNET_MEMSPACE_DEVICE, - CUTENSORNET_WORKSPACE_SCRATCH, d_scratch, - workSize)); - } else { - spdlog::error("ERROR: Insufficient workspace size on Device!"); - std::abort(); - } - } - - const auto [expectVal, stateNorm2] = [&]() -> std::pair { - // Compute the 
specified quantum circuit expectation value - Complex expectVal{0.0, 0.0}, stateNorm2{0.0, 0.0}; - HANDLE_CUTN_ERROR(cutensornetExpectationCompute(cutnHandle, expectation, workDesc, - static_cast(&expectVal), - static_cast(&stateNorm2), 0x0)); - - return {expectVal, stateNorm2}; - }(); - - HANDLE_CUTN_ERROR(cutensornetDestroyNetworkOperator(hamiltonian)); - HANDLE_CUTN_ERROR(cutensornetDestroyExpectation(expectation)); - - return (expectVal / stateNorm2).real(); - } - - std::vector expectation_value(operators::DeviceOperator &op, const std::vector &qubits) { - std::vector probs{}; - probs.resize(qubits.size()); - for (int i = 0; i < qubits.size(); i++) { - probs[i] = expectation_value(qubits[i], op); - } - return probs; - } - - std::vector expectation_value(operators::DeviceOperator &op) { - std::vector probs{}; - probs.resize(numQubits); - for (int i = 0; i < numQubits; i++) { - probs[i] = expectation_value(i, op); - } - - return probs; - } - - ~Simulator() { - HANDLE_CUTN_ERROR(cutensornetDestroy(cutnHandle)); - HANDLE_CUTN_ERROR(cutensornetDestroyState(state)); - - HANDLE_CUDA_ERROR(cudaFree(d_scratch)); - HANDLE_CUTN_ERROR(cutensornetDestroyWorkspaceDescriptor(workDesc)); - } - }; -} // namespace mps::simulator diff --git a/include/Operators.hpp b/include/Operators.hpp index 461cdb9..1f8b6f5 100644 --- a/include/Operators.hpp +++ b/include/Operators.hpp @@ -10,7 +10,6 @@ #include #include -// TODO: remove operators and simply merge into tensor namespace operators { using Scalar = ScalarPrecision; using Complex = std::complex; diff --git a/include/cusolver_utils.h b/include/cusolver_utils.h index e0cd7ce..7f9c3d6 100644 --- a/include/cusolver_utils.h +++ b/include/cusolver_utils.h @@ -1,45 +1,10 @@ -/* - * Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of NVIDIA CORPORATION nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY - * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ - #pragma once -#include -#include #include -#include #include -#include -#include -#include #include #include -#include // cusolver API error checking @@ -61,241 +26,3 @@ throw std::runtime_error("cublas error"); \ } \ } while (0) - -// cublas API error checking -#define CUSPARSE_CHECK(err) \ - do { \ - cusparseStatus_t err_ = (err); \ - if (err_ != CUSPARSE_STATUS_SUCCESS) { \ - printf("cusparse error %d at %s:%d\n", err_, __FILE__, __LINE__); \ - throw std::runtime_error("cusparse error"); \ - } \ - } while (0) - -// memory alignment -#define ALIGN_TO(A, B) (((A + B - 1) / B) * B) - -// device memory pitch alignment -static const size_t device_alignment = 32; - -// type traits -template struct traits; - -template <> struct traits { - // scalar type - typedef float T; - typedef T S; - - static constexpr T zero = 0.f; - static constexpr cudaDataType cuda_data_type = CUDA_R_32F; -#if CUDART_VERSION >= 11000 - static constexpr cusolverPrecType_t cusolver_precision_type = CUSOLVER_R_32F; -#endif - - inline static S abs(T val) { return fabs(val); } - - template inline static T rand(RNG &gen) { return (S)gen(); } - - inline static T add(T a, T b) { return a + b; } - - inline static T mul(T v, S f) { return v * f; } -}; - -template <> struct traits { - // scalar type - typedef double T; - typedef T S; - - static constexpr T zero = 0.; - static constexpr cudaDataType cuda_data_type = CUDA_R_64F; -#if CUDART_VERSION >= 11000 - static constexpr cusolverPrecType_t cusolver_precision_type = CUSOLVER_R_64F; -#endif - - inline static S abs(T val) { return fabs(val); } - - template inline static T rand(RNG &gen) { return (S)gen(); } - - inline static T add(T a, T b) { return a + b; } - - inline static T mul(T v, S f) { return v * f; } -}; - -template <> struct traits { - // scalar type - typedef float S; - typedef cuFloatComplex T; - - static constexpr T zero = {0.f, 0.f}; - static constexpr cudaDataType cuda_data_type = CUDA_C_32F; -#if CUDART_VERSION >= 11000 - static constexpr cusolverPrecType_t cusolver_precision_type = CUSOLVER_C_32F; -#endif - - inline static S abs(T val) { return cuCabsf(val); } - - template inline static T rand(RNG &gen) { - return make_cuFloatComplex((S)gen(), (S)gen()); - } - - inline static T add(T a, T b) { return cuCaddf(a, b); } - inline static T add(T a, S b) { return cuCaddf(a, make_cuFloatComplex(b, 0.f)); } - - inline static T mul(T v, S f) { return make_cuFloatComplex(v.x * f, v.y * f); } -}; - -template <> struct traits { - // scalar type - typedef double S; - typedef cuDoubleComplex T; - - static constexpr T zero = {0., 0.}; - static constexpr cudaDataType cuda_data_type = CUDA_C_64F; -#if CUDART_VERSION >= 11000 - static constexpr cusolverPrecType_t cusolver_precision_type = CUSOLVER_C_64F; -#endif - - inline static S abs(T val) { return cuCabs(val); } - - template inline static T rand(RNG &gen) { - return make_cuDoubleComplex((S)gen(), (S)gen()); - } - - inline static T add(T a, T b) { return cuCadd(a, b); } - inline static T add(T a, S b) { return cuCadd(a, make_cuDoubleComplex(b, 0.)); } - - inline static T mul(T v, S f) { return make_cuDoubleComplex(v.x * f, v.y * f); } -}; - -template void print_matrix(const int &m, const int &n, const T *A, const int &lda); - -template <> void print_matrix(const int &m, const int &n, const float *A, const int &lda) { - for (int i = 0; i < m; i++) { - for (int j = 0; j < n; j++) { - std::printf("%0.2f ", A[j * lda + i]); - } - std::printf("\n"); - } -} - -template <> void print_matrix(const int &m, const int &n, const 
double *A, const int &lda) { - for (int i = 0; i < m; i++) { - for (int j = 0; j < n; j++) { - std::printf("%0.2f ", A[j * lda + i]); - } - std::printf("\n"); - } -} - -template <> void print_matrix(const int &m, const int &n, const cuComplex *A, const int &lda) { - for (int i = 0; i < m; i++) { - for (int j = 0; j < n; j++) { - std::printf("%0.2f + %0.2fj ", A[j * lda + i].x, A[j * lda + i].y); - } - std::printf("\n"); - } -} - -template <> -void print_matrix(const int &m, const int &n, const cuDoubleComplex *A, const int &lda) { - for (int i = 0; i < m; i++) { - for (int j = 0; j < n; j++) { - std::printf("%0.2f + %0.2fj ", A[j * lda + i].x, A[j * lda + i].y); - } - std::printf("\n"); - } -} - -template -void generate_random_matrix(cusolver_int_t m, cusolver_int_t n, T **A, int *lda) { - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_real_distribution::S> dis(-1.0, 1.0); - auto rand_gen = std::bind(dis, gen); - - *lda = n; - - size_t matrix_mem_size = static_cast(*lda * m * sizeof(T)); - // suppress gcc 7 size warning - if (matrix_mem_size <= PTRDIFF_MAX) - *A = (T *)malloc(matrix_mem_size); - else - throw std::runtime_error("Memory allocation size is too large"); - - if (*A == NULL) - throw std::runtime_error("Unable to allocate host matrix"); - - // random matrix and accumulate row sums - for (int i = 0; i < m; ++i) { - for (int j = 0; j < n; ++j) { - T *A_row = (*A) + *lda * i; - A_row[j] = traits::rand(rand_gen); - } - } -} - -// Makes matrix A of size mxn and leading dimension lda diagonal dominant -template -void make_diag_dominant_matrix(cusolver_int_t m, cusolver_int_t n, T *A, int lda) { - for (int i = 0; i < std::min(m, n); ++i) { - T *A_row = A + lda * i; - auto row_sum = traits::S>::zero; - for (int j = 0; j < n; ++j) { - row_sum += traits::abs(A_row[j]); - } - A_row[i] = traits::add(A_row[i], row_sum); - } -} - -// Returns cudaDataType value as defined in library_types.h for the string containing type name -cudaDataType get_cuda_library_type(std::string type_string) { - if (type_string.compare("CUDA_R_16F") == 0) - return CUDA_R_16F; - else if (type_string.compare("CUDA_C_16F") == 0) - return CUDA_C_16F; - else if (type_string.compare("CUDA_R_32F") == 0) - return CUDA_R_32F; - else if (type_string.compare("CUDA_C_32F") == 0) - return CUDA_C_32F; - else if (type_string.compare("CUDA_R_64F") == 0) - return CUDA_R_64F; - else if (type_string.compare("CUDA_C_64F") == 0) - return CUDA_C_64F; - else if (type_string.compare("CUDA_R_8I") == 0) - return CUDA_R_8I; - else if (type_string.compare("CUDA_C_8I") == 0) - return CUDA_C_8I; - else if (type_string.compare("CUDA_R_8U") == 0) - return CUDA_R_8U; - else if (type_string.compare("CUDA_C_8U") == 0) - return CUDA_C_8U; - else if (type_string.compare("CUDA_R_32I") == 0) - return CUDA_R_32I; - else if (type_string.compare("CUDA_C_32I") == 0) - return CUDA_C_32I; - else if (type_string.compare("CUDA_R_32U") == 0) - return CUDA_R_32U; - else if (type_string.compare("CUDA_C_32U") == 0) - return CUDA_C_32U; - else - throw std::runtime_error("Unknown CUDA datatype"); -} - -// Returns cusolverIRSRefinement_t value as defined in cusolver_common.h for the string containing -// solver name -cusolverIRSRefinement_t get_cusolver_refinement_solver(std::string solver_string) { - if (solver_string.compare("CUSOLVER_IRS_REFINE_NONE") == 0) - return CUSOLVER_IRS_REFINE_NONE; - else if (solver_string.compare("CUSOLVER_IRS_REFINE_CLASSICAL") == 0) - return CUSOLVER_IRS_REFINE_CLASSICAL; - else if 
(solver_string.compare("CUSOLVER_IRS_REFINE_GMRES") == 0) - return CUSOLVER_IRS_REFINE_GMRES; - else if (solver_string.compare("CUSOLVER_IRS_REFINE_CLASSICAL_GMRES") == 0) - return CUSOLVER_IRS_REFINE_CLASSICAL_GMRES; - else if (solver_string.compare("CUSOLVER_IRS_REFINE_GMRES_GMRES") == 0) - return CUSOLVER_IRS_REFINE_GMRES_GMRES; - else - printf("Unknown solver parameter: \"%s\"\n", solver_string.c_str()); - - return CUSOLVER_IRS_REFINE_NOT_SET; -} diff --git a/include/decomposition.h b/include/decomposition.h index eca4452..175ac0a 100644 --- a/include/decomposition.h +++ b/include/decomposition.h @@ -420,6 +420,7 @@ namespace decomposition { } } + // TODO: To perform fixed extent truncation, we directly set maxExtent to half of the full extent corresponding to exact SVD. void compute(tensor::DeviceTensor &A, tensor::DeviceTensor &S, tensor::DeviceTensor &U, diff --git a/include/reduction.h b/include/reduction.h index eef4c76..d57b805 100644 --- a/include/reduction.h +++ b/include/reduction.h @@ -30,13 +30,13 @@ namespace reduction { auto stride = blockDim.x * gridDim.x; while (idx < n) { - auto val = input[idx]; + auto val = input[idx] * input[idx]; if (val < tol) { // since the input is sorted, we can break here return {sum, norm}; } sum++; - norm += val * val; + norm += val; idx += stride; } return {sum, norm}; } @@ -116,14 +116,11 @@ atomicAdd(count_d, block_count_sum); atomicAdd(normsq_d, block_norm_sum); } + } - - // now we normalize the count for this we want a global sync to make sure all blocks have finished - // Create a grid-level group and synchronize all blocks. - cg::grid_group grid = cg::this_grid(); - grid.sync(); - - // We could actually just divide up to count_d - thread_divide(input, n, block_norm_sum); + template <typename T, typename Scalar> + __global__ void truncation_normalize_divide(T *input, int n, Scalar division) { + thread_divide(input, n, division); } template @@ -139,21 +136,18 @@ size_t aligned_offset = align_up>(base_offset); size_t sharedMemSize = aligned_offset + blockSize * sizeof(Scalar); - void *kernelArgs[] = { - (void *)&count_d, - (void *)&normsq_d, - (void *)&input_d, - (void *)&n, - (void *)&tol - }; - - CUDA_CHECK(cudaLaunchCooperativeKernel( - (void *)truncation_normalize_count, - gridSize, blockSize, kernelArgs, sharedMemSize, stream - )); + truncation_normalize_count<<<gridSize, blockSize, sharedMemSize, stream>>>(count_d, normsq_d, input_d, n, tol); + + // Copy results back to host int count; + Scalar norm; CUDA_CHECK(cudaMemcpyAsync(&count, count_d, sizeof(int), cudaMemcpyDeviceToHost, stream)); + CUDA_CHECK(cudaMemcpyAsync(&norm, normsq_d, sizeof(Scalar), cudaMemcpyDeviceToHost, stream)); + CUDA_CHECK(cudaStreamSynchronize(stream)); + + // Launch normalization kernel + const Scalar divisor = sqrt(norm); + truncation_normalize_divide<<<gridSize, blockSize, 0, stream>>>(input_d, n, divisor); return count; } diff --git a/julia/main.jl b/julia/main.jl new file mode 100644 index 0000000..c47d363 --- /dev/null +++ b/julia/main.jl @@ -0,0 +1,85 @@ +using ITensors, ITensorMPS +using Printf +using CUDA + +""" + Runs an MPS simulation for a Trotterized quantum circuit. + + Args: + T: Number of Trotter steps. + L: Number of sites in the chain. + chi: Maximum bond dimension. + theta: Angle for the Rx and CRx gates.
+""" +function run_mps_simulation(; T=50, L=30, chi=1024, theta=0.14, verbose=true) + cutoff = 1e-12 + # Initialize the sites for the 1D quantum system + sites = siteinds("S=1/2", L) + states = ["Dn" for n in 1:L] + + # Initialize MPS to the |11...1> state + psi = cu(MPS(sites, states)) + # Record mean-P1 at each step + mean_expectations = Float64[] + + # Perform the Trotterized time evolution + for t in 1:T + # Trotter sweep (4 layers of two-site gates) + # Layer 1: CRX on even sites (1-indexed: 1, 3, 5...) + for i in 1:2:(L-1) + op_crx = cu(op("CRx", sites[i], sites[i+1]; θ=theta)) + psi = apply(op_crx, psi; maxdim=chi, cutoff=cutoff) + end + + # Layer 2: CLRX on odd sites (1-indexed: 2, 4, 6...) + for i in 2:2:(L-1) + op_clrx = cu(op("CRx", sites[i+1], sites[i]; θ=theta)) + psi = apply(op_clrx, psi; maxdim=chi, cutoff=cutoff) + end + + # Layer 3: CRX on odd sites (1-indexed: 2, 4, 6...) + for i in 2:2:(L-1) + op_crx = cu(op("CRx", sites[i], sites[i+1]; θ=theta)) + psi = apply(op_crx, psi; maxdim=chi, cutoff=cutoff) + end + + # Layer 4: CLRX on even sites (1-indexed: 1, 3, 5...) + for i in 1:2:(L-1) + op_clrx = cu(op("CRx", sites[i+1], sites[i]; θ=theta)) + psi = apply(op_clrx, psi; maxdim=chi, cutoff=cutoff) + end + + # Compute mean ⟨P1⟩ on all sites + exps = expect(psi, "Proj1") + total = sum(exps) + mean = total / L + push!(mean_expectations, mean) + + # Bond-dimension stats + dims = linkdims(psi) + if verbose + @printf "t = %2d, max(chi) = %4d, mean(chi) = %8.2f\n" t maximum(dims) sum(dims) / L +# for (i, d) in enumerate(dims) +# left_dim = i == 1 ? 1 : dim(linkind(psi, i-1)) +# phys_dim = dim(siteind(psi, i)) +# right_dim = i == L ? 1 : dim(linkind(psi, i)) +# println(" Site $(i-1): chi = $right_dim: shape = ($left_dim, $phys_dim, $right_dim)") +# end + end + end + + # 7. 
Final printout + if verbose + print("[1, " * join([@sprintf("%.6f", m) for m in mean_expectations], ", ") * "]\n") + end +end + +# call the function once for warming up +run_mps_simulation(verbose=false) + +let + start_time = time() + run_mps_simulation(verbose=false) + dt = (time() - start_time) * 1000 + @printf "Execution time: %.0f ms\n" dt +end diff --git a/python/Notebook.ipynb b/python/Notebook.ipynb deleted file mode 100644 index b48ea07..0000000 --- a/python/Notebook.ipynb +++ /dev/null @@ -1,813 +0,0 @@ -{ - "cells": [ - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:09.715602Z", - "start_time": "2025-02-12T09:30:09.706367Z" - } - }, - "cell_type": "code", - "source": [ - "import matplotlib.pyplot as plt\n", - "import numpy as np" - ], - "id": "42ad527ae2278efa", - "outputs": [], - "execution_count": 74 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:09.747371Z", - "start_time": "2025-02-12T09:30:09.721308Z" - } - }, - "cell_type": "code", - "source": [ - "def plus():\n", - " return np.array([1, 1]) / np.sqrt(2)\n", - "\n", - "def minus():\n", - " return np.array([1, -1]) / np.sqrt(2)\n", - "\n", - "def null():\n", - " return np.array([1, 0])\n", - "\n", - "def one():\n", - " return np.array([0, 1])\n", - "\n", - "def multi_kron(*args):\n", - " if len(args) == 1:\n", - " return args[0]\n", - " return np.kron(args[0], multi_kron(*args[1:]))\n", - "\n", - "\n", - "def Rxy(theta):\n", - " cos_theta_2 = np.cos(theta / 2)\n", - " sin_theta_2 = np.sin(theta / 2)\n", - " return np.array([\n", - " [1, 0, 0, 0],\n", - " [0, cos_theta_2, -1j * sin_theta_2, 0],\n", - " [0, -1j * sin_theta_2, cos_theta_2, 0],\n", - " [0, 0, 0, 1]\n", - " ])\n", - "\n", - "def Rx(theta):\n", - " cos_theta_2 = np.cos(theta / 2)\n", - " sin_theta_2 = np.sin(theta / 2)\n", - " return np.array([\n", - " [cos_theta_2, -1j * sin_theta_2],\n", - " [-1j * sin_theta_2, cos_theta_2]\n", - " ])\n", - "\n", - "def SWAP():\n", - " return np.array([\n", - " [1, 0, 0, 0],\n", - " [0, 0, 1, 0],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 0, 1]\n", - " ])\n", - "\n", - "def CNOT():\n", - " return np.array([\n", - " [1, 0, 0, 0],\n", - " [0, 1, 0, 0],\n", - " [0, 0, 0, 1],\n", - " [0, 0, 1, 0]\n", - " ])\n", - "\n", - "\n", - "def X():\n", - " return np.array([[0, 1], [1, 0]])\n", - "\n", - "def Z():\n", - " return np.array([[1, 0], [0, -1]])\n", - "\n", - "def Y():\n", - " return 1j * X() @ Z()\n", - "\n", - "def H():\n", - " return 1 / np.sqrt(2) * (X() + Z())\n", - "\n", - "\n", - "def Z_spider(alpha, n=1, m=1):\n", - "\n", - " zeros_in = multi_kron([null()] * m)\n", - " zeros_out = multi_kron([null()] * n)\n", - "\n", - " ones_in = multi_kron([one()] * m)\n", - " ones_out = multi_kron([one()] * n)\n", - "\n", - " return np.outer(zeros_in, zeros_out) + np.exp(1j*alpha) * np.outer(ones_in, ones_out)\n", - "\n", - "def X_spider(alpha, n=1, m=1):\n", - "\n", - " plus_in = multi_kron([plus()] * m)\n", - " plus_out = multi_kron([plus()] * n)\n", - "\n", - " minus_in = multi_kron([minus()] * m)\n", - " minus_out = multi_kron([minus()] * n)\n", - "\n", - " return np.outer(plus_in, plus_out) + np.exp(1j*alpha) * np.outer(minus_in, minus_out)\n", - "\n", - "\n", - "# Projection operators\n", - "def P0():\n", - " return np.outer(ket0(), ket0())\n", - "\n", - "def P1():\n", - " return np.outer(ket1(), ket1())\n", - "\n", - "def CONTROL(U):\n", - " return np.kron(P0(), np.eye(2)) + np.kron(P1(), U)\n", - "\n", - "def CONTROL_L(U):\n", - " return np.kron(np.eye(2), P0()) + np.kron(U, P1())\n", - 
"\n", - "def ket0():\n", - " \"\"\"Representing spin up\"\"\"\n", - " return np.array([1, 0])\n", - "\n", - "def ket1():\n", - " \"\"\"Representing spin down\"\"\"\n", - " return np.array([0, 1])\n", - "\n", - "def bin2dec(b):\n", - " return int(''.join(str(i) for i in b), 2)\n", - "\n", - "def dec2bin(d, L):\n", - " return [int(i) for i in bin(d)[2:].zfill(L)]\n", - "\n", - "def ket(i, dim=2):\n", - " assert 0 <= i < dim\n", - " return np.eye(dim)[:, i]\n", - "\n", - "def ket_bin(b):\n", - " dec = bin2dec(b)\n", - " dim = 2 ** len(b)\n", - " return ket(dec, dim)\n", - "\n", - "def make_state(L):\n", - " # L times spin down, 1 time spin up\n", - " return np.array([ket1()] * L + [ket0()])\n", - "\n", - "def measure_operator(op, state, qbit_index):\n", - " # measure the expectation value of the operator op on the qbit_index-th qbit of the state\n", - " # return the expectation value and the normalized state\n", - " # op is a 2x2 matrix\n", - " # state is a 2^L dimensional vector\n", - " # qbit_index is an integer\n", - " # return a tuple (expectation_value, normalized_state)\n", - "\n", - " n_qbits = int(np.log2(len(state)))\n", - " assert n_qbits > qbit_index\n", - " assert op.shape == (2, 2)\n", - "\n", - " ops = [np.eye(2)] * n_qbits\n", - " ops[qbit_index] = op\n", - "\n", - " op = None\n", - " for i in range(n_qbits):\n", - " if op is None:\n", - " op = ops[i]\n", - " else:\n", - " op = np.kron(op, ops[i])\n", - "\n", - " unnormalized_expectation_value = np.conj(state) @ op @ state\n", - " norm = np.sqrt(np.conj(state) @ state)\n", - "\n", - " return np.real(unnormalized_expectation_value), np.real(norm)\n", - "\n", - "\n", - "def batched_expectation_value(op, state, qbit_indices):\n", - " return [measure_operator(op, state, qbit_index) for qbit_index in qbit_indices]\n" - ], - "id": "70f159042b2b5568", - "outputs": [], - "execution_count": 75 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:09.772938Z", - "start_time": "2025-02-12T09:30:09.765310Z" - } - }, - "cell_type": "code", - "source": [ - "phi = 0.1327\n", - "theta = 0.14\n", - "\n", - "U = CONTROL(Rx(theta))\n", - "U_L = CONTROL_L(Rx(theta))\n", - "state = np.kron(ket1(), ket1())\n", - "\n", - "state_t = U_L @ U @ state\n", - "expectations = batched_expectation_value(P1(), state_t, [0, 1])\n", - "expectations = np.array([np.real(e / norm) for e, norm in expectations])\n", - "print(f\"Normalized expectation values: {expectations}\")" - ], - "id": "60138f2ecb0c7f00", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Normalized expectation values: [0.99513193 0.995108 ]\n" - ] - } - ], - "execution_count": 76 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:09.811442Z", - "start_time": "2025-02-12T09:30:09.792107Z" - } - }, - "cell_type": "code", - "source": [ - "def apply_local_op(op, i, M):\n", - " \"\"\"\n", - " Apply a local operator op to site i of an mps tensor M.\n", - " \"\"\"\n", - " M[i] = np.einsum(\"ij,aib->ajb\", op, M[i])\n", - "\n", - "def wavefunction(M):\n", - " L, D, d, _ = M.shape\n", - "\n", - " # Initialize the left boundary as a 2-D array with shape (1, D).\n", - " left = np.zeros((1, D))\n", - " left[0, 0] = 1.0 # v_left = [1, 0, ..., 0]\n", - "\n", - " # Iteratively contract each site.\n", - " # At each step, left has shape (d**site, D).\n", - " for site in range(L):\n", - " # M[site] has shape (D, d, D)\n", - " # We want to contract the bond index of left (axis 1) with the first index of M[site].\n", - " # The contraction 
below yields an array of shape (d**site, d, D).\n", - " left = np.tensordot(left, M[site], axes=([1], [0]))\n", - " # Merge the accumulated physical indices into one axis.\n", - " left = left.reshape(-1, D) # Now left has shape (d**(site+1), D)\n", - "\n", - " # The right boundary: a vector of shape (D,) with a 1 in the first entry.\n", - " right = np.zeros((D,))\n", - " right[0] = 1.0 # v_right = [1, 0, ..., 0]\n", - "\n", - " # Final contraction: contract the bond index of left with the right boundary.\n", - " psi = np.dot(left, right) # psi has shape (d**L,)\n", - "\n", - " return psi\n", - "\n", - "def expectation_site(O, site, M, v_left=None, v_right=None):\n", - " L, D, d, _ = M.shape\n", - " # Default boundaries: choose the minimal subspace.\n", - " if v_left is None:\n", - " v_left = np.zeros(D, dtype=complex)\n", - " v_left[0] = 1.0\n", - " if v_right is None:\n", - " v_right = np.zeros(D, dtype=complex)\n", - " v_right[0] = 1.0\n", - "\n", - " E_left = np.outer(v_left, v_left.conjugate())\n", - "\n", - " # Contract sites 0 to site-1 to build the left environment.\n", - " for j in range(site):\n", - " E_left = np.einsum(\"xy,xsz,ysw->zw\", E_left, M[j], M[j].conjugate())\n", - "\n", - " E_right = np.outer(v_right, v_right.conjugate())\n", - " for j in range(L-1, site, -1):\n", - " E_right = np.einsum(\"asz,bsw,zw->ab\", M[j], M[j].conjugate(), E_right)\n", - " F = np.einsum(\"asb,st,atd->bd\", M[site], O, M[site].conjugate())\n", - "\n", - " val = np.einsum(\"ab,ab->\", E_left, F * E_right)\n", - " return np.real(val)\n", - "\n", - "\n", - "def expectation_sites(O, sites, M, v_left=None, v_right=None):\n", - " expectations = []\n", - " for site in sites:\n", - " expectations.append(expectation_site(O, site, M, v_left, v_right))\n", - " return np.array(expectations)\n", - "\n", - "def canonical_form(M):\n", - " L = len(M) # Number of tensors (sites)\n", - "\n", - " # Sweep from left to right: to bring the mps into left-canonical form.\n", - " for i in range(1, L-1):\n", - " # The tensor has shape (D, d, D), so we reshape it to (D * d, D)\n", - " tensor = M[i]\n", - " print(f\"{tensor.shape = }\")\n", - " reshaped_tensor = tensor.reshape(-1, tensor.shape[2]) # Shape (D * d, D)\n", - " print(f\"{reshaped_tensor.shape = }\")\n", - "\n", - " # Perform QR decomposition: reshaped_tensor = U * S\n", - " # U has dimensions (D*d, D), S has dimensions (D, D)\n", - " Q, R = np.linalg.qr(reshaped_tensor)\n", - "\n", - " # apply S to the right tensor\n", - " M[i+1] = np.einsum(\"ij,jkl->ikl\", R, M[i+1])\n", - "\n", - " # Reshape Q to (D, d, new_D) and update tensor\n", - " M[i] = Q.reshape(tensor.shape[0], tensor.shape[1], -1)\n", - " print(f\"{M[i].shape = }\")\n", - "\n", - " Ss = []\n", - " # Sweep from right to left: to compute singular values\n", - " for i in range(L-1, -1, -1):\n", - " print(f\"{i = }\")\n", - " tensor = M[i]\n", - " reshaped_tensor = tensor.reshape(tensor.shape[0], -1) # Shape (D, d * D)\n", - " # svd: reshaped_tensor = U * S * V^T\n", - " U, S, V = np.linalg.svd(reshaped_tensor, full_matrices=False)\n", - "\n", - " # store singular values\n", - " Ss.append(S)\n", - "\n", - " # TODO cutoff and renormalize\n", - "\n", - " # apply U to the left tensor M_i-1 @ U\n", - " M[i-1] = np.einsum(\"ijk,kl->ijl\", M[i-1], U)\n", - "\n", - " # store V in the right tensor M_i\n", - " M[i] = V.reshape(tensor.shape[0], tensor.shape[1], -1)\n", - "\n", - " return M, Ss" - ], - "id": "174f6e4c5482a128", - "outputs": [], - "execution_count": 77 - }, - { - "metadata": { - "ExecuteTime": { 
- "end_time": "2025-02-12T09:30:09.835266Z", - "start_time": "2025-02-12T09:30:09.828843Z" - } - }, - "cell_type": "code", - "source": [ - "# Set the mps parameters\n", - "d = 2; D = 2; L = 4\n", - "\n", - "# Initialize the mps tensor\n", - "M = np.zeros((L,D,d,D))\n", - "\n", - "# initialize to |00..0>\n", - "for i in range(L):\n", - " M[i][0][0][0] = 1\n", - "\n", - "apply_local_op(X(), 3, M)\n", - "wavefunc = wavefunction(M)\n", - "\n", - "exp_site_3 = expectation_sites(P0(), range(L), M)\n", - "print(exp_site_3)\n", - "\n", - "M_i, S = canonical_form(M)\n", - "\n", - "print(S)" - ], - "id": "a59b0ff20eea8580", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1. 1. 1. 0.]\n", - "tensor.shape = (2, 2, 2)\n", - "reshaped_tensor.shape = (4, 2)\n", - "M[i].shape = (2, 2, 2)\n", - "tensor.shape = (2, 2, 2)\n", - "reshaped_tensor.shape = (4, 2)\n", - "M[i].shape = (2, 2, 2)\n", - "i = 3\n", - "i = 2\n", - "i = 1\n", - "i = 0\n", - "[array([1., 0.]), array([1.41421356, 0. ]), array([1.41421356, 0. ]), array([1., 0.])]\n" - ] - } - ], - "execution_count": 78 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:09.936898Z", - "start_time": "2025-02-12T09:30:09.908725Z" - } - }, - "cell_type": "code", - "source": [ - "def contract_MPS_MPO_expectation(psi, O):\n", - " \"\"\"\n", - " Compute the contraction for one site corresponding to\n", - " \n", - "\n", - " Parameters:\n", - " psi : 3D array, mps tensor of shape (D, d, D)\n", - " O : 2D array, local operator of shape (d, d)\n", - "\n", - " Returns:\n", - " result : float, the expectation value \n", - " \"\"\"\n", - "\n", - " ket = np.einsum(\"ij,kjl->ikl\", O, psi)\n", - " bra = psi.conj()\n", - "\n", - " # inner product \n", - " return np.vdot(bra, ket)\n", - "\n", - "class Site:\n", - " def __init__(self, d, D):\n", - " self.d = d\n", - " self.D = D\n", - "\n", - " self.tensor = np.zeros((D, d, D))\n", - " self.tensor[0, 0, 0] = 1.0 # Initialize to |0>\n", - "\n", - " self.S = None # np.zeros((D, D))\n", - " self.canonical = False\n", - "\n", - "class MPS:\n", - " sites = [] # List of Site objects\n", - "\n", - " def __init__(self, L, chi):\n", - " self.L = L\n", - " self.d = 2\n", - " self.chi = chi\n", - " self.sites = [Site(d, 1) for _ in range(L+1)]\n", - "\n", - " def apply_local_op(self, i, op):\n", - " \"\"\"\n", - " Apply a local operator op to site i of the mps.\n", - " \"\"\"\n", - " self.sites[i].tensor = np.einsum(\"ij,aib->ajb\", op, self.sites[i].tensor)\n", - "\n", - " def canonical_form(self):\n", - "\n", - " for i in range(0, self.L-1):\n", - " # The tensor has shape (D, d, D), so we reshape it to (D * d, D)\n", - " tensor = self.sites[i].tensor\n", - " reshaped_tensor = tensor.reshape(-1, tensor.shape[2])\n", - "\n", - " # Perform QR decomposition: reshaped_tensor = U * S\n", - " # U has dimensions (D*d, D), S has dimensions (D, D)\n", - " Q, R = np.linalg.qr(reshaped_tensor)\n", - "\n", - " # apply S to the right tensor\n", - " self.sites[i+1].tensor = np.einsum(\"ij,jkl->ikl\", R, self.sites[i+1].tensor)\n", - "\n", - " # Reshape Q to (D, d, new_D) and update tensor\n", - " self.sites[i].tensor = Q.reshape(tensor.shape[0], tensor.shape[1], -1)\n", - "\n", - " # right boundary singular values\n", - " # self.sites[self.L].S = np.array([1.0])\n", - "\n", - " for i in range(self.L-1, -1, -1):\n", - " tensor = self.sites[i].tensor\n", - " reshaped_tensor = tensor.reshape(tensor.shape[0], -1)\n", - " # svd: reshaped_tensor = U * S * V^T\n", - " U, S, V = 
np.linalg.svd(reshaped_tensor, full_matrices=False)\n", - "\n", - " # store singular values\n", - " self.sites[i].S = S / np.linalg.norm(S)\n", - "\n", - " # store V in the right tensor M_i\n", - " self.sites[i].tensor = V.reshape(tensor.shape[0], tensor.shape[1], -1)\n", - "\n", - " if i > 0:\n", - " # apply U to the left tensor M_i-1 @ U\n", - " self.sites[i-1].tensor = np.einsum(\"ijk,kl->ijl\", self.sites[i-1].tensor, U)\n", - "\n", - " # save phase\n", - " # assert (U[0, 0] == 1)\n", - " self.sites[0].tensor *= U[0, 0]\n", - " if U[0, 0] != 1:\n", - " print(f\"Phase: {U[0, 0]}\")\n", - "\n", - " # left boundary singular values\n", - " # self.sites[0].S = np.array([1.0])\n", - " self.sites[self.L].S = self.sites[0].S\n", - "\n", - "\n", - " def get_B(self, i, form=(True, True)):\n", - " a, b = form\n", - "\n", - " B = self.sites[i].tensor\n", - " SL = np.diag(self.sites[i].S)\n", - " SR = np.diag(self.sites[i+1].S)\n", - "\n", - " if a:\n", - " B = np.einsum(\"ij,jkl->ikl\", SL, B)\n", - "\n", - " if b:\n", - " B = np.einsum(\"ijk,kl->ijl\", B, SR)\n", - "\n", - " return B\n", - "\n", - " def theta(self, i, n=2):\n", - " \"\"\"Calculates the `n`-site wavefunction on ``sites[i:i+n]``.\n", - "\n", - " Parameters\n", - " ----------\n", - " i : int\n", - " Site index.\n", - " n : int\n", - " Number of sites. The result lives on ``sites[i:i+n]``.\n", - " \"\"\"\n", - " if n == 1:\n", - " return self.get_B(i, form=(True, True))\n", - " if n == 2:\n", - " b1 = self.get_B(i, form=(True, False))\n", - " b2 = self.get_B(i+1, form=(True, True))\n", - " return np.tensordot(b1, b2, axes=1)\n", - " else:\n", - " raise NotImplementedError(\"n > 2 not implemented\")\n", - "\n", - " def apply_2site_op(self, i, O):\n", - " \"\"\"\n", - " Apply a 2-site operator O to sites i and i+1.\n", - "\n", - " Parameters\n", - " ----------\n", - " i : int\n", - " Site index.\n", - " O : np.ndarray\n", - " (4x4) operator acting on sites i and i+1.\n", - " \"\"\"\n", - " th = self.theta(i, n=2) # shape (D1, d, d, D2)\n", - "\n", - " # reshape th into (D1, d*d, D2)\n", - " D1, d, _, D2 = th.shape\n", - " th = th.reshape(D1, d*d, D2)\n", - "\n", - " # apply the operator\n", - " th = np.einsum('ij,ajk->aik', O, th)\n", - "\n", - " # Reshape back to (D1, d, d, D2)\n", - " th = th.reshape(D1, d, d, D2)\n", - "\n", - " matrix = th.reshape(d*D1, d*D2) # Shape: (2, 2)\n", - "\n", - " # Perform SVD and truncate to bond dimension 1\n", - " U, S, Vh = np.linalg.svd(matrix, full_matrices=False)\n", - "\n", - " # determine the truncation dimension (bond dimension) based on the singular values\n", - " trunc = np.sum(S > 1e-12) # number of singular values larger than 1e-12\n", - " if trunc > self.chi:\n", - " trunc = self.chi\n", - "\n", - " U = U[:, :trunc]\n", - " S = S[:trunc]\n", - " Vh = Vh[:trunc, :]\n", - "\n", - " # Absorb singular values into one tensor\n", - " A_new = U.reshape(D1, d, trunc)\n", - " B_new = Vh.reshape(trunc, d, D2)\n", - "\n", - " self.sites[i].tensor = A_new\n", - " self.sites[i+1].tensor = B_new\n", - " self.sites[i+1].S = S / np.linalg.norm(S)\n", - "\n", - "\n", - " def expectation_value(self, O, sites):\n", - "\n", - " expectations = []\n", - " for i in sites:\n", - " theta = self.theta(i, n=1)\n", - "\n", - " expectations.append(contract_MPS_MPO_expectation(theta, O))\n", - "\n", - " return np.array(expectations)\n", - "\n", - " def chis(self):\n", - " return [max(self.sites[i].tensor.shape[0], self.sites[i].tensor.shape[2]) for i in range(L)]\n", - "\n", - "psi = MPS(L=4, chi=2)\n", - 
"psi.canonical_form()\n", - "\n", - "# psi.apply_local_op(1, op=X())\n", - "#psi.apply_local_op(2, op=H())\n", - "# psi.apply_2site_op(2, CNOT())\n", - " # npt.assert_array_almost_equal(psi.expectation_value(P1(), [0, 1, 2, 3]), [0, 1, 0.5, 0.5])\n" - ], - "id": "2904c781c2b93577", - "outputs": [], - "execution_count": 79 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:54:26.653795Z", - "start_time": "2025-02-12T09:54:26.601733Z" - } - }, - "cell_type": "code", - "source": [ - "from tenpy.networks.mps import MPS as TMPS\n", - "from tenpy.networks.site import SpinHalfSite\n", - "from tenpy.networks.site import kron\n", - "import time\n", - "\n", - "site = SpinHalfSite(conserve=None, sort_charge=False)\n", - "\n", - "theta = 0.1\n", - "\n", - "op_o = np.array([[1., 0.], [0., 0.]])\n", - "op_i = np.array([[0., 0.], [0., 1.]])\n", - "op_X = np.array([[0., 1.], [1., 0.]])\n", - "op_H = H()\n", - "op_Rx = Rx(theta)\n", - "\n", - "site.add_op('X', op_X)\n", - "site.add_op('ii', op_i)\n", - "site.add_op('oo', op_o)\n", - "site.add_op('H', op_H)\n", - "site.add_op('Rx', op_Rx)\n", - "\n", - "L = 3\n", - "\n", - "\n", - "op_CX = kron(site.get_op('oo'), site.get_op('Id'), group=False) + kron(site.get_op('ii'), site.get_op('X'), group=False)\n", - "op_CRX = kron(site.get_op('oo'), site.get_op('Id'), group=False) + kron(site.get_op('ii'), site.get_op('Rx'), group=False)\n", - "op_CLX = kron(site.get_op('Id'), site.get_op('oo'), group=False) + kron(site.get_op('Rx'), site.get_op('ii'), group=False)\n", - "\n", - "mps = TMPS.from_product_state([site] * L, ['up'] * L, 'finite')\n", - "\n", - "mps.apply_local_op(0, \"X\")\n", - "mps.apply_local_op(1, \"X\")\n", - "mps.apply_local_op(2, \"X\")\n", - "\n", - "def printBBS(site, mps):\n", - " print(f\"B_{site}:\\n{mps.get_B(site)._data}\")\n", - " print(f\"B_{site+1}:\\n{mps.get_B(site+1)._data}\")\n", - " print(f\"S_{site+1}:\\n{mps.get_SL(site+1)}\")\n", - "\n", - "mps.apply_local_op(0, op_CRX)\n", - "#printBBS(0, mps)\n", - "print(\"Expectations:\", mps.expectation_value(\"ii\", [0, 1, 2]))\n", - "mps.apply_local_op(1, op_CRX)\n", - "#printBBS(1, mps)\n", - "print(\"Expectations:\", mps.expectation_value(\"ii\", [0, 1, 2]))\n", - "mps.apply_local_op(0, op_CLX)\n", - "#printBBS(0, mps)\n", - "print(\"Expectations:\", mps.expectation_value(\"ii\", [0, 1, 2]))\n", - "mps.apply_local_op(1, op_CLX)\n", - "#printBBS(1, mps)\n", - "print(\"Expectations:\", mps.expectation_value(\"ii\", [0, 1, 2]))\n", - "\n", - "#print(mps.chi)" - ], - "id": "e5310f200a30da4", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Expectations: [1. 0.99750208 1. ]\n", - "Expectations: [1. 
0.99750208 0.99750832]\n", - "Expectations: [0.99750832 0.99750208 0.99750832]\n", - "Expectations: [0.99750832 0.99005196 0.99750832]\n" - ] - } - ], - "execution_count": 88 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:10.034189Z", - "start_time": "2025-02-12T09:30:10.029152Z" - } - }, - "cell_type": "code", - "source": [ - "psi = ket_bin([1, 1, 1])\n", - "theta = 0.1\n", - "op_Rx = Rx(theta)\n", - "op_CRX = CONTROL(op_Rx)\n", - "psi = np.kron(op_CRX, np.eye(2)) @ psi\n", - "psi = np.kron(np.eye(2), op_CRX) @ psi\n", - "\n", - "e, norm = measure_operator(P1(), psi, 2)\n", - "print(f\"{e = } {norm = }\")\n", - "print(f\"Expectation value: {e / norm}\")" - ], - "id": "14148111ce9f52c", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "e = np.float64(0.9975083222301554) norm = np.float64(1.0)\n", - "Expectation value: 0.9975083222301554\n" - ] - } - ], - "execution_count": 81 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:10.059683Z", - "start_time": "2025-02-12T09:30:10.048863Z" - } - }, - "cell_type": "code", - "source": [ - "def K1(phi):\n", - " return np.array([[0, -1j / 2 * np.sin(phi / 2)], [0, 0]])\n", - "\n", - "def K2(phi):\n", - " return np.array([[1, 0], [0, np.cos(phi / 2)]])\n", - "\n", - "phi = 0.1 #np.pi / 2\n", - "\n", - "psi0 = np.kron(ket0(), ket0())\n", - "psi1 = np.kron(ket1(), ket0())\n", - "psi2 = np.kron(ket0(), ket1())\n", - "\n", - "psi0_k = ket0()\n", - "psi1_k = ket1()\n", - "psi2_k = ket0()\n", - "\n", - "\n", - "psi0 = Rxy(phi) @ psi0\n", - "psi1 = Rxy(phi) @ psi1\n", - "psi2 = np.kron(H(), np.eye(2)) @ psi2\n", - "psi2 = Rxy(phi) @ psi2\n", - "\n", - "# measure the expectation value of the operator P0 on the second qbit of the state\n", - "\n", - "e0, n0 = measure_operator(P0(), psi0, 1)\n", - "e1, n1 = measure_operator(P0(), psi1, 1)\n", - "e2, n2 = measure_operator(P0(), psi2, 1)\n", - "\n", - "e0k, n0k = measure_operator(K2(phi), psi0_k, 0)\n", - "e1k, n1k = measure_operator(K2(phi), psi1_k, 0)\n", - "e2k, n2k = measure_operator(K2(phi), psi2_k, 0)\n", - "\n", - "print(f\"Expectation value of |0x0| on qubit: 1 for Rxy(theta)|00> = {e0 / n0}\")\n", - "print(f\"Expectation value of |0x0| on qubit: 1 for Rxy(theta)|10> = {e1 / n1}\")\n", - "print(f\"Expectation value of |0x0| on qubit: 1 for Rxy(theta)|+0> = {e2 / n2}\")\n", - "\n", - "print(f\"Expectation value of K2 on qubit: 0 for |0> = {e0k / n0k}\")\n", - "print(f\"Expectation value of K2 on qubit: 0 for |1> = {e1k / n1k}\")\n", - "print(f\"Expectation value of K2 on qubit: 0 for |+> = {e2k / n2k}\")" - ], - "id": "cd6ae0f26afe819a", - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Expectation value of |0x0| on qubit: 1 for Rxy(theta)|00> = 1.0\n", - "Expectation value of |0x0| on qubit: 1 for Rxy(theta)|10> = 0.997502082639013\n", - "Expectation value of |0x0| on qubit: 1 for Rxy(theta)|+0> = 0.0012489586804935587\n", - "Expectation value of K2 on qubit: 0 for |0> = 1.0\n", - "Expectation value of K2 on qubit: 0 for |1> = 0.9987502603949663\n", - "Expectation value of K2 on qubit: 0 for |+> = 1.0\n" - ] - } - ], - "execution_count": 82 - }, - { - "metadata": { - "ExecuteTime": { - "end_time": "2025-02-12T09:30:10.078098Z", - "start_time": "2025-02-12T09:30:10.076369Z" - } - }, - "cell_type": "code", - "source": "", - "id": "58a74999e21432a0", - "outputs": [], - "execution_count": null - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - 
"name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/python/ZX.ipynb b/python/ZX.ipynb deleted file mode 100644 index 08927f6..0000000 --- a/python/ZX.ipynb +++ /dev/null @@ -1,53 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "id": "initial_id", - "metadata": { - "collapsed": true, - "ExecuteTime": { - "end_time": "2025-02-04T11:27:11.354176Z", - "start_time": "2025-02-04T11:27:11.197144Z" - } - }, - "source": [ - "import pyzx as zx\n", - "from pyzx.circuit import Circuit, CZ, RXX\n" - ], - "outputs": [], - "execution_count": 2 - }, - { - "metadata": {}, - "cell_type": "code", - "outputs": [], - "execution_count": null, - "source": [ - "c = Circuit(2)\n", - "c.add_gate(CZ(0,1))\n" - ], - "id": "e3ba7b648ad767e7" - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 2 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/python/main.py b/python/main.py deleted file mode 100644 index 735314f..0000000 --- a/python/main.py +++ /dev/null @@ -1,140 +0,0 @@ -import os -import sys - -sys.path.append("/home/robinpr") -sys.path.append("/home/robinpr/tenpy-main") -from itertools import cycle -import numpy as np -import random as rng -from tenpy.networks.site import SpinHalfSite -from tenpy.networks.mps import MPS -from tenpy.networks.site import kron -from own_TeNPy import update_bond -import logging - -rng = np.random.default_rng(seed=42) -# Set the logging level for TenPy to WARNING to suppress info messages -logging.getLogger('tenpy_git').setLevel(logging.WARNING) - - -class InitialState: - def down_state(L): - # print("Create state |00..1..0>") - state = [] - for i in range(L): - if i == L // 2: - state.append('down'); - else: - state.append('down'); - return state - - -def execute(psi_t_initial, t_final, parameter, Ntraj=1): - L = parameter['L'] - p = parameter['p'] - theta = parameter['theta'] - phi = parameter['phi'] - cos_theta_2 = np.cos(theta / 2) - sin_theta_2 = np.sin(theta / 2) - # print(psi_t_initial) - cos_phi_2 = np.cos(phi / 2) - sin_phi_2 = np.sin(phi / 2) - - site = SpinHalfSite(conserve=None, sort_charge=False) - op_o = np.array([[1., 0.], [0., 0.]]) - op_i = np.array([[0., 0.], [0., 1.]]) - op_Rx = np.array([[cos_theta_2, -1j * sin_theta_2], [-1j * sin_theta_2, cos_theta_2]]) - - op_K1 = np.array([[0., -1j / 2 * sin_phi_2], [0., 0.]]) - - op_K2 = np.array([[1., 0.], [0., cos_phi_2]]) - - site.add_op('Rx', op_Rx) - site.add_op('ii', op_i) - site.add_op('oo', op_o) - site.add_op('K1', op_K1) - site.add_op('K2', op_K2) - - opR = kron(site.get_op('oo'), site.get_op('Id'), group=False) + kron(site.get_op('ii'), site.get_op('Rx'), - group=False) - opL = kron(site.get_op('Id'), site.get_op('oo'), group=False) + kron(site.get_op('Rx'), site.get_op('ii'), - group=False) - - #with open(output_file, 'w') as f: - for _ in range(Ntraj): - psi_t = MPS.from_product_state([site] * L, psi_t_initial, 'finite') - state_list = [np.sum(psi_t.expectation_value('ii')) / L] - - #pbar = 
tqdm.tqdm(range(t_final - 1))
-    for t in range(t_final - 1):
-        # Maximally optimized variant
-        # prob = psi_t.expectation_value('K2')
-        # decisions = rng.random(size=L) > prob
-        # print(f"{np.mean(prob) = }", f"{np.sum(decisions) = }")
-        #
-        # # Batch-wise application (no parallelism, but vectorized)
-        # for op, indices in [('K1', np.where(decisions)[0]),
-        #                     ('K2', np.where(~decisions)[0])]:
-        #     for i in indices:
-        #         psi_t.apply_local_op(i, op, unitary=False)
-
-
-        # 1. Define the update sequence as a cycle
-        update_sequence = [
-            (range(0, L - 1, 2), opR),  # Phase 1: even bonds with opR
-            (range(1, L - 1, 2), opL),  # Phase 2: odd bonds with opL
-            (range(1, L - 1, 2), opR),  # Phase 3: odd bonds with opR
-            (range(0, L - 1, 2), opL)   # Phase 4: even bonds with opL
-        ]
-
-        # 2. Compressed loop with parallel index generation
-        for indices, op in update_sequence:
-            for i in indices:
-                update_bond.SVD_based(psi_t, i, op, parameter['tebd_params']['trunc_params'])
-        state_list.append(np.sum(psi_t.expectation_value('ii')) / L)
-        #print(f"{t = }, max(chi) = {max(psi_t.chi)}, mean(chi) = {sum(psi_t.chi) / L:.2f}, mean(ii) = {state_list[-1]:.4f}")
-        print(f"{t = }, max(chi) = {max(psi_t.chi)}, mean(chi) = {sum(psi_t.chi) / L:.2f}, mean(P1) = {state_list[-1]:.4f}")
-    #print(np.array(state_list))
-
-    #f.write(str(state_list) + '\n')
-    print(np.array(state_list))
-    return
-
-
-if __name__ == '__main__':
-    np.set_printoptions(precision=3)
-    parameter = {
-        'state_info': 'down_state',
-        'theta': 0.14,
-        'phi': 0.1222,
-        'p': 1,
-        'L': 30,
-        'model_params': {
-            'bc_MPS': 'finite',
-            'omega': 6,
-            'gamma': 1,
-        },
-        'tebd_params': {
-            'N_steps': 2,
-            'dt': 0.05,
-            'order': 1,
-            'trunc_params': {'chi_max': 1224, 'svd_min': 1.e-12}
-        }
-    }
-    # param_theta = float(parameter['theta'])
-    # param_p = str(parameter['p'])
-    # base_output_file = f"long_output_theta_{param_theta:.3f}_phi0.185.txt"
-
-    # Check if the file already exists and append a counter if necessary
-    # output_file = base_output_file
-    # counter = 1
-    # while os.path.exists(output_file):
-    #     output_file = f"long_output_theta_{param_theta:.3f}_{counter}_phi0.185.txt"
-    #     counter += 1
-
-    import time
-
-    start = time.time()
-    execute(InitialState.down_state(parameter['L']), t_final=31, parameter=parameter, Ntraj=1)
-    duration = time.time() - start
-    print(f"Duration: {duration:.6f} seconds")
\ No newline at end of file
diff --git a/python/mps/mps/mps.py b/python/mps.py
similarity index 68%
rename from python/mps/mps/mps.py
rename to python/mps.py
index 844f034..972495e 100644
--- a/python/mps/mps/mps.py
+++ b/python/mps.py
@@ -1,5 +1,81 @@
 import numpy as np
+
+def truncate(P, mindim=None, maxdim=None, cutoff=1E-12, use_absolute_cutoff=False, use_relative_cutoff=True):
+    """
+    Truncates a 1D numpy array based on mindim, maxdim, and a cutoff.
+
+    Args:
+        P (np.ndarray): The input vector to truncate.
+        mindim (int, optional): The minimum allowed dimension.
+        maxdim (int, optional): The maximum allowed dimension.
+        cutoff (float, optional): The cutoff value for truncation. Defaults to 1E-12.
+        use_absolute_cutoff (bool, optional): If True, use an absolute cutoff. Defaults to False.
+        use_relative_cutoff (bool, optional): If True, use a relative cutoff. Defaults to True.
+
+    Returns:
+        tuple: A tuple containing the truncated array, the truncation error, and the cutoff value used.
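+
+    Example (illustrative; the weights below are made up to show maxdim plus the relative cutoff):
+        >>> P = np.array([0.5, 0.3, 0.15, 0.04, 0.009, 0.001])
+        >>> P_new, truncerr, docut = truncate(P, maxdim=4, cutoff=1e-2)
+        >>> len(P_new), round(truncerr, 3)
+        (4, 0.01)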
+ """ + P = P.copy() + + if mindim is None: + mindim = 1 + if maxdim is None: + maxdim = len(P) + + # Handle negative values by taking the absolute value + P = np.abs(P) + + origm = len(P) + docut = 0.0 + + if origm == 1: + docut = abs(P[0]) / 2 + return P, 0.0, docut + + # Zero out any negative weights from the end + for i in range(origm - 1, -1, -1): + if P[i] >= 0: + break + P[i] = 0 + + n = origm + truncerr = 0.0 + + # Truncate to maxdim + while n > maxdim: + truncerr += P[n - 1] + n -= 1 + + if use_absolute_cutoff: + # Test if individual values fall below cutoff + while P[n - 1] <= cutoff and n > mindim: + truncerr += P[n - 1] + n -= 1 + else: # use_relative_cutoff + scale = np.sum(P) + if scale == 0.0: + scale = 1.0 + + # Continue truncating until sum of discarded weight reaches cutoff + while (truncerr + P[n - 1] <= cutoff * scale) and (n > mindim): + truncerr += P[n - 1] + n -= 1 + + truncerr /= scale + + if n < 1: + n = 1 + + if n < origm: + docut = (P[n - 1] + P[n]) / 2.0 + if abs(P[n - 1] - P[n]) < 1e-3 * P[n - 1]: + docut += 1e-3 * P[n - 1] + # print(f"Truncated from {origm} to {n} with error {truncerr} (cutoff {cutoff}, docut {docut})") + P_new = P[:n] + + return P_new, truncerr, docut def contract_MPS_MPO_expectation(psi, O): """ @@ -129,7 +205,7 @@ def theta(self, i, n=2): else: raise NotImplementedError("n > 2 not implemented") - def apply_2site_op(self, i, O): + def apply_2site_op(self, i, O, cutoff=1E-12): """ Apply a 2-site operator O to sites i and i+1. @@ -158,11 +234,13 @@ def apply_2site_op(self, i, O): # Perform SVD and truncate to bond dimension 1 U, S, Vh = np.linalg.svd(Oth, full_matrices=True) - V = np.conj(Vh).T + #V = np.conj(Vh).T + # print(f"Site = {i}, original S = [{', '.join(f'{x:.8f}' for x in S)}]") + + S_new, truncerr, docut = truncate(np.pow(S, 2), maxdim=self.chi, cutoff=cutoff, use_relative_cutoff=True) - # determine the truncation dimension (bond dimension) based on the singular values - trunc = np.sum(S > 1e-12) # number of singular values larger than 1e-12 - trunc = min(trunc, self.chi) + # The new truncation dimension is simply the length of the new singular value array + trunc = len(S_new) U = U[:, :trunc] S = S[:trunc] @@ -190,4 +268,4 @@ def expectation_value(self, O, sites): return np.array(expectations) def chis(self): - return [max(self.sites[i].tensor.shape[0], self.sites[i].tensor.shape[2]) for i in range(self.L)] + return [self.sites[i].tensor.shape[2] for i in range(self.L - 1)] diff --git a/python/mps/mps/__init__.py b/python/mps/mps/__init__.py deleted file mode 100644 index a2cd0c7..0000000 --- a/python/mps/mps/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -from .mps import * -from .operators import * \ No newline at end of file diff --git a/python/mps/tests/util.py b/python/mps/tests/util.py deleted file mode 100644 index a8ff576..0000000 --- a/python/mps/tests/util.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -import re - - -def parse_complex_array_from_string(s): - """ - Parse a string representing a complex-valued array. - - This function first attempts to evaluate the string directly. If that fails - (for example, if commas are missing), it uses a simple regex-based approach - to split rows and columns. - """ - try: - # Try to evaluate the string as a Python literal. - arr = np.array(eval(s)) - return arr - except Exception: - # Remove outer brackets. - s = s.strip() - if s.startswith('[') and s.endswith(']'): - s = s[1:-1] - # Split into rows (using newlines or the pattern "],["). 
- rows = re.split(r'\]\s*,?\s*\[', s) - # Remove any extra brackets. - rows = [row.replace('[', '').replace(']', '') for row in rows] - data = [] - for row in rows: - # Split by whitespace. - items = row.split() - # Convert each item into a complex number. - row_data = [complex(item) for item in items] - data.append(row_data) - arr = np.array(data) - return arr - - -def main(): - # First string: includes commas. - s1 = """[[-1.71e-09-0.0499j, 3.42e-08+0.998j, 0.0499-3.25e-09j, 6.22e-06-7.99e-12j], - [2.77e-18+0.00249j, -5.53e-17-0.0498j, 0.999-3.09e-08j, 0.000125+4.12e-12j], - [-0.999+5.86e-17j, -0.0499-1.17e-15j, 1.49e-08-4.69e-15j, -0.000125-5.8e-19j], - [-0.000125+1.66e-13j, -6.22e-06-3.32e-12j, -0.000125+7.58e-12j, 1+9.46e-16j]]""" - - # Second string: space-separated entries. - s2 = """[[ 1.370e-16-4.986e-02j 2.740e-15-9.975e-01j -4.985e-02-2.591e-23j 6.216e-06-2.078e-19j] - [ 0.000e+00-2.489e-03j 0.000e+00-4.979e-02j 9.988e-01-2.744e-15j -1.245e-04+1.237e-19j] - [-9.988e-01+4.135e-18j 4.992e-02+8.274e-17j 1.555e-08-2.591e-23j 1.247e-04+3.230e-27j] - [ 1.245e-04-2.195e-22j -6.224e-06-4.392e-21j 1.247e-04-2.078e-19j 1.000e+00+2.591e-23j]]""" - - # Parse the strings. - arr1 = np.array(eval(s1)) - arr2 = parse_complex_array_from_string(s2) - - # Print flattened arrays. - print("Array 1 (flattened):") - print(arr1.flatten()) - print("\nArray 2 (flattened):") - print(arr2.flatten()) - - # Convert both to double precision for comparison. - arr1_d = np.abs(arr1.astype(np.complex128)) - arr2_d = np.abs(arr2.astype(np.complex128)) - - tol = 1e-3 - if np.allclose(arr1_d.flatten(), arr2_d.flatten(), atol=tol): - print("\nArrays are equal within tolerance", tol) - else: - print("\nArrays differ!") - diff = np.abs(arr1_d.flatten() - arr2_d.flatten()) - print("Differences:") - print(diff) - - -if __name__ == "__main__": - main() \ No newline at end of file diff --git a/python/mps/mps/operators.py b/python/operators.py similarity index 100% rename from python/mps/mps/operators.py rename to python/operators.py diff --git a/python/own_TeNPy.py b/python/own_TeNPy.py deleted file mode 100644 index cbc5816..0000000 --- a/python/own_TeNPy.py +++ /dev/null @@ -1,801 +0,0 @@ -""" -Not my own code, I only changed parts of algorithms to use it for my purpose -original code: https://github.com/tenpy/tenpy -""" - -import numpy as np -import logging -logger = logging.getLogger(__name__) - -from tenpy.linalg import np_conserved as npc -from tenpy.algorithms.truncation import svd_theta, TruncationError, truncate -from tenpy.algorithms.tebd import TEBDEngine - -from tenpy.algorithms.algorithm import TimeEvolutionAlgorithm -import warnings -import time -class update_bond: - @staticmethod - def SVD_based(psi, i, U_bond, trunc_params): - - """Updates the B matrices on a given bond. - - Function that updates the B matrices, the bond matrix s between and the - bond dimension chi for bond i. The corresponding tensor networks look like this:: - - | --S--B1--B2-- --B1--B2-- - | | | | | - | theta: U_bond C: U_bond - | | | | | - - Parameters - ---------- - i : int - Bond index; we update the matrices at sites ``i-1, i``. - U_bond : :class:`~tenpy.linalg.np_conserved.Array` - The bond operator which we apply to the wave function. - We expect labels ``'p0', 'p1', 'p0*', 'p1*'``. - - Returns - ------- - trunc_err : :class:`~tenpy.algorithms.truncation.TruncationError` - The error of the represented state which is introduced by the truncation - during this update step. 
- """ - i0, i1 = i , i +1 - logger.debug("Update sites (%d, %d)", i0, i1) - # Construct the theta matrix - C = psi.get_theta(i0, n=2, formL=0.) # the two B without the S on the left - C = npc.tensordot(U_bond, C, axes=(['p0*', 'p1*'], ['p0', 'p1'])) # apply U - C.itranspose(['vL', 'p0', 'p1', 'vR']) - theta = C.scale_axis(psi.get_SL(i0), 'vL') - # now theta is the same as if we had done - # theta = psi.psi.get_theta(i0, n=2) - # theta = npc.tensordot(U_bond, theta, axes=(['p0*', 'p1*'], ['p0', 'p1'])) # apply U - # but also have C which is the same except the missing "S" on the left - # so we don't have to apply inverses of S (see below) - - theta = theta.combine_legs([('vL', 'p0'), ('p1', 'vR')], qconj=[+1, -1]) - # Perform the SVD and truncate the wavefunction - U, S, V, trunc_err, renormalize = svd_theta(theta, - trunc_params, - [psi.get_B(i0, None).qtotal, None], - inner_labels=['vR', 'vL']) - - # Split tensor and update matrices - B_R = V.split_legs(1).ireplace_label('p1', 'p') - - # In general, we want to do the following: - # U = U.iscale_axis(S, 'vR') - # B_L = U.split_legs(0).iscale_axis(self.psi.get_SL(i0)**-1, 'vL') - # B_L = B_L.ireplace_label('p0', 'p') - # i.e. with SL = self.psi.get_SL(i0), we have ``B_L = SL**-1 U S`` - # - # However, the inverse of SL is problematic, as it might contain very small singular - # values. Instead, we use ``C == SL**-1 theta == SL**-1 U S V``, - # such that we obtain ``B_L = SL**-1 U S = SL**-1 U S V V^dagger = C V^dagger`` - # here, C is the same as theta, but without the `S` on the very left - # (Note: this requires no inverse if the MPS is initially in 'B' canonical form) - B_L = npc.tensordot(C.combine_legs(('p1', 'vR'), pipes=theta.legs[1]), - V.conj(), - axes=['(p1.vR)', '(p1*.vR*)']) - B_L.ireplace_labels(['vL*', 'p0'], ['vR', 'p']) - B_L /= renormalize # re-normalize to = 1 - #psi.norm *= renormalize - psi.set_SR(i0, S) - psi.set_B(i0, B_L, form='B') - psi.set_B(i1, B_R, form='B') - return trunc_err - - @staticmethod - def _expansion_rate(psi,chi,cbe_expand, i): - """get expansion rate for updating bond i""" - - expand = cbe_expand#trunc_params.get('cbe_expand', None) - expand_0 = None#trunc_params.get('cbe_expand_0', None) - if expand_0 is None or expand_0 == expand: - return expand - - chi_max = chi#trunc_params.get('chi_max', None) - if chi_max is None: - raise ValueError('Need to specify trunc_params["chi_max"] in order to use cbe_expand_0.') - - chi = min(psi.get_SL(i).shape) - return max(expand_0 - chi / chi_max * (expand_0 - expand), expand) - - @staticmethod - def update_bond_QR(psi, i, U_bond,chi,cbe_expand,trunc_params): - i0, i1 = i - 1, i - expand = update_bond._expansion_rate(psi,chi,cbe_expand,i) - logger.debug(f'Update sites ({i0}, {i1}). CBE expand={expand}') - # Construct the theta matrix - C = psi.get_theta(i0, n=2, formL=0.) 
# the two B without the S on the left - C = npc.tensordot(U_bond, C, axes=(['p0*', 'p1*'], ['p0', 'p1'])) # apply U - C.itranspose(['vL', 'p0', 'p1', 'vR']) - theta = C.scale_axis(psi.get_SL(i0), 'vL') - theta = theta.combine_legs([('vL', 'p0'), ('p1', 'vR')], qconj=[+1, -1]) - - min_block_increase = 1#trunc_params.get('cbe_min_block_increase', 1) - Y0 = QRBasedTEBDEngine._qr_tebd_cbe_Y0(B_L=psi.get_B(i0, 'B'), B_R=psi.get_B(i1, 'B'), theta=theta, - expand=expand, min_block_increase=min_block_increase) - A_L, S, B_R, trunc_err, renormalize = QRBasedTEBDEngine._qr_based_decomposition( - theta=theta, Y0=Y0, use_eig_based_svd=False,#trunc_params.get('use_eig_based_svd', False), - need_A_L=False, compute_err=False,#trunc_params.get('compute_err', False), - trunc_params=trunc_params) - B_L = npc.tensordot(C.combine_legs(('p1', 'vR'), pipes=theta.legs[1]), - B_R.conj(), - axes=[['(p1.vR)'], ['(p*.vR*)']]) / renormalize - B_L.ireplace_labels(['p0', 'vL*'], ['p', 'vR']) - B_R = B_R.split_legs(1) - psi.norm *= renormalize - psi.set_B(i0, B_L, form='B') - psi.set_SL(i1, S) - psi.set_B(i1, B_R, form='B') - return trunc_err - -class QRBasedTEBDEngine(TEBDEngine): - r"""Version of TEBD that relies on QR decompositions rather than SVD. - - As introduced in :arxiv:`2212.09782`. - - .. todo :: - To use `use_eig_based_svd == True`, which makes sense on GPU only, we need to implement - the `_eig_based_svd` for "non-square" matrices. - This means that :math:`M^{\dagger} M` and :math:`M M^{\dagger}` dont have the same size, - and we need to disregard those eigenvectors of the larger one, that have eigenvalue zero, - since we dont have corresponding eigenvalues of the smaller one. - - Options - ------- - .. cfg:config :: QRBasedTEBDEngine - :include: TEBDEngine - - cbe_expand : float - Expansion rate. The QR-based decomposition is carried out at an expanded bond dimension - ``eta = (1 + cbe_expand) * chi``, where ``chi`` is the bond dimension before the time step. - Default is `0.1`. - cbe_expand_0 : float - Expansion rate at low ``chi``. - If given, the expansion rate decreases linearly from ``cbe_expand_0`` at ``chi == 1`` - to ``cbe_expand`` at ``chi == trunc_params['chi_max']``, then remains constant. - If not given, the expansion rate is ``cbe_expand`` at all ``chi``. - cbe_min_block_increase : int - Minimum bond dimension increase for each block. Default is `1`. - use_eig_based_svd : bool - Whether the SVD of the bond matrix :math:`\Xi` should be carried out numerically via - the eigensystem. This is faster on GPUs, but less accurate. - It makes no sense to do this on CPU. It is currently not supported for update_imag. - Default is `False`. - compute_err : bool - Whether the truncation error should be computed exactly. - Compared to SVD-based TEBD, computing the truncation error is significantly more expensive. - If `True` (default), the full error is computed. - Otherwise, the truncation error is set to NaN. 
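-
-    Example (illustrative option set; `psi` and `model` as usual for TEBD):
-
-        eng = QRBasedTEBDEngine(psi, model, options={'cbe_expand': 0.1, 'compute_err': True})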
- """ - - def _expansion_rate(self, i): - """get expansion rate for updating bond i""" - expand = self.options.get('cbe_expand', 0.1) - expand_0 = self.options.get('cbe_expand_0', None) - - if expand_0 is None or expand_0 == expand: - return expand - - chi_max = self.trunc_params.get('chi_max', None) - if chi_max is None: - raise ValueError('Need to specify trunc_params["chi_max"] in order to use cbe_expand_0.') - - chi = min(self.psi.get_SL(i).shape) - return max(expand_0 - chi / chi_max * (expand_0 - expand), expand) - - def update_bond(self, i, U_bond): - i0, i1 = i - 1, i - expand = self._expansion_rate(i) - logger.debug(f'Update sites ({i0}, {i1}). CBE expand={expand}') - # Construct the theta matrix - C = self.psi.get_theta(i0, n=2, formL=0.) # the two B without the S on the left - C = npc.tensordot(U_bond, C, axes=(['p0*', 'p1*'], ['p0', 'p1'])) # apply U - C.itranspose(['vL', 'p0', 'p1', 'vR']) - theta = C.scale_axis(self.psi.get_SL(i0), 'vL') - theta = theta.combine_legs([('vL', 'p0'), ('p1', 'vR')], qconj=[+1, -1]) - - min_block_increase = self.options.get('cbe_min_block_increase', 1) - Y0 = _qr_tebd_cbe_Y0(B_L=self.psi.get_B(i0, 'B'), B_R=self.psi.get_B(i1, 'B'), theta=theta, - expand=expand, min_block_increase=min_block_increase) - A_L, S, B_R, trunc_err, renormalize = _qr_based_decomposition( - theta=theta, Y0=Y0, use_eig_based_svd=self.options.get('use_eig_based_svd', False), - need_A_L=False, compute_err=self.options.get('compute_err', True), - trunc_params=self.trunc_params - ) - B_L = npc.tensordot(C.combine_legs(('p1', 'vR'), pipes=theta.legs[1]), - B_R.conj(), - axes=[['(p1.vR)'], ['(p*.vR*)']]) #/ renormalize - - #B_L = npc.tensordot(C.combine_legs(('p1', 'vR'), pipes=theta.legs[1]), - # B_R.conj(), - # axes=[['(p1.vR)'], ['(p*.vR*)']]) / renormalize - # - #deleted the /renormalize to make it work for non hermitian Hamitltonian - - # - B_L.ireplace_labels(['p0', 'vL*'], ['p', 'vR']) - B_R = B_R.split_legs(1) - - #self.psi.norm *= renormalize - # - #deleted self.psi.norm because we need to calculate the norm on our own - self.psi.set_B(i0, B_L, form='B') - self.psi.set_SL(i1, S) - self.psi.set_B(i1, B_R, form='B') - self._trunc_err_bonds[i] = self._trunc_err_bonds[i] + trunc_err - return trunc_err - - def update_bond_imag(self, i, U_bond): - i0, i1 = i - 1, i - expand = self._expansion_rate(i) - logger.debug(f'Update sites ({i0}, {i1}). 
CBE expand={expand}') - # Construct the theta matrix - theta = self.psi.get_theta(i0, n=2) - theta = npc.tensordot(U_bond, theta, axes=(['p0*', 'p1*'], ['p0', 'p1'])) - theta.itranspose(['vL', 'p0', 'p1', 'vR']) - theta = theta.combine_legs([('vL', 'p0'), ('p1', 'vR')], qconj=[+1, -1]) - - use_eig_based_svd = self.options.get('use_eig_based_svd', False) - - if use_eig_based_svd: - # see todo comment in _eig_based_svd - raise NotImplementedError('update_bond_imag does not (yet) support eig based SVD') - - min_block_increase = self.options.get('cbe_min_block_increase', 1) - Y0 = _qr_tebd_cbe_Y0(B_L=self.psi.get_B(i0, 'B'), B_R=self.psi.get_B(i1, 'B'), theta=theta, - expand=expand, min_block_increase=min_block_increase) - A_L, S, B_R, trunc_err, renormalize = _qr_based_decomposition( - theta=theta, Y0=Y0, use_eig_based_svd=use_eig_based_svd, - need_A_L=True, compute_err=self.options.get('compute_err', True), - trunc_params=self.trunc_params - ) - A_L = A_L.split_legs(0) - B_R = B_R.split_legs(1) - - self.psi.norm *= renormalize - self.psi.set_B(i0, A_L, form='A') - self.psi.set_SL(i1, S) - self.psi.set_B(i1, B_R, form='B') - self._trunc_err_bonds[i] = self._trunc_err_bonds[i] + trunc_err - - return trunc_err -def _qr_tebd_cbe_Y0(B_L: npc.Array, B_R: npc.Array, theta: npc.Array, expand: float, min_block_increase: int): - """Generate the initial guess Y0 for the left isometry in QR based TEBD - - Parameters - ---------- - B_L : Array with legs [vL, p, vR] - B_R : Array with legs [vL, p, vR] - theta : Array with legs [(vL.p0), (p1.vR)] - expand : float or None - - Returns - ------- - Y0 : Array with legs [vL, (p1.vR)] - """ - if expand is None or expand == 0: - return B_R.combine_legs(['p', 'vR']).ireplace_labels('(p.vR)', '(p1.vR)') - - assert min_block_increase >= 0 - - Y0 = theta.copy(deep=False) - Y0.legs[0] = Y0.legs[0].to_LegCharge() - Y0.ireplace_label('(vL.p0)', 'vL') - if any(B_L.qtotal != 0): - Y0.gauge_total_charge('vL', new_qtotal=B_R.qtotal) - vL_old = B_R.get_leg('vL') - if not vL_old.is_blocked(): - vL_old = vL_old.sort()[1] - vL_new = Y0.get_leg('vL') # is blocked, since created from pipe - - # vL_old is guaranteed to be a slice of vL_new by charge rule in B_L - piv = np.zeros(vL_new.ind_len, dtype=bool) # indices to keep in vL_new - increase_per_block = max(min_block_increase, int(vL_old.ind_len * expand // vL_new.block_number)) - sizes_old = vL_old.get_block_sizes() - sizes_new = vL_new.get_block_sizes() - # iterate over charge blocks in vL_new and vL_old at the same time - j_old = 0 - q_old = vL_old.charges[j_old, :] - qdata_order = np.argsort(Y0._qdata[:, 0]) - qdata_idx = 0 - for j_new, q_new in enumerate(vL_new.charges): - if all(q_new == q_old): # have charge block in both vL_new and vL_old - s_new = sizes_old[j_old] + increase_per_block - # move to next charge block in next loop iteration - j_old += 1 - if j_old < len(vL_old.charges): - q_old = vL_old.charges[j_old, :] - else: # charge block only in vL_new - s_new = increase_per_block - s_new = min(s_new, sizes_new[j_new]) # don't go beyond block - - if Y0._qdata[qdata_order[qdata_idx], 0] != j_new: - # block does not exist - # while we could set corresponding piv entries to True, it would not help, since - # the corresponding "entries" of Y0 are zero anyway - continue - - # block has axis [vL, (p1.vR)]. 
want to keep the s_new slices of the vL axis - # that have the largest norm - norms = np.linalg.norm(Y0._data[qdata_order[qdata_idx]], axis=1) - kept_slices = np.argsort(-norms)[:s_new] # negative sign so we sort large to small - start = vL_new.slices[j_new] - piv[start + kept_slices] = True - - qdata_idx += 1 - if qdata_idx >= Y0._qdata.shape[0]: - break - - Y0.iproject(piv, 'vL') - return Y0 -def _qr_based_decomposition(theta: npc.Array, Y0: npc.Array, use_eig_based_svd: bool, trunc_params, - need_A_L: bool, compute_err: bool): - """Perform the decomposition step of QR based TEBD - - Parameters - ---------- - theta : Array with legs [(vL.p0), (p1.vR)] - Y0 : Array with legs [vL, (p1.vR)] - ... - - Returns - ------- - A_L : array with legs [(vL.p), vR] or None - S : 1D numpy array - B_R : array with legs [vL, (p.vR)] - trunc_err : TruncationError - renormalize : float - """ - - if compute_err: - need_A_L = True - - # QR based updates - theta_i0 = npc.tensordot(theta, Y0.conj(), ['(p1.vR)', '(p1*.vR*)']).ireplace_label('vL*', 'vR') - A_L, _ = npc.qr(theta_i0, inner_labels=['vR', 'vL']) - # A_L: [(vL.p0), vR] - theta_i1 = npc.tensordot(A_L.conj(), theta, ['(vL*.p0*)', '(vL.p0)']).ireplace_label('vR*', 'vL') - theta_i1.itranspose(['(p1.vR)', 'vL']) - B_R, Xi = npc.qr(theta_i1, inner_labels=['vL', 'vR'], inner_qconj=-1) - B_R.itranspose(['vL', '(p1.vR)']) - Xi.itranspose(['vL', 'vR']) - - # SVD of bond matrix Xi - if use_eig_based_svd: - U, S, Vd, trunc_err, renormalize = _eig_based_svd( - Xi, inner_labels=['vR', 'vL'], need_U=need_A_L, trunc_params=trunc_params - ) - else: - U, S, Vd, _, renormalize = svd_theta(Xi, trunc_params) - B_R = npc.tensordot(Vd, B_R, ['vR', 'vL']) - if need_A_L: - A_L = npc.tensordot(A_L, U, ['vR', 'vL']) - else: - A_L = None - - if compute_err: - theta_approx = npc.tensordot(A_L.scale_axis(S, axis='vR'), B_R, ['vR', 'vL']) - eps = npc.norm(theta - theta_approx) ** 2 - trunc_err = TruncationError(eps, 1. - 2. * eps) - else: - trunc_err = TruncationError(np.nan, np.nan) - - B_R = B_R.ireplace_label('(p1.vR)', '(p.vR)') - if need_A_L: - A_L = A_L.ireplace_label('(vL.p0)', '(vL.p)') - - return A_L, S, B_R, trunc_err, renormalize -def _eig_based_svd(A, need_U: bool = True, need_Vd: bool = True, inner_labels=[None, None], - trunc_params=None): - """Computes the singular value decomposition of a matrix A via eigh - - Singular values and vectors are obtained by diagonalizing the "square" A.hc @ A and/or A @ A.hc, - i.e. with two eigh calls instead of an svd call. - - Truncation if performed if and only if trunc_params are given. - This performs better on GPU, but is not really useful on CPU. - If isometries U or Vd are not needed, their computation can be omitted for performance. - - Does not (yet) support computing both U and Vd - """ - - assert A.rank == 2 - - if need_U and need_Vd: - # TODO (JU) just doing separate eighs for U, S and for S, Vd is not sufficient - # the phases of U / Vd are arbitrary. - # Need to put in more work in that case... 
- raise NotImplementedError - - if need_U: - Vd = None - A_Ahc = npc.tensordot(A, A.conj(), [1, 1]) - L, U = npc.eigh(A_Ahc, sort='>') - S = np.sqrt(np.abs(L)) # abs to avoid `nan` due to accidentially negative values close to zero - U = U.ireplace_label('eig', inner_labels[0]) - elif need_Vd: - U = None - Ahc_A = npc.tensordot(A.conj(), A, [0, 0]) - L, V = npc.eigh(Ahc_A, sort='>') - S = np.sqrt(np.abs(L)) # abs to avoid `nan` due to accidentially negative values close to zero - Vd = V.iconj().itranspose().ireplace_label('eig*', inner_labels[1]) - else: - U = None - Vd = None - # use the smaller of the two square matrices -- they have the same eigenvalues - if A.shape[1] >= A.shape[0]: - A2 = npc.tensordot(A, A.conj(), [1, 0]) - else: - A2 = npc.tensordot(A.conj(), A, [1, 0]) - L = npc.eigvalsh(A2) - S = np.sqrt(np.abs(L)) # abs to avoid `nan` due to accidentially negative values close to zero - - if trunc_params is not None: - piv, renormalize, trunc_err = truncate(S, trunc_params) - S = S[piv] - S /= renormalize - if need_U: - U.iproject(piv, 1) - if need_Vd: - Vd.iproject(piv, 0) - else: - renormalize = np.linalg.norm(S) - S /= renormalize - trunc_err = TruncationError() - - return U, S, Vd, trunc_err, renormalize - - -class SVDBasedTEBDEngine(TimeEvolutionAlgorithm): - def __init__(self, psi, model, options, **kwargs): - TimeEvolutionAlgorithm.__init__(self, psi, model, options, **kwargs) - self.trunc_err = self.options.get('start_trunc_err', TruncationError()) - self._U = None - self._U_param = {} - self._trunc_err_bonds = [TruncationError() for i in range(psi.L + 1)] - self._update_index = None - - @property - def TEBD_params(self): - warnings.warn("renamed self.TEBD_params -> self.options", FutureWarning, stacklevel=2) - return self.options - - @property - def trunc_err_bonds(self): - """truncation error introduced on each non-trivial bond.""" - return self._trunc_err_bonds[self.psi.nontrivial_bonds] - - def run(self): - """Run TEBD real time evolution by `N_steps`*`dt`.""" - # initialize parameters - delta_t = self.options.get('dt', 0.1) - N_steps = self.options.get('N_steps', 10) - TrotterOrder = self.options.get('order', 2) - E_offset = self.options.get('E_offset', None) - - self.calc_U(TrotterOrder, delta_t, type_evo='real', E_offset=E_offset) - - Sold = np.mean(self.psi.entanglement_entropy()) - start_time = time.time() - - self.update(N_steps) - - S = self.psi.entanglement_entropy() - logger.info( - "--> time=%(t)3.3f, max(chi)=%(chi)d, max(S)=%(S).5f, " - "avg DeltaS=%(dS).4e, since last update: %(wall_time).1fs", { - 't': self.evolved_time.real, - 'chi': max(self.psi.chi), - 'S': max(S), - 'dS': np.mean(S) - Sold, - 'wall_time': time.time() - start_time, - }) - - def run_GS(self): - # initialize parameters - delta_tau_list = self.options.get( - 'delta_tau_list', - [0.1, 0.01, 0.001, 1.e-4, 1.e-5, 1.e-6, 1.e-7, 1.e-8, 1.e-9, 1.e-10, 1.e-11, 0.]) - max_error_E = self.options.get('max_error_E', 1.e-13) - N_steps = self.options.get('N_steps', 10) - TrotterOrder = self.options.get('order', 2) - - Eold = np.mean(self.model.bond_energies(self.psi)) - Sold = np.mean(self.psi.entanglement_entropy()) - start_time = time.time() - - for delta_tau in delta_tau_list: - logger.info("delta_tau=%e", delta_tau) - self.calc_U(TrotterOrder, delta_tau, type_evo='imag') - DeltaE = 2 * max_error_E - step = 0 - while (DeltaE > max_error_E): - if self.psi.finite and TrotterOrder == 2: - self.update_imag(N_steps) - else: - self.update(N_steps) - step += N_steps - E = 
np.mean(self.model.bond_energies(self.psi)) - DeltaE = abs(Eold - E) - Eold = E - S = self.psi.entanglement_entropy() - max_S = max(S) - S = np.mean(S) - DeltaS = S - Sold - Sold = S - logger.info( - "--> step=%(step)6d, beta=%(beta)3.3f, max(chi)=%(max_chi)d," - "DeltaE=%(dE).2e, E_bond=%(E).10f, Delta_S=%(dS).4e, " - "max(S)=%(max_S).10f, time simulated: %(wall_time).1fs", { - 'step': step, - 'beta': -self.evolved_time.imag, - 'max_chi': max(self.psi.chi), - 'dE': DeltaE, - 'E': E.real, - 'dS': DeltaS, - 'max_S': max_S, - 'wall_time': time.time() - start_time, - }) - # done - - @staticmethod - def suzuki_trotter_time_steps(order): - if order == 1: - return [1.] - elif order == 2: - return [0.5, 1.] - elif order == 4: - t1 = 1. / (4. - 4.**(1 / 3.)) - t3 = 1. - 4. * t1 - return [t1 / 2., t1, (t1 + t3) / 2., t3] - elif order == '4_opt': - # Eq (30a) of arXiv:1901.04974 - a1 = 0.095848502741203681182 - b1 = 0.42652466131587616168 - a2 = -0.078111158921637922695 - b2 = -0.12039526945509726545 - return [a1, b1, a2, b2, 0.5 - a1 - a2, 1. - 2 * (b1 + b2)] # a1 b1 a2 b2 a3 b3 - # else - raise ValueError("Unknown order %r for Suzuki Trotter decomposition" % order) - - @staticmethod - def suzuki_trotter_decomposition(order, N_steps): - even, odd = 0, 1 - if N_steps == 0: - return [] - if order == 1: - a = (0, odd) - b = (0, even) - return [a, b] * N_steps - elif order == 2: - a = (0, odd) # dt/2 - a2 = (1, odd) # dt - b = (1, even) # dt - # U = [a b a]*N - # = a b [a2 b]*(N-1) a - return [a, b] + [a2, b] * (N_steps - 1) + [a] - elif order == 4: - a = (0, odd) # t1/2 - a2 = (1, odd) # t1 - b = (1, even) # t1 - c = (2, odd) # (t1 + t3) / 2 == (1 - 3 * t1)/2 - d = (3, even) # t3 = 1 - 4 * t1 - # From Schollwoeck 2011 (:arxiv:`1008.3477`): - # U = U(t1) U(t2) U(t3) U(t2) U(t1) - # with U(dt) = U(dt/2, odd) U(dt, even) U(dt/2, odd) and t1 == t2 - # Using above definitions, we arrive at: - # U = [a b a2 b c d c b a2 b a] * N - # = [a b a2 b c d c b a2 b] + [a2 b a2 b c d c b a2 b a] * (N-1) + [a] - steps = [a, b, a2, b, c, d, c, b, a2, b] - steps = steps + [a2, b, a2, b, c, d, c, b, a2, b] * (N_steps - 1) - steps = steps + [a] - return steps - elif order == '4_opt': - # symmetric: a1 b1 a2 b2 a3 b3 a2 b2 a2 b1 a1 - steps = [(0, odd), (1, even), (2, odd), (3, even), (4, odd), (5, even), - (4, odd), (3, even), (2, odd), (1, even), (0, odd)] # yapf: disable - return steps * N_steps - # else - raise ValueError("Unknown order {0!r} for Suzuki Trotter decomposition".format(order)) - - def calc_U(self, order, delta_t, type_evo='real', E_offset=None): - U_param = dict(order=order, delta_t=delta_t, type_evo=type_evo, E_offset=E_offset) - if type_evo == 'real': - U_param['tau'] = delta_t - elif type_evo == 'imag': - U_param['tau'] = -1.j * delta_t - else: - raise ValueError("Invalid value for `type_evo`: " + repr(type_evo)) - if self._U_param == U_param: # same keys and values as cached - logger.debug("Skip recalculation of U with same parameters as before") - return # nothing to do: U is cached - self._U_param = U_param - logger.info("Calculate U for %s", U_param) - - L = self.psi.L - self._U = [] - for dt in self.suzuki_trotter_time_steps(order): - U_bond = [ - self._calc_U_bond(i_bond, dt * delta_t, type_evo, E_offset) for i_bond in range(L) - ] - self._U.append(U_bond) - # done - - def update(self, N_steps): - #print("update norm =",self.psi.norm) - #print("psi =",self.psi) - trunc_err = TruncationError() - order = self._U_param['order'] - for U_idx_dt, odd in self.suzuki_trotter_decomposition(order, 
N_steps):
-            trunc_err += self.update_step(U_idx_dt, odd)
-        self.evolved_time = self.evolved_time + N_steps * self._U_param['tau']
-        self.trunc_err = self.trunc_err + trunc_err  # not += : make a copy!
-        # (this is done to avoid problems of users storing self.trunc_err after each `update`)
-        return trunc_err
-
-    def update_step(self, U_idx_dt, odd):
-        Us = self._U[U_idx_dt]
-        trunc_err = TruncationError()
-        for i_bond in np.arange(int(odd) % 2, self.psi.L, 2):
-            if Us[i_bond] is None:
-                continue  # handles finite vs. infinite boundary conditions
-            self._update_index = (U_idx_dt, i_bond)
-            trunc_err += self.update_bond(i_bond, Us[i_bond])
-        self._update_index = None
-        return trunc_err
-
-    def update_bond(self, i, U_bond):
-        #print("beginning update bond =", self.psi.norm)
-        #print("position = ",i)
-        i0, i1 = i - 1, i
-        logger.debug("Update sites (%d, %d)", i0, i1)
-        # Construct the theta matrix
-        C = self.psi.get_theta(i0, n=2, formL=0.)  # the two B without the S on the left
-        C = npc.tensordot(U_bond, C, axes=(['p0*', 'p1*'], ['p0', 'p1']))  # apply U
-        C.itranspose(['vL', 'p0', 'p1', 'vR'])
-        theta = C.scale_axis(self.psi.get_SL(i0), 'vL')
-        # now theta is the same as if we had done
-        # theta = self.psi.get_theta(i0, n=2)
-        # theta = npc.tensordot(U_bond, theta, axes=(['p0*', 'p1*'], ['p0', 'p1']))  # apply U
-        # but also have C which is the same except the missing "S" on the left
-        # so we don't have to apply inverses of S (see below)
-
-        theta = theta.combine_legs([('vL', 'p0'), ('p1', 'vR')], qconj=[+1, -1])
-        # Perform the SVD and truncate the wavefunction
-        U, S, V, trunc_err, renormalize = svd_theta(theta,
-                                                    self.trunc_params,
-                                                    [self.psi.get_B(i0, None).qtotal, None],
-                                                    inner_labels=['vR', 'vL'])
-
-        # Split tensor and update matrices
-        B_R = V.split_legs(1).ireplace_label('p1', 'p')
-
-
-        B_L = npc.tensordot(C.combine_legs(('p1', 'vR'), pipes=theta.legs[1]),
-                            V.conj(),
-                            axes=['(p1.vR)', '(p1*.vR*)'])
-        B_L.ireplace_labels(['vL*', 'p0'], ['vR', 'p'])
-        #print("renormalize =", renormalize,"")
-        #print("B_L = ",B_L)
-        #print("norm = ",B_L.norm())
-        #print("singular value norm = ",LA.norm(S))
-        #print("B_R = ",B_R)
-        #re-normalize to <psi|psi> = 1
-        #print("before renormalize =",self.psi.norm)
-
-
-
-        #S = S * renormalize
-        #The vector S of singular values has already been normalized by the factor "renormalize"; does this
-        #have to be undone?
-
-
-        #print("renormalizefactor =", renormalize)
-        #B_L /= renormalize
-        #If I skip the renormalization of B_L, does the norm then change as follows?
-        #self.psi.norm *= renormalize
-        #Compare with
-        #update_bond_imag(self, i, U_bond):
-        #there the norm is multiplied by this factor
-
-        #print("norm = ",B_L.norm())
-        self.psi.set_SR(i0, S)
-        self.psi.set_B(i0, B_L, form='B')
-        self.psi.set_B(i1, B_R, form='B')
-
-        #print("norm =", self.psi.norm, type(self.psi))
-        self._trunc_err_bonds[i] = self._trunc_err_bonds[i] + trunc_err
-        #print("end renormalize =", self.psi.norm)
-        return trunc_err
-
-    def update_imag(self, N_steps):
-        trunc_err = TruncationError()
-        order = self._U_param['order']
-        # allow only second order evolution
-        if order != 2 or not self.psi.finite:
-            # Would lead to loss of canonical form. What about DMRG?
- raise NotImplementedError("Use DMRG instead...") - U_idx_dt = 0 # always with dt=0.5 - assert (self.suzuki_trotter_time_steps(order)[U_idx_dt] == 0.5) - assert (self.psi.finite) # finite or segment bc - Us = self._U[U_idx_dt] - for _ in range(N_steps): - # sweep right - for i_bond in range(self.psi.L): - if Us[i_bond] is None: - continue # handles finite vs. infinite boundary conditions - self._update_index = (U_idx_dt, i_bond) - trunc_err += self.update_bond_imag(i_bond, Us[i_bond]) - # sweep left - for i_bond in range(self.psi.L - 1, -1, -1): - if Us[i_bond] is None: - continue # handles finite vs. infinite boundary conditions - self._update_index = (U_idx_dt, i_bond) - trunc_err += self.update_bond_imag(i_bond, Us[i_bond]) - self._update_index = None - self.evolved_time = self.evolved_time + N_steps * self._U_param['tau'] - self.trunc_err = self.trunc_err + trunc_err # not += : make a copy! - # (this is done to avoid problems of users storing self.trunc_err after each `update`) - return trunc_err - - def update_bond_imag(self, i, U_bond): - i0, i1 = i - 1, i - logger.debug("Update sites (%d, %d)", i0, i1) - # Construct the theta matrix - theta = self.psi.get_theta(i0, n=2) # 'vL', 'vR', 'p0', 'p1' - theta = npc.tensordot(U_bond, theta, axes=(['p0*', 'p1*'], ['p0', 'p1'])) - theta = theta.combine_legs([('vL', 'p0'), ('vR', 'p1')], qconj=[+1, -1]) - # Perform the SVD and truncate the wavefunction - U, S, V, trunc_err, renormalize = svd_theta(theta, - self.trunc_params, - inner_labels=['vR', 'vL']) - - self.psi.norm *= renormalize - # Split legs and update matrices - B_R = V.split_legs(1).ireplace_label('p1', 'p') - A_L = U.split_legs(0).ireplace_label('p0', 'p') - self.psi.set_SR(i0, S) - self.psi.set_B(i0, A_L, form='A') - self.psi.set_B(i1, B_R, form='B') - self._trunc_err_bonds[i] = self._trunc_err_bonds[i] + trunc_err - return trunc_err - - def _calc_U_bond(self, i_bond, dt, type_evo, E_offset): - """Calculate exponential of a bond Hamitonian. - * ``U_bond = exp(-i dt (H_bond-E_offset_bond))`` for ``type_evo='real'``, or - * ``U_bond = exp(- dt H_bond)`` for ``type_evo='imag'``. - """ - h = self.model.H_bond[i_bond] - if h is None: - return None # don't calculate exp(i H t), if `H` is None - H2 = h.combine_legs([('p0', 'p1'), ('p0*', 'p1*')], qconj=[+1, -1]) - if type_evo == 'imag': - H2 = (-dt) * H2 - elif type_evo == 'real': - if E_offset is not None: - H2 = H2 - npc.diag(E_offset[i_bond], H2.legs[0]) - H2 = (-1.j * dt) * H2 - else: - raise ValueError("Expect either 'real' or 'imag'inary time, got " + repr(type_evo)) - U = npc.expm(H2) -#print("calc_U_bond =", U) - assert (tuple(U.get_leg_labels()) == ('(p0.p1)', '(p0*.p1*)')) - return U.split_legs() - - -class Engine(SVDBasedTEBDEngine): - """Deprecated old name of :class:`TEBDEngine`. - .. deprecated : v0.8.0 - Renamed the `Engine` to `TEBDEngine` to have unique algorithm class names. - """ - def __init__(self, psi, model, options): - msg = "Renamed `Engine` class to `TEBDEngine`." 
- warnings.warn(msg, category=FutureWarning, stacklevel=2) - SVDBasedTEBDEngine.__init__(self, psi, model, options) - - diff --git a/python/test-python.py b/python/test-python.py new file mode 100644 index 0000000..aae1867 --- /dev/null +++ b/python/test-python.py @@ -0,0 +1,58 @@ +import time +import numpy as np + +from mps import MPS +from operators import X, P1, Rx, CONTROL, CONTROL_L + +def run_mps_simulation(T=50, L=30, chi=1024, theta=0.14, cutoff=1e-12): + # build operators + X_op = X() + P1_op = P1() + Rx_op = Rx(theta) + CRX = CONTROL(Rx_op) + CLRX = CONTROL_L(Rx_op) + + # initialize MPS + psi = MPS(L, chi) + + # apply X on every site + for i in range(L): + psi.apply_local_op(i, X_op) + + # record mean‐P1 at each step + mean_expectations = [] + + for t in range(T): + # Trotter sweep + for i in range(0, L-1, 2): + psi.apply_2site_op(i, CRX, cutoff=cutoff) + for i in range(1, L-1, 2): + psi.apply_2site_op(i, CLRX, cutoff=cutoff) + for i in range(1, L-1, 2): + psi.apply_2site_op(i, CRX, cutoff=cutoff) + for i in range(0, L-1, 2): + psi.apply_2site_op(i, CLRX, cutoff=cutoff) + + # compute ⟨P1⟩ on all sites + exps = psi.expectation_value(P1_op, list(range(L))) # NumPy array + + total = float(np.sum(exps)) + mean = total / L + mean_expectations.append(mean) + + # bond‐dimension stats + chis = psi.chis() + print(f"t = {t:2d}, max(chi) = {max(chis):4d}, mean(chi) = {sum(chis)/L:8.2f}") + + # for (i, d) in enumerate(chis): + # print(f" Site {i}: chi = {d}: shape = {psi.sites[i].tensor.shape}") + + + # final printout in C++‐style list + print("[1, " + ", ".join(f"{m:.6f}" for m in mean_expectations) + "]") + +if __name__ == "__main__": + start = time.perf_counter() + run_mps_simulation() + dt = (time.perf_counter() - start) * 1000.0 + print(f"Execution time: {dt:.0f} ms") \ No newline at end of file diff --git a/python/mps/tests/test_MPS.py b/python/test_MPS.py similarity index 97% rename from python/mps/tests/test_MPS.py rename to python/test_MPS.py index 51dccd3..339d45a 100644 --- a/python/mps/tests/test_MPS.py +++ b/python/test_MPS.py @@ -5,8 +5,8 @@ from math import pi as M_PI -from mps.mps import MPS -from mps.mps.operators import X, H, CNOT, P1, P0, CONTROL, CONTROL_L, Rx +from mps import MPS +from operators import X, H, CNOT, P1, P0, CONTROL, CONTROL_L, Rx class MPSTest(unittest.TestCase): def test_application(self): diff --git a/src/main.cu b/src/main.cu deleted file mode 100644 index 670e5fb..0000000 --- a/src/main.cu +++ /dev/null @@ -1,134 +0,0 @@ -#include "Operators.hpp" -#include "MPSSimulator.cuh" - -#include - - -void pretty_print(const std::vector &probs) { - std::cout << "["; - for (auto i = 0; i < probs.size(); i++) { - std::cout << fmt::format("{:.8f}", probs[i]); - if (i < probs.size() - 1) { - std::cout << ", "; - } - } - std::cout << "]\n"; -} - -void operator_print(const operators::Operator &op, unsigned N_rows = 0) { - - std::cout << "Operator: [\n\t"; - for (auto i = 0; i < op.data.size(); i++) { - std::cout << op.data[i] << " "; - if (i % N_rows == N_rows - 1 && i < op.data.size() - 1) { - std::cout << "\n\t"; - } - } - std::cout << "\n]\n"; - -} - -int main() { - // enable debug logs - // spdlog::set_level(spdlog::level::debug); - - static_assert(sizeof(size_t) == sizeof(int64_t), "Please build this sample on a 64-bit architecture!"); - - static Scalar theta = 0.14; - static Scalar phi = 0.1222; - - // only used for constructing the operators on the host - const auto Rx = operators::Rx(theta); - auto Rx_d = Rx.device(); - - // Allocate memory for operators on 
the GPU - auto CRRx = operators::CONTROL(Rx).device(); - auto CLRx = operators::CONTROL_L(Rx).device(); - - auto K1 = operators::K1(phi).device(); - auto K2 = operators::K2(phi).device(); - - auto X = operators::X.device(); - auto P0 = operators::P0.device(); - auto P1 = operators::P1.device(); - - constexpr auto numQubits = 60u; - constexpr auto chi = 1224u; - mps::Simulator simulator(0, numQubits, chi); - - spdlog::info("Allocated {} bytes of scratch memory on GPU: [{}:{}]", simulator.scratchSize, - (void *) simulator.d_scratch, - (void *) (((char *) simulator.d_scratch) + simulator.scratchSize)); - - - { - // apply X gate to all qubits |1111...1> - for (auto i = 0; i < numQubits; i++) { - simulator.apply_operator(X, {i}); - } - } - constexpr auto random_values = std::array{ - 0.6394267984578837, 0.025010755222666936, 0.27502931836911926, 0.22321073814882275, 0.7364712141640124, 0.6766994874229113, 0.8921795677048454, 0.08693883262941615, 0.4219218196852704, 0.029797219438070344, 0.21863797480360336, 0.5053552881033624, 0.026535969683863625, 0.1988376506866485, 0.6498844377795232, 0.5449414806032167, 0.2204406220406967, 0.5892656838759087, 0.8094304566778266, 0.006498759678061017, 0.8058192518328079, 0.6981393949882269, 0.3402505165179919, 0.15547949981178155, 0.9572130722067812, 0.33659454511262676, 0.09274584338014791, 0.09671637683346401, 0.8474943663474598, 0.6037260313668911, 0.8071282732743802, 0.7297317866938179, 0.5362280914547007, 0.9731157639793706, 0.3785343772083535, 0.552040631273227, 0.8294046642529949, 0.6185197523642461, 0.8617069003107772, 0.577352145256762, 0.7045718362149235, 0.045824383655662215, 0.22789827565154686, 0.28938796360210717, 0.0797919769236275, 0.23279088636103018, 0.10100142940972912, 0.2779736031100921, 0.6356844442644002, 0.36483217897008424, 0.37018096711688264, 0.2095070307714877, 0.26697782204911336, 0.936654587712494, 0.6480353852465935, 0.6091310056669882, 0.171138648198097, 0.7291267979503492, 0.1634024937619284, 0.3794554417576478, 0.9895233506365952, 0.6399997598540929}; - - static auto N_time_steps = 150; - auto probs_index = 0; - for (auto t = 0; t < N_time_steps; t++) { - - { - std::cout << "t = " << t << std::endl; - - // Measure expectation value of K2 - simulator.mps_factorization(); - auto probs = simulator.expectation_value(K2); - std::cout << "Expectation values: "; - pretty_print(probs); - - for (auto i = 0; i < numQubits; i++) { - if (probs[i] > random_values[probs_index]) { - simulator.apply_operator(K1, {i}, false); - } else { - simulator.apply_operator(K2, {i}, false); - } - probs_index++; - } - } - - { - // apply a layer of CX gates to neighboring qubits - for (auto i = 0; i < numQubits - 2; i += 2) { - simulator.apply_operator(CRRx, {i, i + 1}); - } - - for (auto i = 1; i < numQubits - 2; i += 2) { - simulator.apply_operator(CLRx, {i, i + 1}); - }; - - for (auto i = 0; i < numQubits - 2; i += 2) { - simulator.apply_operator(CLRx, {i, i + 1}); - }; - - for (auto i = 1; i < numQubits - 2; i += 2) { - simulator.apply_operator(CRRx, {i, i + 1}); - }; - - } - { - simulator.mps_factorization(); - // Compute expectation values - auto expects = simulator.expectation_value(P1); - //std::cout << "Expectation values: "; - //pretty_print(expects); - auto mean = std::accumulate(expects.begin(), expects.end(), 0.0) / expects.size(); - std::cout << "Mean expectation value: " << mean << std::endl; - } - } - { - simulator.mps_factorization(); - // Compute expectation values - auto expects = simulator.expectation_value(P1); - std::cout << 
"Expectation values: "; - pretty_print(expects); - auto mean = std::accumulate(expects.begin(), expects.end(), 0.0) / expects.size(); - std::cout << "Mean expectation value: " << mean << std::endl; - } -} diff --git a/src/mps.cu b/src/mps.cu index 68e6ac1..42639a6 100644 --- a/src/mps.cu +++ b/src/mps.cu @@ -20,7 +20,7 @@ void print_expectation_values(mps::MPS& psi, std::vector& sites, tensor::De spdlog::info("Expectation values: [{}]", ss.str()); } -int run() { +void run() { constexpr int T = 30; constexpr int L = 30; constexpr int chi = 1024; @@ -38,36 +38,34 @@ int run() { auto sites = std::vector(L, 0); std::iota(sites.begin(), sites.end(), 0); - auto streams = std::vector(2); - /*for (auto& stream : streams) { - HANDLE_CUDA_ERROR(cudaStreamCreate(&stream)); - }*/ + auto stream = cudaStream_t{nullptr}; + HANDLE_CUDA_ERROR(cudaStreamCreate(&stream)); auto summed_expectations = thrust::device_vector(T); auto expectations = thrust::device_vector(L); for (auto i = 0; i < L; ++i) { - psi.apply_local_op(i, X); + psi.apply_local_op(i, X, stream); } for (auto t = 0; t < T; ++t) { for (auto i = 0; i < L-1; i += 2) { - psi.apply_2_site_op(i, CRX); + psi.apply_2_site_op(i, CRX, stream); } for (auto i = 1; i < L-1; i += 2) { - psi.apply_2_site_op(i, CLRX); + psi.apply_2_site_op(i, CLRX, stream); } for (auto i = 1; i < L-1; i += 2) { - psi.apply_2_site_op(i, CRX); + psi.apply_2_site_op(i, CRX, stream); } for (auto i = 0; i < L-1; i += 2) { - psi.apply_2_site_op(i, CLRX); + psi.apply_2_site_op(i, CLRX, stream); } - psi.expectation_values(P1, sites, expectations); - reduction::sum(thrust::raw_pointer_cast(expectations.data()), expectations.size(), thrust::raw_pointer_cast(&summed_expectations[t])); + psi.expectation_values(P1, sites, expectations, stream); + reduction::sum(thrust::raw_pointer_cast(expectations.data()), expectations.size(), thrust::raw_pointer_cast(&summed_expectations[t]), stream); std::cout << "t = " << t << ", max(chi) = " << psi.get_max_bond_dimension() << ", mean(chi) = " << psi.get_avg_bond_dimension() << std::endl;// ", mean(P1) = " << mean_expectations[t] << std::endl; @@ -85,12 +83,10 @@ int run() { } std::cout << "]" << std::endl; - for (auto& stream : streams) { - if (stream != nullptr) - HANDLE_CUDA_ERROR(cudaStreamDestroy(stream)); - } - return true; + if (stream != nullptr) + HANDLE_CUDA_ERROR(cudaStreamDestroy(stream)); + } int main() { diff --git a/tests/test_einsum.cu b/tests/test_einsum.cu deleted file mode 100644 index 63b91b1..0000000 --- a/tests/test_einsum.cu +++ /dev/null @@ -1,155 +0,0 @@ -/* - * test_einsum.cpp - * - * A simple test for the Einsum functionality using the equation "ij,jk->ik". - * - * This test creates two matrices: - * - A of shape (2, 3) - * - B of shape (3, 4) - * and computes the contraction C = einsum("ij,jk->ik", A, B) - * which should be equivalent to matrix multiplication (A * B). - * - * Expected C has shape (2, 4). - */ - -#include -#include -#include -#include "tensordot.h" // Contains the Einsum and tensordot implementations -#include "tensor.h" // Contains the DeviceTensor definition -#include "contraction.cuh"// Contains any additional contraction helper code - -// Macro for CUDA error checking (if not defined elsewhere) -#ifndef HANDLE_CUDA_ERROR -#define HANDLE_CUDA_ERROR(x) \ - { \ - cudaError_t err = x; \ - if(err != cudaSuccess){ \ - std::cerr << "CUDA Error: " << cudaGetErrorString(err) << std::endl; \ - exit(EXIT_FAILURE); \ - } \ - } -#endif - -int main() { - // Select CUDA device 0. 
- cudaSetDevice(0); - - // Create a cuTENSOR handle. - cutensorHandle_t handle; - CUTENSOR_CHECK(cutensorCreate(&handle)); - - // Define shapes for A (2x3) and B (3x4) - std::vector shapeA = {2, 3}; // 2 rows, 3 columns. - std::vector shapeB = {3, 4}; // 3 rows, 4 columns. - tensor::DeviceTensor A(shapeA); - tensor::DeviceTensor B(shapeB); - - // Prepare host data for A and B in column-major order. - // For A: - // Column 0: [1, 4] - // Column 1: [2, 5] - // Column 2: [3, 6] - std::vector hA = { - make_cuComplex(1.0f, 0.0f), // A(0,0) - make_cuComplex(4.0f, 0.0f), // A(1,0) - make_cuComplex(2.0f, 0.0f), // A(0,1) - make_cuComplex(5.0f, 0.0f), // A(1,1) - make_cuComplex(3.0f, 0.0f), // A(0,2) - make_cuComplex(6.0f, 0.0f) // A(1,2) - }; - - // For B: - // Column 0: [7, 10, 13] (B(0,0), B(1,0), B(2,0)) - // Column 1: [8, 11, 14] - // Column 2: [9, 12, 15] - // Column 3: [10, 13, 16] - std::vector hB = { - make_cuComplex(7.0f, 0.0f), // B(0,0) - make_cuComplex(10.0f, 0.0f), // B(1,0) - make_cuComplex(13.0f, 0.0f), // B(2,0) - make_cuComplex(8.0f, 0.0f), // B(0,1) - make_cuComplex(11.0f, 0.0f), // B(1,1) - make_cuComplex(14.0f, 0.0f), // B(2,1) - make_cuComplex(9.0f, 0.0f), // B(0,2) - make_cuComplex(12.0f, 0.0f), // B(1,2) - make_cuComplex(15.0f, 0.0f), // B(2,2) - make_cuComplex(10.0f, 0.0f), // B(0,3) - make_cuComplex(13.0f, 0.0f), // B(1,3) - make_cuComplex(16.0f, 0.0f) // B(2,3) - }; - - // Copy host data to device. - size_t totalA = 2 * 3; - size_t totalB = 3 * 4; - HANDLE_CUDA_ERROR(cudaMemcpy(A.ptr(), hA.data(), totalA * sizeof(cuComplex), cudaMemcpyHostToDevice)); - HANDLE_CUDA_ERROR(cudaMemcpy(B.ptr(), hB.data(), totalB * sizeof(cuComplex), cudaMemcpyHostToDevice)); - - // Prepare output tensor C; expected shape is (2,4). - std::vector shapeC = {2, 4}; - tensor::DeviceTensor C(shapeC); - - // Create a workspace for the Einsum call. - // (Size may need to be adjusted depending on the problem.) - contraction::Workspace workspace(1024 * 1024); - - // Use the Einsum helper to compute C = einsum("ij,jk->ik", A, B) - std::string subscripts = "ij,jk->ik"; - contraction::einsum(handle, subscripts, A, B, C, 0, workspace, false, false); - - // Copy result from device to host. - std::vector hC(2 * 4); - HANDLE_CUDA_ERROR(cudaMemcpy(hC.data(), C.ptr(), hC.size() * sizeof(cuComplex), cudaMemcpyDeviceToHost)); - - // Print the result. - std::cout << "Result tensor C (matrix multiplication via einsum \"ij,jk->ik\"):" << std::endl; - // Since our DeviceTensor uses column-major order, element (i,j) is at index i + j*2. 
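-    // e.g. element (1,3) of the 2x4 result sits at index 1 + 3*2 = 7, the last entry of hC.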
- for (int i = 0; i < 2; i++) { - for (int j = 0; j < 4; j++) { - int idx = i + j * 2; - cuComplex val = hC[idx]; - std::cout << "(" << val.x << "," << val.y << ") "; - } - std::cout << std::endl; - } - - // Expected result computed on host: - // A = [ [1,2,3], - // [4,5,6] ] - // B = [ [7, 8, 9, 10], - // [10,11,12,13], - // [13,14,15,16] ] - // So, C = A*B = [ [ 1*7+2*10+3*13, 1*8+2*11+3*14, 1*9+2*12+3*15, 1*10+2*13+3*16 ], - // [ 4*7+5*10+6*13, 4*8+5*11+6*14, 4*9+5*12+6*15, 4*10+5*13+6*16 ] ] - // = [ [66, 72, 78, 84], - // [156,171,186,201] ] - std::vector expected = { - make_cuComplex(66.0f, 0.0f), - make_cuComplex(156.0f, 0.0f), - make_cuComplex(72.0f, 0.0f), - make_cuComplex(171.0f, 0.0f), - make_cuComplex(78.0f, 0.0f), - make_cuComplex(186.0f, 0.0f), - make_cuComplex(84.0f, 0.0f), - make_cuComplex(201.0f, 0.0f) - }; - - float tol = 1e-3f; - bool pass = true; - for (size_t i = 0; i < expected.size(); i++) { - if (std::abs(hC[i].x - expected[i].x) > tol || std::abs(hC[i].y - expected[i].y) > tol) { - pass = false; - std::cout << "Mismatch at index " << i << ": got (" << hC[i].x << ", " << hC[i].y - << "), expected (" << expected[i].x << ", " << expected[i].y << ")" << std::endl; - } - } - if (pass) - std::cout << "Einsum matrix multiplication test passed!" << std::endl; - else - std::cout << "Einsum matrix multiplication test failed." << std::endl; - - // Cleanup the cuTENSOR handle. - CUTENSOR_CHECK(cutensorDestroy(handle)); - - return 0; -} diff --git a/tests/test_permutation.cu b/tests/test_permutation.cu deleted file mode 100644 index 60ead1f..0000000 --- a/tests/test_permutation.cu +++ /dev/null @@ -1,138 +0,0 @@ -// test.cu -#include -#include -#include -#include -#include -#include -#include -#include "permutation.h" // your permutation helper header -#include "tensor.h" // your DeviceTensor definition -#include - -// Host-side permutation function for verification. -// Given a tensor stored in column-major order (host vector h_src), -// with shape 'shape', and a permutation vector 'permutation', -// returns a new host vector representing the permuted tensor. -thrust::host_vector host_permute( - const thrust::host_vector& h_src, - const std::vector& shape, - const std::vector& permutation) -{ - int n = shape.size(); - int total = 1; - for (auto s : shape) - total *= s; - thrust::host_vector h_dst(total); - // Compute source strides (column-major) - std::vector srcStrides(n, 1); - for (int i = 1; i < n; i++) { - srcStrides[i] = srcStrides[i-1] * shape[i-1]; - } - // Compute new shape: new_shape[i] = shape[ permutation[i] ] - std::vector newShape(n); - for (int i = 0; i < n; i++) { - newShape[i] = shape[ permutation[i] ]; - } - // Compute destination strides (column-major for new shape) - std::vector dstStrides(n, 1); - for (int i = 1; i < n; i++) { - dstStrides[i] = dstStrides[i-1] * newShape[i-1]; - } - // Iterate over all multi-indices in source. - std::vector index(n, 0); - for (int count = 0; count < total; count++) { - int srcIdx = 0; - for (int j = 0; j < n; j++) { - srcIdx += index[j] * srcStrides[j]; - } - // Compute the new multi-index: new_index[k] = index[ permutation[k] ] - std::vector newIndex(n); - for (int k = 0; k < n; k++) { - newIndex[k] = index[ permutation[k] ]; - } - int dstIdx = 0; - for (int k = 0; k < n; k++) { - dstIdx += newIndex[k] * dstStrides[k]; - } - h_dst[dstIdx] = h_src[srcIdx]; - // Increment multi-index in column-major order. 
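-        // (odometer-style: bump index[0]; on overflow reset it and carry into the next mode)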
- int pos = 0; - while (pos < n) { - index[pos]++; - if (index[pos] < shape[pos]) - break; - index[pos] = 0; - pos++; - } - } - return h_dst; -} - -// Test function: creates a tensor with given shape and permutation, -// runs the device permutation, and compares against expected result. -void run_test(const std::vector& shape, const std::vector& permutation) { - int nModes = shape.size(); - int total = 1; - for (auto s : shape) - total *= s; - std::cout << "Running test for tensor shape " << tensor::vector_to_string(shape) - << " with permutation " << tensor::vector_to_string(permutation) << std::endl; - - // Create host data: fill with sequential values (as cuComplex with zero imaginary part). - thrust::host_vector h_src(total); - for (int i = 0; i < total; i++) { - h_src[i] = make_cuComplex(static_cast(i), 0.0f); - } - - // Create device tensor 'src' from h_src. - tensor::DeviceTensor src(h_src, shape); - // Create destination device tensor 'dst' with same shape (will be reshaped by permutation). - tensor::DeviceTensor dst(shape); - - // Create cuTENSOR handle. - cutensorHandle_t handle; - CUTENSOR_CHECK(cutensorCreate(&handle)); - - // Call our permutation function. - permutation::permute(handle, src, dst, permutation); - - // Copy result back to host. - thrust::host_vector h_dst(dst.device_data); - - // Compute expected result using host_permute. - thrust::host_vector expected = host_permute(h_src, shape, permutation); - - // Compare and print results. - bool pass = true; - float tol = 1e-4f; - for (int i = 0; i < total; i++) { - float diff = std::abs(h_dst[i].x - expected[i].x) + std::abs(h_dst[i].y - expected[i].y); - if (diff > tol) { - pass = false; - std::cout << "Mismatch at index " << i << ": got (" << h_dst[i].x << "," << h_dst[i].y - << "), expected (" << expected[i].x << "," << expected[i].y << ")" << std::endl; - } - } - if (pass) - std::cout << "Test PASSED for shape " << tensor::vector_to_string(shape) - << " and permutation " << tensor::vector_to_string(permutation) << std::endl; - else - std::cout << "Test FAILED for shape " << tensor::vector_to_string(shape) - << " and permutation " << tensor::vector_to_string(permutation) << std::endl; - - CUTENSOR_CHECK(cutensorDestroy(handle)); -} - -int main() { - // Test 1: 2D tensor: shape [3, 4], permutation: swap axes -> [1, 0] (expected shape: [4, 3]) - run_test({3, 4}, {1, 0}); - - // Test 2: 3D tensor: shape [2, 3, 4], permutation: {2, 0, 1} (expected shape: [4, 2, 3]) - run_test({2, 3, 4}, {2, 0, 1}); - - // Test 3: 4D tensor: shape [2, 2, 2, 2], permutation: {2, 0, 1, 3} (expected shape: [2, 2, 2, 2]) - run_test({2, 2, 2, 2}, {2, 0, 1, 3}); - - return 0; -} diff --git a/tests/test_reduction.cu b/tests/test_reduction.cu deleted file mode 100644 index c5b8bb6..0000000 --- a/tests/test_reduction.cu +++ /dev/null @@ -1,69 +0,0 @@ -#include -#include -#include -#include -#include - -#include "reduction.h" - -int main() { - const int N = 4; - const float truncation_tol = 0.5f; - - float h_S[N] = {1.0f, 2.0f, 3.0f, 4.0f}; - float expected_norm = sqrtf(30.0f); - - float *d_S; - cudaMalloc(&d_S, N * sizeof(float)); - cudaMemcpy(d_S, h_S, N * sizeof(float), cudaMemcpyHostToDevice); - - int *d_count; - float *d_normsq; - cudaMalloc(&d_count, sizeof(int)); - cudaMalloc(&d_normsq, sizeof(float)); - - cudaStream_t stream; - cudaStreamCreate(&stream); - - int count = reduction::fusedCountNormScaleAndReturn(d_S, N, truncation_tol, d_count, d_normsq, stream); - - float h_S_normalized[N]; - cudaMemcpy(h_S_normalized, d_S, N * sizeof(float), 
-             cudaMemcpyDeviceToHost);
-
-  int h_count;
-  float h_normsq;
-  cudaMemcpy(&h_count, d_count, sizeof(int), cudaMemcpyDeviceToHost);
-  cudaMemcpy(&h_normsq, d_normsq, sizeof(float), cudaMemcpyDeviceToHost);
-
-  std::cout << "Count (elements processed): " << count << " (expected " << N << ")" << std::endl;
-  std::cout << "Block-reduced count from device: " << h_count << std::endl;
-  std::cout << "Accumulated norm-squared from device: " << h_normsq << std::endl;
-  std::cout << "Expected normalization factor: " << expected_norm << std::endl;
-  std::cout << "Normalized S:" << std::endl;
-  for (int i = 0; i < N; i++) {
-    std::cout << h_S_normalized[i] << " ";
-  }
-  std::cout << std::endl;
-
-  bool success = true;
-  for (int i = 0; i < N; i++) {
-    float expected_value = h_S[i] / expected_norm;
-    if (fabs(h_S_normalized[i] - expected_value) > 1e-5) {
-      success = false;
-      std::cerr << "Mismatch at index " << i << ": got "
-                << h_S_normalized[i] << ", expected " << expected_value << std::endl;
-    }
-  }
-  if (success)
-    std::cout << "Test PASSED!" << std::endl;
-  else
-    std::cerr << "Test FAILED!" << std::endl;
-
-  // Cleanup.
-  cudaFree(d_S);
-  cudaFree(d_count);
-  cudaFree(d_normsq);
-  cudaStreamDestroy(stream);
-
-  return 0;
-}
diff --git a/tests/test_tensordot.cu b/tests/test_tensordot.cu
deleted file mode 100644
index 54517e8..0000000
--- a/tests/test_tensordot.cu
+++ /dev/null
@@ -1,371 +0,0 @@
-#include <iostream>
-#include <iomanip>
-#include <cmath>
-#include <numeric>
-
-#include "tensordot.h"
-#include "tensor.h"
-
-int test_tensordot() {
-  // Use CUDA device 0.
-  cudaSetDevice(0);
-
-  // =============================
-  // Test 1: Matrix Multiplication
-  // =============================
-  std::cout << "Running matrix multiplication tensordot test..." << std::endl;
-  // A is a 2x3 matrix and B is a 3x2 matrix.
-  // (Remember: DeviceTensor uses column-major order.)
-  std::vector<int> shapeA = {2, 3};  // 2 rows, 3 columns.
-  std::vector<int> shapeB = {3, 2};  // 3 rows, 2 columns.
-  tensor::DeviceTensor<cuComplex> A(shapeA);
-  tensor::DeviceTensor<cuComplex> B(shapeB);
-
-  // Prepare host data in column-major order.
-  // For A (2x3):
-  //   Column 0: [1, 4]
-  //   Column 1: [2, 5]
-  //   Column 2: [3, 6]
-  std::vector<cuComplex> hA = {
-      make_cuComplex(1.0f, 0.0f),  // A(0,0)
-      make_cuComplex(4.0f, 0.0f),  // A(1,0)
-      make_cuComplex(2.0f, 0.0f),  // A(0,1)
-      make_cuComplex(5.0f, 0.0f),  // A(1,1)
-      make_cuComplex(3.0f, 0.0f),  // A(0,2)
-      make_cuComplex(6.0f, 0.0f)   // A(1,2)
-  };
-
-  // For B (3x2):
-  //   Column 0: [7, 9, 11]
-  //   Column 1: [8, 10, 12]
-  std::vector<cuComplex> hB = {
-      make_cuComplex(7.0f, 0.0f),   // B(0,0)
-      make_cuComplex(9.0f, 0.0f),   // B(1,0)
-      make_cuComplex(11.0f, 0.0f),  // B(2,0)
-      make_cuComplex(8.0f, 0.0f),   // B(0,1)
-      make_cuComplex(10.0f, 0.0f),  // B(1,1)
-      make_cuComplex(12.0f, 0.0f)   // B(2,1)
-  };
-
-  size_t totalA = 2 * 3;
-  size_t totalB = 3 * 2;
-  HANDLE_CUDA_ERROR(cudaMemcpy(A.ptr(), hA.data(), totalA * sizeof(cuComplex), cudaMemcpyHostToDevice));
-  HANDLE_CUDA_ERROR(cudaMemcpy(B.ptr(), hB.data(), totalB * sizeof(cuComplex), cudaMemcpyHostToDevice));
-
-  // Prepare output tensor C for the result (should be 2x2).
-  std::vector<int> shapeC = {2, 2};
-  tensor::DeviceTensor<cuComplex> C(shapeC);
-
-  // For matrix multiplication, contract A over axis 1 (columns) with B over axis 0 (rows).
-  std::vector<int> axesA = {1};
-  std::vector<int> axesB = {0};
-
-  cutensorHandle_t handle;
-  CUTENSOR_CHECK(cutensorCreate(&handle));
-  auto workspace = contraction::Workspace(100);
-
-  contraction::tensordot(handle, workspace, A, B, C, axesA, axesB, false, false, 0);
-
-  // Copy the result from device to host.
-  std::vector<cuComplex> hC(2 * 2);
-  HANDLE_CUDA_ERROR(cudaMemcpy(hC.data(), C.ptr(), hC.size() * sizeof(cuComplex), cudaMemcpyDeviceToHost));
-
-  // Print the result (C is stored in column-major order).
-  std::cout << "Result tensor C (matrix multiplication):" << std::endl;
-  for (int i = 0; i < 2; i++) {
-    for (int j = 0; j < 2; j++) {
-      // In column-major, index = i + j * (number of rows).
-      cuComplex val = hC[i + j * 2];
-      std::cout << "(" << val.x << ", " << val.y << ") ";
-    }
-    std::cout << std::endl;
-  }
-
-  // Expected result:
-  //   A = [ [1, 2, 3],
-  //         [4, 5, 6] ]
-  //   B = [ [ 7,  8],
-  //         [ 9, 10],
-  //         [11, 12] ]
-  //   A * B = [ [1*7+2*9+3*11, 1*8+2*10+3*12],
-  //             [4*7+5*9+6*11, 4*8+5*10+6*12] ]
-  //         = [ [ 58,  64],
-  //             [139, 154] ]
-  std::vector<cuComplex> expected = {
-      make_cuComplex(58.0f, 0.0f),   // C(0,0)
-      make_cuComplex(139.0f, 0.0f),  // C(1,0)
-      make_cuComplex(64.0f, 0.0f),   // C(0,1)
-      make_cuComplex(154.0f, 0.0f)   // C(1,1)
-  };
-
-  float tol = 1e-3f;
-  bool pass = true;
-  for (size_t i = 0; i < expected.size(); i++) {
-    if (std::abs(hC[i].x - expected[i].x) > tol || std::abs(hC[i].y - expected[i].y) > tol) {
-      pass = false;
-      std::cout << "Mismatch at index " << i << ": got (" << hC[i].x << ", " << hC[i].y
-                << "), expected (" << expected[i].x << ", " << expected[i].y << ")" << std::endl;
-    }
-  }
-  if (pass)
-    std::cout << "tensordot matrix multiplication test passed!" << std::endl;
-  else
-    std::cout << "tensordot matrix multiplication test failed." << std::endl;
-
-  // =============================
-  // Test 2: 3D Tensordot Contraction
-  // =============================
-  std::cout << "\nRunning 3D tensordot test..." << std::endl;
-  // Create a 3D tensor A of shape (2, 3, 4) and a 2D tensor B of shape (4, 5).
-  // We will contract A's last axis (axis 2, size 4) with B's first axis (axis 0, size 4).
-  // The free dimensions of A are (2,3) and of B are (5), so the output tensor C should have shape (2, 3, 5).
-  std::vector<int> shapeA3d = {2, 3, 4};
-  std::vector<int> shapeB2d = {4, 5};
-  tensor::DeviceTensor<cuComplex> A3d(shapeA3d);
-  tensor::DeviceTensor<cuComplex> B2d(shapeB2d);
-  size_t totalA3d = 2 * 3 * 4;  // 24 elements.
-  size_t totalB2d = 4 * 5;      // 20 elements.
-  std::vector<cuComplex> hA3d(totalA3d);
-  std::vector<cuComplex> hB2d(totalB2d);
-  // Fill A3d with sequential numbers 1 ... 24.
-  for (size_t idx = 0; idx < totalA3d; ++idx) {
-    hA3d[idx] = make_cuComplex(static_cast<float>(idx + 1), 0.0f);
-  }
-  // Fill B2d with sequential numbers 1 ... 20.
-  for (size_t idx = 0; idx < totalB2d; ++idx) {
-    hB2d[idx] = make_cuComplex(static_cast<float>(idx + 1), 0.0f);
-  }
-  HANDLE_CUDA_ERROR(cudaMemcpy(A3d.ptr(), hA3d.data(), totalA3d * sizeof(cuComplex), cudaMemcpyHostToDevice));
-  HANDLE_CUDA_ERROR(cudaMemcpy(B2d.ptr(), hB2d.data(), totalB2d * sizeof(cuComplex), cudaMemcpyHostToDevice));
-
-  // Prepare output tensor C3d with expected shape (2, 3, 5).
-  std::vector<int> shapeC3d = {2, 3, 5};
-  tensor::DeviceTensor<cuComplex> C3d(shapeC3d);
-
-  // Set contraction axes: contract axis 2 of A3d with axis 0 of B2d.
-  std::vector<int> axesA3d = {2};
-  std::vector<int> axesB2d = {0};
-
-  contraction::tensordot(handle, workspace, A3d, B2d, C3d, axesA3d, axesB2d, false, false, 0);
-
-  size_t totalC3d = 2 * 3 * 5;
-  std::vector<cuComplex> hC3d(totalC3d);
-  HANDLE_CUDA_ERROR(cudaMemcpy(hC3d.data(), C3d.ptr(), totalC3d * sizeof(cuComplex), cudaMemcpyDeviceToHost));
-
-  // Compute expected result on host.
-  // For A3d with shape (2,3,4) in column-major order, the strides are:
-  //   stride[0] = 1, stride[1] = 2, stride[2] = 2*3 = 6.
-  // So A3d(i,j,l) is stored at index: i + j*2 + l*6.
-  // For B2d with shape (4,5) in column-major order, the strides are:
-  //   stride[0] = 1, stride[1] = 4.
-  // So B2d(l,k) is stored at index: l + k*4.
-  // For C3d with shape (2,3,5), the strides are the same as A3d for the free dimensions:
-  //   index = i + j*2 + k*6.
-  std::vector<cuComplex> expected3d(totalC3d, make_cuComplex(0.0f, 0.0f));
-  for (int i = 0; i < 2; i++) {
-    for (int j = 0; j < 3; j++) {
-      for (int k = 0; k < 5; k++) {
-        float sum = 0.0f;
-        for (int l = 0; l < 4; l++) {
-          int idxA = i + j * 2 + l * 6;        // A3d(i,j,l)
-          int idxB = l + k * 4;                // B2d(l,k)
-          sum += hA3d[idxA].x * hB2d[idxB].x;  // real numbers only.
-        }
-        int idxC = i + j * 2 + k * 6;
-        expected3d[idxC] = make_cuComplex(sum, 0.0f);
-      }
-    }
-  }
-
-  // Print the 3D tensordot result.
-  std::cout << "\nResult tensor C3d (shape 2x3x5):" << std::endl;
-  for (int k = 0; k < 5; k++) {
-    std::cout << "Slice k = " << k << ":" << std::endl;
-    for (int j = 0; j < 3; j++) {
-      for (int i = 0; i < 2; i++) {
-        int idx = i + j * 2 + k * 6;
-        cuComplex val = hC3d[idx];
-        std::cout << "(" << val.x << ", " << val.y << ") ";
-      }
-      std::cout << std::endl;
-    }
-    std::cout << std::endl;
-  }
-
-  // Compare expected vs. GPU result.
-  pass = true;
-  for (size_t idx = 0; idx < totalC3d; idx++) {
-    if (std::abs(hC3d[idx].x - expected3d[idx].x) > tol ||
-        std::abs(hC3d[idx].y - expected3d[idx].y) > tol) {
-      pass = false;
-      std::cout << "Mismatch at index " << idx << ": got ("
-                << hC3d[idx].x << ", " << hC3d[idx].y << "), expected ("
-                << expected3d[idx].x << ", " << expected3d[idx].y << ")" << std::endl;
-    }
-  }
-  if (pass)
-    std::cout << "3D tensordot test passed!" << std::endl;
-  else
-    std::cout << "3D tensordot test failed."
-              << std::endl;
-
-  CUTENSOR_CHECK(cutensorDestroy(handle));
-
-  return 0;
-}
-
-using Complex = cuDoubleComplex;
-using Scalar = double;
-
-int test_scale() {
-  std::cout << std::fixed << std::setprecision(10);
-  cudaStream_t stream;
-  cudaStreamCreate(&stream);
-  static const Scalar tol = 1e-16;
-  static const Scalar reciprocal_tol = 1e-5;
-
-  int I = 20, K = 30, L = 20;
-  int total = I * K * L;
-
-  std::vector<Complex> h_tensor(total);
-  for (int l = 0; l < L; l++) {
-    for (int k = 0; k < K; k++) {
-      for (int i = 0; i < I; i++) {
-        int idx = i + I * (k + K * l);
-        h_tensor[idx] = make_cuDoubleComplex(static_cast<Scalar>(idx + 1), static_cast<Scalar>(idx + 10));
-      }
-    }
-  }
-
-  std::vector<Scalar> h_S_left(I);
-  std::iota(h_S_left.begin(), h_S_left.end(), 1.0);
-
-  tensor::DeviceTensor<Complex> d_in({I, K, L});
-  tensor::DeviceTensor<Complex> d_out({I, K, L});
-  tensor::DeviceTensor<Scalar> d_S_left({I});
-
-  d_in.device_data = h_tensor;
-  d_S_left.device_data = h_S_left;
-
-  contraction::scale_B_by_S(d_S_left, d_in, d_out, stream, false);
-  thrust::host_vector<Complex> h_out = d_out.device_data;
-
-  std::cout << "Test 1: Left scaling (non-reciprocal):" << std::endl;
-  bool pass = true;
-  for (int l = 0; l < L; l++) {
-    for (int k = 0; k < K; k++) {
-      for (int i = 0; i < I; i++) {
-        int idx = i + I * (k + K * l);
-        Scalar factor = h_S_left[i];  // left scaling: factor depends on i.
-        Complex expected = make_cuDoubleComplex(h_tensor[idx].x * factor, h_tensor[idx].y * factor);
-        Complex actual = h_out[idx];
-        if (std::abs(expected.x - actual.x) > tol || std::abs(expected.y - actual.y) > tol) {
-          pass = false;
-          std::cout << "Mismatch at idx " << idx << ": expected ("
-                    << expected.x << "," << expected.y << ") but got ("
-                    << actual.x << "," << actual.y << ")" << std::endl;
-        }
-      }
-    }
-  }
-  std::cout << (pass ? "Test 1 PASSED" : "Test 1 FAILED") << std::endl;
-
-  // -------------------------------
-  // Test 2: Left scaling with reciprocal
-  // Expected: each element becomes inData / S[i].
-  // -------------------------------
-  contraction::scale_B_by_S(d_S_left, d_in, d_out, stream, true);
-  h_out = d_out.device_data;
-
-  std::cout << "Test 2: Left scaling (reciprocal):" << std::endl;
-  pass = true;
-  for (int l = 0; l < L; l++) {
-    for (int k = 0; k < K; k++) {
-      for (int i = 0; i < I; i++) {
-        int idx = i + I * (k + K * l);
-        Scalar factor = h_S_left[i];
-        Complex expected = make_cuDoubleComplex(h_tensor[idx].x / factor, h_tensor[idx].y / factor);
-        Complex actual = h_out[idx];
-        if (std::abs(expected.x - actual.x) > reciprocal_tol || std::abs(expected.y - actual.y) > reciprocal_tol) {
-          pass = false;
-          std::cout << "Mismatch at idx " << idx << ": expected ("
-                    << expected.x << "," << expected.y << ") but got ("
-                    << actual.x << "," << actual.y << ")" << std::endl;
-        }
-      }
-    }
-  }
-  std::cout << (pass ? "Test 2 PASSED" : "Test 2 FAILED") << std::endl;
-
-  // -------------------------------
-  // Test 3: Right scaling (non-reciprocal)
-  // S vector length must equal L.
-  // Expected: each element is multiplied by S[l] where l = (idx / I) / K.
-  // -------------------------------
-  std::vector<Scalar> h_S_right(L);
-  std::iota(h_S_right.begin(), h_S_right.end(), 10.0);
-
-  tensor::DeviceTensor<Scalar> d_S_right({L});
-  d_S_right.device_data = h_S_right;
-
-  contraction::scale_B_by_S(d_in, d_S_right, d_out, stream, false);
-  h_out = d_out.device_data;
-
-  std::cout << "Test 3: Right scaling (non-reciprocal):" << std::endl;
-  pass = true;
-  for (int l = 0; l < L; l++) {
-    for (int k = 0; k < K; k++) {
-      for (int i = 0; i < I; i++) {
-        int idx = i + I * (k + K * l);
-        Scalar factor = h_S_right[l];  // right scaling: factor depends on l.
-        Complex expected = make_cuDoubleComplex(h_tensor[idx].x * factor, h_tensor[idx].y * factor);
-        Complex actual = h_out[idx];
-        if (std::abs(expected.x - actual.x) > tol || std::abs(expected.y - actual.y) > tol) {
-          pass = false;
-          std::cout << "Mismatch at idx " << idx << ": expected ("
-                    << expected.x << "," << expected.y << ") but got ("
-                    << actual.x << "," << actual.y << ")" << std::endl;
-        }
-      }
-    }
-  }
-  std::cout << (pass ? "Test 3 PASSED" : "Test 3 FAILED") << std::endl;
-
-  // -------------------------------
-  // Test 4: Right scaling with reciprocal
-  // Expected: each element becomes inData / S[l].
-  // -------------------------------
-  contraction::scale_B_by_S(d_in, d_S_right, d_out, stream, true);
-  h_out = d_out.device_data;
-
-  std::cout << "Test 4: Right scaling (reciprocal):" << std::endl;
-  pass = true;
-  for (int l = 0; l < L; l++) {
-    for (int k = 0; k < K; k++) {
-      for (int i = 0; i < I; i++) {
-        int idx = i + I * (k + K * l);
-        Scalar factor = h_S_right[l];
-        Complex expected = make_cuDoubleComplex(h_tensor[idx].x / factor, h_tensor[idx].y / factor);
-        Complex actual = h_out[idx];
-        if (std::abs(expected.x - actual.x) > reciprocal_tol || std::abs(expected.y - actual.y) > reciprocal_tol) {
-          pass = false;
-          std::cout << "Mismatch at idx " << idx << ": expected ("
-                    << expected.x << "," << expected.y << ") but got ("
-                    << actual.x << "," << actual.y << ")" << std::endl;
-        }
-      }
-    }
-  }
-  std::cout << (pass ? "Test 4 PASSED" : "Test 4 FAILED") << std::endl;
-
-  cudaStreamDestroy(stream);
-  return 0;
-}
-
-int main() {
-  // Run both the tensordot and scaling test suites.
-  test_tensordot();
-  test_scale();
-}
\ No newline at end of file