5555import java .security .KeyStoreException ;
5656import java .security .NoSuchAlgorithmException ;
5757import java .security .Provider ;
58+ import java .security .ProviderException ;
5859import java .security .SecureRandom ;
5960import java .security .Security ;
6061import java .security .UnrecoverableKeyException ;
6162import java .security .cert .CertificateException ;
62- import java .util .ArrayDeque ;
6363import java .util .Arrays ;
6464import java .util .Date ;
65- import java .util .Deque ;
6665import java .util .HashMap ;
6766import java .util .List ;
6867import java .util .Map ;
@@ -427,6 +426,7 @@ PKCS11Session acquireSession(long timeoutMs) throws KMSException {
427426 return session ;
428427 }
429428 // Stale idle session: discard it and free its permit so a new one can be created.
429+ logger .debug ("Discarding stale idle PKCS#11 session for profile {}" , profileId );
430430 session .close ();
431431 sessionPermits .release ();
432432 }
@@ -517,15 +517,13 @@ void invalidate() {
517517 private static class PKCS11Session {
518518 private static final int IV_LENGTH = 16 ; // 128 bits for CBC mode
519519
520- // Counter to bypass JDK SunPKCS11 caching when the HSM restarts
520+ // Counter to bypass JDK SunPKCS11 caching when the HSM restarts. The JDK's
521+ // PKCS11.getInstance caches the native module in a static map keyed by the library
522+ // path string and never removes entries, so a module attached to a dead HSM can
523+ // never be re-initialized under its original path. Each bump adds an extra slash
524+ // to the path (a distinct map key for the same file), forcing a fresh C_Initialize.
521525 private static final AtomicInteger hsmRestartCount = new AtomicInteger (0 );
522526
523- // Holds strong references to recently-closed SunPKCS11 providers until a new
524- // session has successfully completed C_Initialize + C_OpenSession. This prevents
525- // GC from running the old provider's finalizer (which calls C_Finalize on the
526- // shared native PKCS11 module) while a new session is still initialising.
527- private static final Deque <Provider > pendingFinalization = new ArrayDeque <>();
528-
529527 private KeyStore keyStore ;
530528 private Provider provider ;
531529 private String providerName ;
@@ -568,6 +566,7 @@ private static class PKCS11Session {
568566 * </ul>
569567 */
570568 private void connect (Map <String , String > config ) throws KMSException {
569+ boolean connected = false ;
571570 try {
572571 // Unique suffix ensures each session gets its own provider name in java.security.Security,
573572 // allowing Security.removeProvider() in close() to target exactly this session's provider.
@@ -606,20 +605,20 @@ private void connect(Map<String, String> config) throws KMSException {
606605 throw KMSException .invalidParameter ("pin is required" );
607606 }
608607 char [] pinChars = pin .toCharArray ();
609- keyStore .load (null , pinChars );
610- Arrays .fill (pinChars , '\0' );
608+ try {
609+ keyStore .load (null , pinChars );
610+ } finally {
611+ // Wipe the PIN copy on every path, including load() failures
612+ // (wrong PIN, CRYPTOKI_NOT_INITIALIZED after an HSM restart, etc.).
613+ Arrays .fill (pinChars , '\0' );
614+ }
611615
612616 // The temp file is only needed during configure()/load(); delete it immediately
613617 // rather than holding it until the session is eventually closed.
614618 Files .deleteIfExists (tempConfigFile );
615619 tempConfigFile = null ;
616620
617- // New session is fully initialised (C_Initialize + C_OpenSession succeeded).
618- // Release the deferred old providers so they can now be GC'd and finalised
619- // without racing against this session's initialisation.
620- synchronized (pendingFinalization ) {
621- pendingFinalization .clear ();
622- }
621+ connected = true ;
623622
624623 logger .debug ("Successfully connected to PKCS#11 HSM at {}" , config .get ("library" ));
625624 } catch (KeyStoreException | CertificateException | NoSuchAlgorithmException e ) {
@@ -636,6 +635,12 @@ private void connect(Map<String, String> config) throws KMSException {
636635 }
637636 } catch (Exception e ) {
638637 handlePKCS11Exception (e , "Unexpected error during PKCS#11 connection" );
638+ } finally {
639+ if (!connected ) {
640+ // connect that failed after Security.addProvider() would otherwise leave
641+ // the half-initialized provider registered in java.security.Security.
642+ close ();
643+ }
639644 }
640645 }
641646
@@ -663,7 +668,6 @@ private String buildSunPKCS11Config(Map<String, String> config, String nameSuffi
663668 libraryPath = "./" + extraSlashes + libraryPath ;
664669 }
665670 }
666-
667671 StringBuilder configBuilder = new StringBuilder ();
668672 // Include the unique suffix so that each session is registered under a distinct
669673 // provider name (SunPKCS11-CloudStackHSM-{suffix}), preventing name collisions
@@ -720,30 +724,36 @@ private String buildSunPKCS11Config(Map<String, String> config, String nameSuffi
720724 */
721725 private void handlePKCS11Exception (Exception e , String context ) throws KMSException {
722726 String errorMsg = e .getMessage ();
723- String causeErrorMessage = e .getCause () != null ? e .getCause ().getMessage () : "" ;
724727 if (errorMsg == null ) {
725728 errorMsg = e .getClass ().getSimpleName ();
726729 }
727730 logger .warn ("PKCS#11 error: {} - {}" , errorMsg , context , e );
728731
729- if (causeErrorMessage .contains ("CRYPTOKI_NOT_INITIALIZED" ) || errorMsg .contains ("CRYPTOKI_NOT_INITIALIZED" )) {
732+ // The PKCS#11 error code (CKR_*) may be on the exception itself or its cause, so
733+ // match against both. KeyStore.getKey/load wrap the real code in a ProviderException.
734+ String causeMsg = e .getCause () != null ? e .getCause ().getMessage () : null ;
735+ String combinedMsg = errorMsg + " " + (causeMsg != null ? causeMsg : "" );
736+
737+ if (combinedMsg .contains ("CRYPTOKI_NOT_INITIALIZED" )) {
730738 hsmRestartCount .incrementAndGet ();
731739 throw new KMSException (KMSException .ErrorType .CONNECTION_FAILED ,
732740 context + ": HSM requires re-initialization (CRYPTOKI_NOT_INITIALIZED)" , e );
733- } else if (causeErrorMessage . contains ( "PIN_INCORRECT" ) || errorMsg .contains ("PIN_INCORRECT" )) {
741+ } else if (combinedMsg .contains ("PIN_INCORRECT" )) {
734742 throw new KMSException (KMSException .ErrorType .AUTHENTICATION_FAILED ,
735743 context + ": Incorrect PIN" , e );
736- } else if (causeErrorMessage . contains ( "SLOT_ID_INVALID" ) || errorMsg .contains ("SLOT_ID_INVALID" )) {
744+ } else if (combinedMsg .contains ("SLOT_ID_INVALID" )) {
737745 throw KMSException .invalidParameter (context + ": Invalid slot ID" );
738- } else if (causeErrorMessage . contains ( "KEY_NOT_FOUND" ) || errorMsg .contains ("KEY_NOT_FOUND" )) {
746+ } else if (combinedMsg .contains ("KEY_NOT_FOUND" )) {
739747 throw KMSException .kekNotFound (context + ": Key not found" );
740- } else if (causeErrorMessage .contains ("DEVICE_ERROR" ) || errorMsg .contains ("DEVICE_ERROR" )) {
748+ } else if (combinedMsg .contains ("DEVICE_ERROR" )) {
749+ hsmRestartCount .incrementAndGet ();
741750 throw new KMSException (KMSException .ErrorType .CONNECTION_FAILED ,
742751 context + ": HSM device error" , e );
743- } else if (causeErrorMessage .contains ("SESSION_HANDLE_INVALID" ) || errorMsg .contains ("SESSION_HANDLE_INVALID" )) {
752+ } else if (combinedMsg .contains ("SESSION_HANDLE_INVALID" )) {
753+ hsmRestartCount .incrementAndGet ();
744754 throw new KMSException (KMSException .ErrorType .CONNECTION_FAILED ,
745755 context + ": Invalid session handle" , e );
746- } else if (causeErrorMessage . contains ( "KEY_ALREADY_EXISTS" ) || errorMsg .contains ("KEY_ALREADY_EXISTS" )) {
756+ } else if (combinedMsg .contains ("KEY_ALREADY_EXISTS" )) {
747757 throw KMSException .keyAlreadyExists (context );
748758 } else if (e instanceof KeyStoreException ) {
749759 throw new KMSException (KMSException .ErrorType .WRAP_UNWRAP_FAILED ,
@@ -800,12 +810,6 @@ void close() {
800810 if (provider != null && providerName != null ) {
801811 try {
802812 Security .removeProvider (providerName );
803- // Keep a strong reference so GC cannot finalize this provider
804- // (and call C_Finalize on the shared native module) until the
805- // next session has fully initialised. Drained in connect().
806- synchronized (pendingFinalization ) {
807- pendingFinalization .addLast (provider );
808- }
809813 } catch (Exception e ) {
810814 logger .debug ("Failed to remove provider {}: {}" , providerName , e .getMessage ());
811815 }
@@ -971,8 +975,6 @@ byte[] wrapKey(byte[] plainDek, String kekLabel) throws KMSException {
971975 handlePKCS11Exception (e , "Invalid IV for CBC mode" );
972976 } catch (Exception e ) {
973977 handlePKCS11Exception (e , "Failed to wrap key with HSM" );
974- } finally {
975- kek = null ;
976978 }
977979 return null ;
978980 }
@@ -1000,6 +1002,11 @@ private SecretKey getKekFromKeyStore(String kekLabel) throws KMSException {
10001002 handlePKCS11Exception (e , "Algorithm not supported" );
10011003 } catch (KeyStoreException e ) {
10021004 handlePKCS11Exception (e , "Failed to retrieve KEK from HSM" );
1005+ } catch (ProviderException e ) {
1006+ // KeyStore.getKey() wraps PKCS#11 errors (e.g. CKR_CRYPTOKI_NOT_INITIALIZED after
1007+ // an HSM restart) in ProviderException, an unchecked RuntimeException. Route it
1008+ // through handlePKCS11Exception so hsmRestartCount is bumped and the caller retries.
1009+ handlePKCS11Exception (e , "Failed to retrieve KEK from HSM" );
10031010 }
10041011 return null ;
10051012 }
@@ -1061,8 +1068,6 @@ byte[] unwrapKey(byte[] wrappedBlob, String kekLabel) throws KMSException {
10611068 handlePKCS11Exception (e , "Invalid IV for CBC mode" );
10621069 } catch (Exception e ) {
10631070 handlePKCS11Exception (e , "Failed to unwrap key with HSM" );
1064- } finally {
1065- kek = null ;
10661071 }
10671072 return null ;
10681073 }
0 commit comments