From c464606b11ed8b31e6533de72add4ed0ebe7c8a0 Mon Sep 17 00:00:00 2001
From: "Martin C. Frith" <mcfrith@gmail.com>
Date: Mon, 13 Oct 2025 01:33:33 +0000
Subject: [PATCH 1/3] Adds esl-mixdchlet DNA strand symmetry option

---
 esl_mixdchlet.c          | 73 ++++++++++++++++++++++++++++++++++------
 esl_mixdchlet.h          |  2 ++
 esl_mixdchlet.tex        | 47 +++++++++++++++++++++++++-
 miniapps/Makefile.in     |  5 +++
 miniapps/esl-mixdchlet.c |  5 ++-
 5 files changed, 119 insertions(+), 13 deletions(-)

diff --git a/esl_mixdchlet.c b/esl_mixdchlet.c
index d7fb1117..638f6dee 100644
--- a/esl_mixdchlet.c
+++ b/esl_mixdchlet.c
@@ -45,11 +45,12 @@
  * Throws:    NULL on allocation failure.
  */
 ESL_MIXDCHLET *
-esl_mixdchlet_Create(int Q, int K)
+esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric)
 {
   ESL_MIXDCHLET *dchl = NULL;
   int            status;
 
+  if (isDnaStrandSymmetric) Q /= 2;  // Q components => Q/2 pairs of components
   ESL_DASSERT1( (Q > 0) );
   ESL_DASSERT1( (K > 0) );
 
@@ -64,14 +65,18 @@ esl_mixdchlet_Create(int Q, int K)
 
   dchl->Q = Q;
   dchl->K = K;
+  dchl->isDnaStrandSymmetric = isDnaStrandSymmetric;
   return dchl;
 
  ERROR:
   esl_mixdchlet_Destroy(dchl);
   return NULL;
 }
-
-
+ESL_MIXDCHLET *
+esl_mixdchlet_Create(int Q, int K)
+{
+  return esl_mixdchlet_CreateForFitting(Q, K, 0);
+}
 
 /* Function:  esl_mixdchlet_Destroy()
  * Synopsis:  Free a mixture Dirichlet.
@@ -94,6 +99,28 @@ esl_mixdchlet_Destroy(ESL_MIXDCHLET *dchl)
  * 2. Likelihoods, posteriors, inference
  *****************************************************************/
 
+// Return the log probability of a vector c of counts of A/C/G/T, given a
+// DNA-strand-symmetric pair of Dirichlet distributions: log P(c | alpha)
+static double
+mixdchlet_logprob_c_dna(double *c, double *alpha, int K)  // presumably, K=4
+{
+  double lgamma_sums[2] = {0, 0};
+  double sum1, sum2, sum3, logp;
+  sum1 = sum2 = sum3 = logp = 0;
+
+  for (int a = 0; a < K; a++)  // loop over 4 DNA bases
+    {
+      sum1 += c[a] + alpha[a];
+      sum2 += alpha[a];
+      sum3 += c[a];
+      lgamma_sums[0] += lgamma(alpha[a] + c[a]);
+      lgamma_sums[1] += lgamma(alpha[a] + c[K - 1 - a]);  // complement of a
+      logp -= lgamma(c[a] + 1) + lgamma(alpha[a]);
+    }
+
+  return logp + esl_vec_DLogSum(lgamma_sums, 2) - eslCONST_LOG2
+    + lgamma(sum2) + lgamma(sum3 + 1) - lgamma(sum1);
+}
 
 /* mixchlet_postq()
  * Calculate P(q | c), the posterior probability of component q.
@@ -103,7 +130,9 @@ mixdchlet_postq(ESL_MIXDCHLET *dchl, double *c)
 {
   int k;
   for (k = 0; k < dchl->Q; k++)
-    if (dchl->q[k] > 0.) dchl->postq[k] = log(dchl->q[k]) + esl_dirichlet_logpdf_c(c, dchl->alpha[k], dchl->K);
+    if (dchl->q[k] > 0.)
+      dchl->postq[k] = log(dchl->q[k]) +
+	(dchl->isDnaStrandSymmetric ? mixdchlet_logprob_c_dna : esl_dirichlet_logpdf_c)(c, dchl->alpha[k], dchl->K);
     else                 dchl->postq[k] = -eslINFINITY;
   esl_vec_DLogNorm(dchl->postq, dchl->Q); 
 }
@@ -128,7 +157,9 @@ esl_mixdchlet_logp_c(ESL_MIXDCHLET *dchl, double *c)
 {
   int k; 
   for (k = 0; k < dchl->Q; k++) 
-    if (dchl->q[k] > 0.) dchl->postq[k] = log(dchl->q[k]) + esl_dirichlet_logpdf_c(c, dchl->alpha[k], dchl->K);
+    if (dchl->q[k] > 0.)
+      dchl->postq[k] = log(dchl->q[k]) +
+	(dchl->isDnaStrandSymmetric ? mixdchlet_logprob_c_dna : esl_dirichlet_logpdf_c)(c, dchl->alpha[k], dchl->K);
     else                 dchl->postq[k] = -eslINFINITY;
   return esl_vec_DLogSum(dchl->postq, dchl->Q);
 }
@@ -278,6 +309,7 @@ mixdchlet_gradient(double *p, int np, void *dptr, double *dp)
   double  sum_alpha;     //  |alpha_k| 
   double  sum_c;         //  |c_i|
   double  psi1;          //  \Psi(c_ia + alpha_ka)        
+  double  psi1b;         //  \Psi(c_ia' + alpha_ka)
   double  psi2;          //  \Psi( |c_i| + |alpha_k| ) 
   double  psi3;          //  \Psi( |alpha_k| )        
   double  psi4;          //  \Psi( alpha_ka )
@@ -297,12 +329,27 @@ mixdchlet_gradient(double *p, int np, void *dptr, double *dp)
 
       for (k = 0; k < dchl->Q; k++)
 	{
+	  double strand_weights[2] = {0, 0};
+	  for (a = 0; a < dchl->K; a++)
+	    {
+	      int b = dchl->K - 1 - a;  // complement of DNA base a
+	      strand_weights[0] += lgamma(dchl->alpha[k][a] + data->c[i][a]);
+	      strand_weights[1] += lgamma(dchl->alpha[k][a] + data->c[i][b]);
+	    }
+	  esl_vec_DLogNorm(strand_weights, 2);
+
 	  sum_alpha = esl_vec_DSum(dchl->alpha[k], dchl->K);
 	  esl_stats_Psi( sum_alpha + sum_c, &psi2);
 	  esl_stats_Psi( sum_alpha,         &psi3);
 	  for (a = 0; a < dchl->K; a++)
 	    {
 	      esl_stats_Psi( dchl->alpha[k][a] + data->c[i][a], &psi1);
+	      if (dchl->isDnaStrandSymmetric)
+		{
+		  int b = dchl->K - 1 - a;  // complement of DNA base a
+		  esl_stats_Psi( dchl->alpha[k][a] + data->c[i][b], &psi1b);
+		  psi1 = strand_weights[0] * psi1 + strand_weights[1] * psi1b;
+		}
 	      esl_stats_Psi( dchl->alpha[k][a],                 &psi4);
 	      dp[j++] -= dchl->alpha[k][a] * dchl->postq[k] * (psi1 - psi2 + psi3 - psi4);
 	    }
@@ -518,16 +565,20 @@ esl_mixdchlet_Read(ESL_FILEPARSER *efp,  ESL_MIXDCHLET **ret_dchl)
 int
 esl_mixdchlet_Write(FILE *fp, const ESL_MIXDCHLET *dchl)
 {
-  int k,a;
+  int k,s,a;
   int status;
+  int n = 1 + dchl->isDnaStrandSymmetric;
 
-  if ((status = esl_fprintf(fp, "%d %d\n", dchl->K, dchl->Q))      != eslOK) return status;
+  if ((status = esl_fprintf(fp, "%d %d\n", dchl->K, dchl->Q * n))      != eslOK) return status;
   for (k = 0; k < dchl->Q; k++)
     {
-      if ((status = esl_fprintf(fp, "%.4f ", dchl->q[k]))          != eslOK) return status;
-      for (a = 0; a < dchl->K; a++)
-	if ((status = esl_fprintf(fp, "%.4f ", dchl->alpha[k][a])) != eslOK) return status;
-      if ((status = esl_fprintf(fp, "\n"))                         != eslOK) return status;
+      for (s = 0; s < n; ++s)
+	{
+	  if ((status = esl_fprintf(fp, "%.4f ", dchl->q[k] / n))      != eslOK) return status;
+	  for (a = 0; a < dchl->K; a++)
+	    if ((status = esl_fprintf(fp, "%.4f ", dchl->alpha[k][s ? dchl->K - 1 - a : a])) != eslOK) return status;
+	  if ((status = esl_fprintf(fp, "\n"))                         != eslOK) return status;
+	}
     }
   return eslOK;
 }
diff --git a/esl_mixdchlet.h b/esl_mixdchlet.h
index 46400767..bd7a2bea 100644
--- a/esl_mixdchlet.h
+++ b/esl_mixdchlet.h
@@ -20,6 +20,7 @@ typedef struct {
   double **alpha;               /* Dirichlet params alpha[0..Q-1][0..K-1]   */
   int      Q;			/* number of mixtures, e.g. 9 for Sjolander */
   int      K;			/* alphabet size, e.g. 20                   */
+  int isDnaStrandSymmetric;     /* is it DNA-strand-symmetric for A C G T?  */
 
   double  *postq;                /* temp space 0..Q-1: for posterior P(k|c) for example */
   /*::cexcerpt::dirichlet_mixdchlet::end::*/
@@ -27,6 +28,7 @@ typedef struct {
 
 
 extern ESL_MIXDCHLET *esl_mixdchlet_Create(int Q, int K);
+extern ESL_MIXDCHLET *esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric);
 extern void           esl_mixdchlet_Destroy(ESL_MIXDCHLET *dchl);
 
 extern double         esl_mixdchlet_logp_c      (ESL_MIXDCHLET *dchl, double *c);
diff --git a/esl_mixdchlet.tex b/esl_mixdchlet.tex
index 8c5ef284..93b69544 100644
--- a/esl_mixdchlet.tex
+++ b/esl_mixdchlet.tex
@@ -91,6 +91,51 @@ \section{Fitting a mixture Dirichlet to counts}
 maximizer.  The implementation provides the negative log likelihood
 and the negative gradient to the CG routine.
 
+\subsection{Fitting a mixture Dirichlet with DNA strand symmetry}
 
-\end{document}
+Here, each count vector has $K=4$ counts of DNA bases.  The mixture
+Dirichlet consists of $Q$ \emph{pairs} of components.  Each component
+in a pair is identical to the other after swapping DNA bases with
+their complements.  So, the two components have identical mixture
+coefficients: $q_k/2$.  (It's also possible to have unpaired
+self-symmetric components: that isn't considered here.)
+
+Here, we define $P(c_i \mid \alpha_k)$ to be the probability of one
+count vector given one pair of Dirichlet components:
+
+\[
+P(c_i \mid \alpha_k) = \frac{ |c_i|! }
+                            { \prod_a c_{ia}! }
+                       \frac{ \prod_a \Gamma \left( c_{ia} + \alpha_{ia} \right) + \prod_a \Gamma \left( c_{ia'} + \alpha_{ia} \right) }
+                            { 2 \enspace \Gamma ( |c_i + \alpha_k| ) }
+                       \frac{ \Gamma ( |\alpha_k| ) }
+                            { \prod_a \Gamma \left( \alpha_{ka} \right) }\,,
+\]
+where $a'$ means the complement of base $a$.
 
+Then, the equation for $\frac{\partial L}{\partial \lambda_k}$ is the
+same as above, except we're using the new definition of $P(c_i \mid
+\alpha_k)$:
+\[
+P(k \mid \theta, c_i) = q_k P(c_i \mid \alpha_k) \Big/ \sum_k q_k P(c_i \mid \alpha_k)\,.
+\]
+The partial derivative with respect to $\beta_{ka}$ is
+\[
+\frac{\partial L}{\partial \beta_{ka}}  =  \sum_i
+ \alpha_{ka} P(k \mid \theta, c_i)
+    \left( X_{ika}
+        -  \Psi \left( | c_i | + | \alpha_k | \right)
+        +  \Psi \left( | \alpha_k | \right)
+        -  \Psi \left( \alpha_{ka} \right)
+    \right)\,,
+\]
+where
+\[
+X_{ika} =
+\frac{ \Psi \left( c_{ia} + \alpha_{ka} \right) \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) +
+       \Psi \left( c_{ia'} + \alpha_{ka} \right) \prod_a \Gamma \left( c_{ia'} + \alpha_{ka} \right) }
+     { \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) + \prod_a \Gamma \left( c_{ia'} + \alpha_{ka} \right) }\,.
+\]
+
+
+\end{document}
diff --git a/miniapps/Makefile.in b/miniapps/Makefile.in
index 4a8caab4..18584a42 100644
--- a/miniapps/Makefile.in
+++ b/miniapps/Makefile.in
@@ -41,6 +41,8 @@ LDFLAGS        = @LDFLAGS@
 DEFS           = @DEFS@
 LIBS           = -leasel @LIBGSL@ @LIBS@ @PTHREAD_LIBS@ -lm
 
+PROGS = esl-mixdchlet
+
 ## list of the miniapp commands to compile.
 #
 SUBCMDOBJS = \
@@ -97,6 +99,9 @@ check:  ${PROGS} easel
 easel:  % : %.c ../libeasel.a ${SUBCMDOBJS}
 	${QUIET_GEN}${CC} ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${DEFS} ${LDFLAGS} -L.. -I. -I.. -I${srcdir} -I${srcdir}/.. -o $@ $< ${SUBCMDOBJS} ${LIBS}
 
+${PROGS}: % : %.c ../libeasel.a
+	${QUIET_GEN}${CC} ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${DEFS} ${LDFLAGS} -L.. -I. -I.. -I${srcdir} -I${srcdir}/.. -o $@ $< ${LIBS}
+
 ${SUBCMDOBJS}: %.o : %.c ../libeasel.a
 	${QUIET_CC}${CC} -I. -I.. -I${srcdir} -I${srcdir}/.. ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${SIMD_CFLAGS} ${DEFS} -c $<
 
diff --git a/miniapps/esl-mixdchlet.c b/miniapps/esl-mixdchlet.c
index fa38f8fb..df72f389 100644
--- a/miniapps/esl-mixdchlet.c
+++ b/miniapps/esl-mixdchlet.c
@@ -111,6 +111,7 @@ static ESL_OPTIONS fit_options[] = {
   /* name           type      default  env  range toggles reqs incomp  help                                   docgroup*/
   { "-h",    eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",         0 },
   { "-s",    eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                0 },
+  { "-d",    eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "fit a DNA strand-symmetric mixture to A C G T counts (Q must be even)", 0 },
   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 };
 
@@ -119,13 +120,15 @@ cmd_fit(const char *topcmd, const ESL_SUBCMD *sub, int argc, char **argv)
 {
   ESL_GETOPTS    *go      = esl_subcmd_CreateDefaultApp(topcmd, sub, fit_options, argc, argv, /*custom opthelp?:*/NULL);
   ESL_RANDOMNESS *rng     = esl_randomness_Create( esl_opt_GetInteger(go, "-s"));
+  int             isDna   = esl_opt_GetBoolean(go, "-d");              // a complement-symmetric mixture Dirichlet for DNA base counts?
   int             Q       = strtol(esl_opt_GetArg(go, 1), NULL, 10);   // number of mixture components
   int             K       = strtol(esl_opt_GetArg(go, 2), NULL, 10);   // size of probability/parameter vectors - length of count vectors
   char           *ctfile  = esl_opt_GetArg(go, 3);                     // count file to input
   char           *outfile = esl_opt_GetArg(go, 4);                     // mixture Dirichlet file output
   ESL_FILEPARSER *efp     = NULL;                                      // open fileparser for reading                                                
+  if (isDna && Q % 2) esl_fatal("option -d requires number of mixture components Q to be even");
   FILE           *ofp     = NULL;                                      // open output file for writing
-  ESL_MIXDCHLET  *dchl    = esl_mixdchlet_Create(Q,K);                 // mixture Dirichlet being estimated
+  ESL_MIXDCHLET  *dchl    = esl_mixdchlet_CreateForFitting(Q,K,isDna); // mixture Dirichlet being estimated
   int             Nalloc  = 1024;                                      // initial allocation for ct[]
   double        **ct      = esl_mat_DCreate(Nalloc, K);                // count vectors, [0..N-1][0..K-1]
   int             N       = 0;                                         // number of count vectors so far

From 1fdc27d094cb8fa9f5bd01a3c33c3866cb042e0b Mon Sep 17 00:00:00 2001
From: "Martin C. Frith" <mcfrith@gmail.com>
Date: Thu, 23 Oct 2025 08:56:08 +0000
Subject: [PATCH 2/3] Revert "Adds esl-mixdchlet DNA strand symmetry option"

This reverts commit c464606b11ed8b31e6533de72add4ed0ebe7c8a0.
---
 esl_mixdchlet.c          | 73 ++++++----------------------------------
 esl_mixdchlet.h          |  2 --
 esl_mixdchlet.tex        | 47 +-------------------------
 miniapps/Makefile.in     |  5 ---
 miniapps/esl-mixdchlet.c |  5 +--
 5 files changed, 13 insertions(+), 119 deletions(-)

diff --git a/esl_mixdchlet.c b/esl_mixdchlet.c
index 638f6dee..d7fb1117 100644
--- a/esl_mixdchlet.c
+++ b/esl_mixdchlet.c
@@ -45,12 +45,11 @@
  * Throws:    NULL on allocation failure.
  */
 ESL_MIXDCHLET *
-esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric)
+esl_mixdchlet_Create(int Q, int K)
 {
   ESL_MIXDCHLET *dchl = NULL;
   int            status;
 
-  if (isDnaStrandSymmetric) Q /= 2;  // Q components => Q/2 pairs of components
   ESL_DASSERT1( (Q > 0) );
   ESL_DASSERT1( (K > 0) );
 
@@ -65,18 +64,14 @@ esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric)
 
   dchl->Q = Q;
   dchl->K = K;
-  dchl->isDnaStrandSymmetric = isDnaStrandSymmetric;
   return dchl;
 
  ERROR:
   esl_mixdchlet_Destroy(dchl);
   return NULL;
 }
-ESL_MIXDCHLET *
-esl_mixdchlet_Create(int Q, int K)
-{
-  return esl_mixdchlet_CreateForFitting(Q, K, 0);
-}
+
+
 
 /* Function:  esl_mixdchlet_Destroy()
  * Synopsis:  Free a mixture Dirichlet.
@@ -99,28 +94,6 @@ esl_mixdchlet_Destroy(ESL_MIXDCHLET *dchl)
  * 2. Likelihoods, posteriors, inference
  *****************************************************************/
 
-// Return the log probability of a vector c of counts of A/C/G/T, given a
-// DNA-strand-symmetric pair of Dirichlet distributions: log P(c | alpha)
-static double
-mixdchlet_logprob_c_dna(double *c, double *alpha, int K)  // presumably, K=4
-{
-  double lgamma_sums[2] = {0, 0};
-  double sum1, sum2, sum3, logp;
-  sum1 = sum2 = sum3 = logp = 0;
-
-  for (int a = 0; a < K; a++)  // loop over 4 DNA bases
-    {
-      sum1 += c[a] + alpha[a];
-      sum2 += alpha[a];
-      sum3 += c[a];
-      lgamma_sums[0] += lgamma(alpha[a] + c[a]);
-      lgamma_sums[1] += lgamma(alpha[a] + c[K - 1 - a]);  // complement of a
-      logp -= lgamma(c[a] + 1) + lgamma(alpha[a]);
-    }
-
-  return logp + esl_vec_DLogSum(lgamma_sums, 2) - eslCONST_LOG2
-    + lgamma(sum2) + lgamma(sum3 + 1) - lgamma(sum1);
-}
 
 /* mixchlet_postq()
  * Calculate P(q | c), the posterior probability of component q.
@@ -130,9 +103,7 @@ mixdchlet_postq(ESL_MIXDCHLET *dchl, double *c)
 {
   int k;
   for (k = 0; k < dchl->Q; k++)
-    if (dchl->q[k] > 0.)
-      dchl->postq[k] = log(dchl->q[k]) +
-	(dchl->isDnaStrandSymmetric ? mixdchlet_logprob_c_dna : esl_dirichlet_logpdf_c)(c, dchl->alpha[k], dchl->K);
+    if (dchl->q[k] > 0.) dchl->postq[k] = log(dchl->q[k]) + esl_dirichlet_logpdf_c(c, dchl->alpha[k], dchl->K);
     else                 dchl->postq[k] = -eslINFINITY;
   esl_vec_DLogNorm(dchl->postq, dchl->Q); 
 }
@@ -157,9 +128,7 @@ esl_mixdchlet_logp_c(ESL_MIXDCHLET *dchl, double *c)
 {
   int k; 
   for (k = 0; k < dchl->Q; k++) 
-    if (dchl->q[k] > 0.)
-      dchl->postq[k] = log(dchl->q[k]) +
-	(dchl->isDnaStrandSymmetric ? mixdchlet_logprob_c_dna : esl_dirichlet_logpdf_c)(c, dchl->alpha[k], dchl->K);
+    if (dchl->q[k] > 0.) dchl->postq[k] = log(dchl->q[k]) + esl_dirichlet_logpdf_c(c, dchl->alpha[k], dchl->K);
     else                 dchl->postq[k] = -eslINFINITY;
   return esl_vec_DLogSum(dchl->postq, dchl->Q);
 }
@@ -309,7 +278,6 @@ mixdchlet_gradient(double *p, int np, void *dptr, double *dp)
   double  sum_alpha;     //  |alpha_k| 
   double  sum_c;         //  |c_i|
   double  psi1;          //  \Psi(c_ia + alpha_ka)        
-  double  psi1b;         //  \Psi(c_ia' + alpha_ka)
   double  psi2;          //  \Psi( |c_i| + |alpha_k| ) 
   double  psi3;          //  \Psi( |alpha_k| )        
   double  psi4;          //  \Psi( alpha_ka )
@@ -329,27 +297,12 @@ mixdchlet_gradient(double *p, int np, void *dptr, double *dp)
 
       for (k = 0; k < dchl->Q; k++)
 	{
-	  double strand_weights[2] = {0, 0};
-	  for (a = 0; a < dchl->K; a++)
-	    {
-	      int b = dchl->K - 1 - a;  // complement of DNA base a
-	      strand_weights[0] += lgamma(dchl->alpha[k][a] + data->c[i][a]);
-	      strand_weights[1] += lgamma(dchl->alpha[k][a] + data->c[i][b]);
-	    }
-	  esl_vec_DLogNorm(strand_weights, 2);
-
 	  sum_alpha = esl_vec_DSum(dchl->alpha[k], dchl->K);
 	  esl_stats_Psi( sum_alpha + sum_c, &psi2);
 	  esl_stats_Psi( sum_alpha,         &psi3);
 	  for (a = 0; a < dchl->K; a++)
 	    {
 	      esl_stats_Psi( dchl->alpha[k][a] + data->c[i][a], &psi1);
-	      if (dchl->isDnaStrandSymmetric)
-		{
-		  int b = dchl->K - 1 - a;  // complement of DNA base a
-		  esl_stats_Psi( dchl->alpha[k][a] + data->c[i][b], &psi1b);
-		  psi1 = strand_weights[0] * psi1 + strand_weights[1] * psi1b;
-		}
 	      esl_stats_Psi( dchl->alpha[k][a],                 &psi4);
 	      dp[j++] -= dchl->alpha[k][a] * dchl->postq[k] * (psi1 - psi2 + psi3 - psi4);
 	    }
@@ -565,20 +518,16 @@ esl_mixdchlet_Read(ESL_FILEPARSER *efp,  ESL_MIXDCHLET **ret_dchl)
 int
 esl_mixdchlet_Write(FILE *fp, const ESL_MIXDCHLET *dchl)
 {
-  int k,s,a;
+  int k,a;
   int status;
-  int n = 1 + dchl->isDnaStrandSymmetric;
 
-  if ((status = esl_fprintf(fp, "%d %d\n", dchl->K, dchl->Q * n))      != eslOK) return status;
+  if ((status = esl_fprintf(fp, "%d %d\n", dchl->K, dchl->Q))      != eslOK) return status;
   for (k = 0; k < dchl->Q; k++)
     {
-      for (s = 0; s < n; ++s)
-	{
-	  if ((status = esl_fprintf(fp, "%.4f ", dchl->q[k] / n))      != eslOK) return status;
-	  for (a = 0; a < dchl->K; a++)
-	    if ((status = esl_fprintf(fp, "%.4f ", dchl->alpha[k][s ? dchl->K - 1 - a : a])) != eslOK) return status;
-	  if ((status = esl_fprintf(fp, "\n"))                         != eslOK) return status;
-	}
+      if ((status = esl_fprintf(fp, "%.4f ", dchl->q[k]))          != eslOK) return status;
+      for (a = 0; a < dchl->K; a++)
+	if ((status = esl_fprintf(fp, "%.4f ", dchl->alpha[k][a])) != eslOK) return status;
+      if ((status = esl_fprintf(fp, "\n"))                         != eslOK) return status;
     }
   return eslOK;
 }
diff --git a/esl_mixdchlet.h b/esl_mixdchlet.h
index bd7a2bea..46400767 100644
--- a/esl_mixdchlet.h
+++ b/esl_mixdchlet.h
@@ -20,7 +20,6 @@ typedef struct {
   double **alpha;               /* Dirichlet params alpha[0..Q-1][0..K-1]   */
   int      Q;			/* number of mixtures, e.g. 9 for Sjolander */
   int      K;			/* alphabet size, e.g. 20                   */
-  int isDnaStrandSymmetric;     /* is it DNA-strand-symmetric for A C G T?  */
 
   double  *postq;                /* temp space 0..Q-1: for posterior P(k|c) for example */
   /*::cexcerpt::dirichlet_mixdchlet::end::*/
@@ -28,7 +27,6 @@ typedef struct {
 
 
 extern ESL_MIXDCHLET *esl_mixdchlet_Create(int Q, int K);
-extern ESL_MIXDCHLET *esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric);
 extern void           esl_mixdchlet_Destroy(ESL_MIXDCHLET *dchl);
 
 extern double         esl_mixdchlet_logp_c      (ESL_MIXDCHLET *dchl, double *c);
diff --git a/esl_mixdchlet.tex b/esl_mixdchlet.tex
index 93b69544..8c5ef284 100644
--- a/esl_mixdchlet.tex
+++ b/esl_mixdchlet.tex
@@ -91,51 +91,6 @@ \section{Fitting a mixture Dirichlet to counts}
 maximizer.  The implementation provides the negative log likelihood
 and the negative gradient to the CG routine.
 
-\subsection{Fitting a mixture Dirichlet with DNA strand symmetry}
-
-Here, each count vector has $K=4$ counts of DNA bases.  The mixture
-Dirichlet consists of $Q$ \emph{pairs} of components.  Each component
-in a pair is identical to the other after swapping DNA bases with
-their complements.  So, the two components have identical mixture
-coefficients: $q_k/2$.  (It's also possible to have unpaired
-self-symmetric components: that isn't considered here.)
-
-Here, we define $P(c_i \mid \alpha_k)$ to be the probability of one
-count vector given one pair of Dirichlet components:
-
-\[
-P(c_i \mid \alpha_k) = \frac{ |c_i|! }
-                            { \prod_a c_{ia}! }
-                       \frac{ \prod_a \Gamma \left( c_{ia} + \alpha_{ia} \right) + \prod_a \Gamma \left( c_{ia'} + \alpha_{ia} \right) }
-                            { 2 \enspace \Gamma ( |c_i + \alpha_k| ) }
-                       \frac{ \Gamma ( |\alpha_k| ) }
-                            { \prod_a \Gamma \left( \alpha_{ka} \right) }\,,
-\]
-where $a'$ means the complement of base $a$.
-
-Then, the equation for $\frac{\partial L}{\partial \lambda_k}$ is the
-same as above, except we're using the new definition of $P(c_i \mid
-\alpha_k)$:
-\[
-P(k \mid \theta, c_i) = q_k P(c_i \mid \alpha_k) \Big/ \sum_k q_k P(c_i \mid \alpha_k)\,.
-\]
-The partial derivative with respect to $\beta_{ka}$ is
-\[
-\frac{\partial L}{\partial \beta_{ka}}  =  \sum_i
- \alpha_{ka} P(k \mid \theta, c_i)
-    \left( X_{ika}
-        -  \Psi \left( | c_i | + | \alpha_k | \right)
-        +  \Psi \left( | \alpha_k | \right)
-        -  \Psi \left( \alpha_{ka} \right)
-    \right)\,,
-\]
-where
-\[
-X_{ika} =
-\frac{ \Psi \left( c_{ia} + \alpha_{ka} \right) \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) +
-       \Psi \left( c_{ia'} + \alpha_{ka} \right) \prod_a \Gamma \left( c_{ia'} + \alpha_{ka} \right) }
-     { \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) + \prod_a \Gamma \left( c_{ia'} + \alpha_{ka} \right) }\,.
-\]
-
 
 \end{document}
+
diff --git a/miniapps/Makefile.in b/miniapps/Makefile.in
index 18584a42..4a8caab4 100644
--- a/miniapps/Makefile.in
+++ b/miniapps/Makefile.in
@@ -41,8 +41,6 @@ LDFLAGS        = @LDFLAGS@
 DEFS           = @DEFS@
 LIBS           = -leasel @LIBGSL@ @LIBS@ @PTHREAD_LIBS@ -lm
 
-PROGS = esl-mixdchlet
-
 ## list of the miniapp commands to compile.
 #
 SUBCMDOBJS = \
@@ -99,9 +97,6 @@ check:  ${PROGS} easel
 easel:  % : %.c ../libeasel.a ${SUBCMDOBJS}
 	${QUIET_GEN}${CC} ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${DEFS} ${LDFLAGS} -L.. -I. -I.. -I${srcdir} -I${srcdir}/.. -o $@ $< ${SUBCMDOBJS} ${LIBS}
 
-${PROGS}: % : %.c ../libeasel.a
-	${QUIET_GEN}${CC} ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${DEFS} ${LDFLAGS} -L.. -I. -I.. -I${srcdir} -I${srcdir}/.. -o $@ $< ${LIBS}
-
 ${SUBCMDOBJS}: %.o : %.c ../libeasel.a
 	${QUIET_CC}${CC} -I. -I.. -I${srcdir} -I${srcdir}/.. ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${SIMD_CFLAGS} ${DEFS} -c $<
 
diff --git a/miniapps/esl-mixdchlet.c b/miniapps/esl-mixdchlet.c
index df72f389..fa38f8fb 100644
--- a/miniapps/esl-mixdchlet.c
+++ b/miniapps/esl-mixdchlet.c
@@ -111,7 +111,6 @@ static ESL_OPTIONS fit_options[] = {
   /* name           type      default  env  range toggles reqs incomp  help                                   docgroup*/
   { "-h",    eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",         0 },
   { "-s",    eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                0 },
-  { "-d",    eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "fit a DNA strand-symmetric mixture to A C G T counts (Q must be even)", 0 },
   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 };
 
@@ -120,15 +119,13 @@ cmd_fit(const char *topcmd, const ESL_SUBCMD *sub, int argc, char **argv)
 {
   ESL_GETOPTS    *go      = esl_subcmd_CreateDefaultApp(topcmd, sub, fit_options, argc, argv, /*custom opthelp?:*/NULL);
   ESL_RANDOMNESS *rng     = esl_randomness_Create( esl_opt_GetInteger(go, "-s"));
-  int             isDna   = esl_opt_GetBoolean(go, "-d");              // a complement-symmetric mixture Dirichlet for DNA base counts?
   int             Q       = strtol(esl_opt_GetArg(go, 1), NULL, 10);   // number of mixture components
   int             K       = strtol(esl_opt_GetArg(go, 2), NULL, 10);   // size of probability/parameter vectors - length of count vectors
   char           *ctfile  = esl_opt_GetArg(go, 3);                     // count file to input
   char           *outfile = esl_opt_GetArg(go, 4);                     // mixture Dirichlet file output
   ESL_FILEPARSER *efp     = NULL;                                      // open fileparser for reading                                                
-  if (isDna && Q % 2) esl_fatal("option -d requires number of mixture components Q to be even");
   FILE           *ofp     = NULL;                                      // open output file for writing
-  ESL_MIXDCHLET  *dchl    = esl_mixdchlet_CreateForFitting(Q,K,isDna); // mixture Dirichlet being estimated
+  ESL_MIXDCHLET  *dchl    = esl_mixdchlet_Create(Q,K);                 // mixture Dirichlet being estimated
   int             Nalloc  = 1024;                                      // initial allocation for ct[]
   double        **ct      = esl_mat_DCreate(Nalloc, K);                // count vectors, [0..N-1][0..K-1]
   int             N       = 0;                                         // number of count vectors so far

From 27b2f949e2eed7e55a8391137468f5936e448c82 Mon Sep 17 00:00:00 2001
From: "Martin C. Frith" <mcfrith@gmail.com>
Date: Thu, 23 Oct 2025 09:22:21 +0000
Subject: [PATCH 3/3] Adds esl-mixdchlet DNA strand symmetry option

---
 esl_mixdchlet.c          | 121 ++++++++++++++++++++++++++++++++++++++-
 esl_mixdchlet.h          |   2 +
 esl_mixdchlet.tex        |  54 ++++++++++++++++-
 miniapps/Makefile.in     |   5 ++
 miniapps/esl-mixdchlet.c |   4 +-
 5 files changed, 182 insertions(+), 4 deletions(-)

diff --git a/esl_mixdchlet.c b/esl_mixdchlet.c
index d7fb1117..37b16a88 100644
--- a/esl_mixdchlet.c
+++ b/esl_mixdchlet.c
@@ -46,6 +46,12 @@
  */
 ESL_MIXDCHLET *
 esl_mixdchlet_Create(int Q, int K)
+{
+  return esl_mixdchlet_CreateForFitting(Q, K, 0);
+}
+
+ESL_MIXDCHLET *
+esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric)
 {
   ESL_MIXDCHLET *dchl = NULL;
   int            status;
@@ -64,6 +70,7 @@ esl_mixdchlet_Create(int Q, int K)
 
   dchl->Q = Q;
   dchl->K = K;
+  dchl->isDnaStrandSymmetric = isDnaStrandSymmetric;
   return dchl;
 
  ERROR:
@@ -218,6 +225,25 @@ mixdchlet_pack_paramvector(ESL_MIXDCHLET *dchl, double *p)
   int j = 0;     /* counter in packed parameter vector <p> */
   int k,a;	 /* indices over components, residues */
 
+  if (dchl->isDnaStrandSymmetric)
+    {
+      for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	p[j++] = log(dchl->q[k] * 2);  // Prob(component) * 2 = Prob(pair)
+
+      if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	p[j++] = log(dchl->q[k]);
+
+      for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	for (a = 0; a < dchl->K; a++)
+	  p[j++] = log(dchl->alpha[k][a]);
+
+      if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	for (a = 0; a < dchl->K / 2; a++)
+	  p[j++] = log(dchl->alpha[k][a]);
+
+      return;
+    }
+
   /* mixture coefficients */
   for (k = 0; k < dchl->Q; k++)
     p[j++] = log(dchl->q[k]);
@@ -240,6 +266,27 @@ mixdchlet_unpack_paramvector(double *p, ESL_MIXDCHLET *dchl)
   int j = 0;     /* counter in packed parameter vector <p> */
   int k,a;	 /* indices over components, residues */
 
+  if (dchl->isDnaStrandSymmetric)
+    {
+      for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	dchl->q[k] = dchl->q[k+1] = exp(p[j++]) / 2;
+
+      if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	dchl->q[k] = exp(p[j++]);
+
+      esl_vec_DNorm(dchl->q, dchl->Q);
+
+      for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	for (a = 0; a < dchl->K; a++)
+	  dchl->alpha[k][a] = dchl->alpha[k+1][dchl->K - 1 - a] = exp(p[j++]);
+
+      if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	for (a = 0; a < dchl->K / 2; a++)
+	  dchl->alpha[k][a] = dchl->alpha[k][dchl->K - 1 - a] = exp(p[j++]);
+
+      return;
+    }
+
   /* mixture coefficients */
   for (k = 0; k < dchl->Q; k++) 
     dchl->q[k] = exp(p[j++]);
@@ -278,6 +325,7 @@ mixdchlet_gradient(double *p, int np, void *dptr, double *dp)
   double  sum_alpha;     //  |alpha_k| 
   double  sum_c;         //  |c_i|
   double  psi1;          //  \Psi(c_ia + alpha_ka)        
+  double  psi1b;         //  \Psi(c_ia' + alpha_ka)
   double  psi2;          //  \Psi( |c_i| + |alpha_k| ) 
   double  psi3;          //  \Psi( |alpha_k| )        
   double  psi4;          //  \Psi( alpha_ka )
@@ -289,9 +337,61 @@ mixdchlet_gradient(double *p, int np, void *dptr, double *dp)
     {
       mixdchlet_postq(dchl, data->c[i]);           // d->postq[q] is now P(q | c_i, theta)
       sum_c = esl_vec_DSum(data->c[i], dchl->K);   // |c_i|
+      j = 0;
       
+      if (dchl->isDnaStrandSymmetric)
+	{
+	  for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	    dp[j++] -= dchl->postq[k] + dchl->postq[k+1] - 2 * dchl->q[k];
+
+	  if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	    dp[j++] -= dchl->postq[k] - dchl->q[k];
+
+	  for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	    {
+	      double strand_weights[2] = {0, 0};
+	      for (a = 0; a < dchl->K; a++)
+		{
+		  int b = dchl->K - 1 - a;  // complement of DNA base a
+		  strand_weights[0] += lgamma(dchl->alpha[k][a] + data->c[i][a]);
+		  strand_weights[1] += lgamma(dchl->alpha[k][a] + data->c[i][b]);
+		}
+	      esl_vec_DLogNorm(strand_weights, 2);
+
+	      sum_alpha = esl_vec_DSum(dchl->alpha[k], dchl->K);
+	      esl_stats_Psi( sum_alpha + sum_c, &psi2);
+	      esl_stats_Psi( sum_alpha,         &psi3);
+	      for (a = 0; a < dchl->K; a++)
+		{
+		  int b = dchl->K - 1 - a;  // complement of DNA base a
+		  esl_stats_Psi( dchl->alpha[k][a] + data->c[i][a], &psi1);
+		  esl_stats_Psi( dchl->alpha[k][a] + data->c[i][b], &psi1b);
+		  esl_stats_Psi( dchl->alpha[k][a],                 &psi4);
+		  dp[j++] -= dchl->alpha[k][a] * (dchl->postq[k] + dchl->postq[k+1])
+		    * (strand_weights[0] * psi1 + strand_weights[1] * psi1b - psi2 + psi3 - psi4);
+		}
+	    }
+
+	  if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	    {
+	      sum_alpha = esl_vec_DSum(dchl->alpha[k], dchl->K);
+	      esl_stats_Psi( sum_alpha + sum_c, &psi2);
+	      esl_stats_Psi( sum_alpha,         &psi3);
+	      for (a = 0; a < dchl->K / 2; a++)
+		{
+		  int b = dchl->K - 1 - a;  // complement of DNA base a
+		  esl_stats_Psi( dchl->alpha[k][a] + data->c[i][a], &psi1);
+		  esl_stats_Psi( dchl->alpha[k][a] + data->c[i][b], &psi1b);
+		  esl_stats_Psi( dchl->alpha[k][a],                 &psi4);
+		  dp[j++] -= dchl->alpha[k][a] * dchl->postq[k]
+		    * (psi1 + psi1b - 2 * psi2 + 2 * psi3 - 2 * psi4);
+		}
+	    }
+
+	  continue;
+	}
+
       /* mixture coefficient gradient */
-      j = 0;
       for (k = 0; k < dchl->Q; k++)
 	dp[j++] -= dchl->postq[k] - dchl->q[k];
 
@@ -351,6 +451,8 @@ esl_mixdchlet_Fit(double **c, int N, ESL_MIXDCHLET *dchl, double *opt_nll)
   double  fx;
   int     status;
 
+  if (dchl->isDnaStrandSymmetric) nparam = (nparam + 1) / 2;
+
   cfg = esl_min_cfg_Create(nparam);
   if (! cfg) { status = eslEMEM; goto ERROR; }
   cfg->cg_rtol    = 3e-5;
@@ -419,6 +521,23 @@ esl_mixdchlet_Sample(ESL_RANDOMNESS *rng, ESL_MIXDCHLET *dchl)
 {
   int k,a;
 
+  if (dchl->isDnaStrandSymmetric)
+    {
+      esl_dirichlet_DSampleUniform(rng, dchl->Q, dchl->q);
+      for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	dchl->q[k] = dchl->q[k+1] = (dchl->q[k] + dchl->q[k+1]) / 2;
+
+      for (k = 0; k < dchl->Q - 1; k += 2)  // paired components
+	for (a = 0; a < dchl->K; a++)
+	  dchl->alpha[k][a] = dchl->alpha[k+1][dchl->K - 1 - a] = 2.0 * esl_rnd_UniformPositive(rng);
+
+      if (k == dchl->Q - 1)  // unpaired self-symmetric component
+	for (a = 0; a < dchl->K / 2; a++)
+	  dchl->alpha[k][a] = dchl->alpha[k][dchl->K - 1 - a] = 2.0 * esl_rnd_UniformPositive(rng);
+
+      return eslOK;
+    }
+
   esl_dirichlet_DSampleUniform(rng, dchl->Q, dchl->q);
   for (k = 0; k < dchl->Q; k++)
     for (a = 0; a < dchl->K; a++)
diff --git a/esl_mixdchlet.h b/esl_mixdchlet.h
index 46400767..bd7a2bea 100644
--- a/esl_mixdchlet.h
+++ b/esl_mixdchlet.h
@@ -20,6 +20,7 @@ typedef struct {
   double **alpha;               /* Dirichlet params alpha[0..Q-1][0..K-1]   */
   int      Q;			/* number of mixtures, e.g. 9 for Sjolander */
   int      K;			/* alphabet size, e.g. 20                   */
+  int isDnaStrandSymmetric;     /* is it DNA-strand-symmetric for A C G T?  */
 
   double  *postq;                /* temp space 0..Q-1: for posterior P(k|c) for example */
   /*::cexcerpt::dirichlet_mixdchlet::end::*/
@@ -27,6 +28,7 @@ typedef struct {
 
 
 extern ESL_MIXDCHLET *esl_mixdchlet_Create(int Q, int K);
+extern ESL_MIXDCHLET *esl_mixdchlet_CreateForFitting(int Q, int K, int isDnaStrandSymmetric);
 extern void           esl_mixdchlet_Destroy(ESL_MIXDCHLET *dchl);
 
 extern double         esl_mixdchlet_logp_c      (ESL_MIXDCHLET *dchl, double *c);
diff --git a/esl_mixdchlet.tex b/esl_mixdchlet.tex
index 8c5ef284..38c0910c 100644
--- a/esl_mixdchlet.tex
+++ b/esl_mixdchlet.tex
@@ -1,6 +1,7 @@
 \documentclass[11pt]{article}
 \setcounter{secnumdepth}{0}
 
+\usepackage{amsmath}
 \usepackage{relsize}
 
 \newcommand{\mono}[1]{{\smaller\texttt{#1}}}                    % literal (to be typed): code, program names
@@ -38,7 +39,7 @@ \section{Fitting a mixture Dirichlet to counts}
 \[
 P(c_i \mid \alpha_k) = \frac{ |c_i|! }
                             { \prod_a c_{ia}! }
-                       \frac{ \prod_a \Gamma \left( c_{ia} + \alpha_{ia} \right) }
+                       \frac{ \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) }
                             { \Gamma ( |c_i + \alpha_k| ) }
                        \frac{ \Gamma ( |\alpha_k| ) }
                             { \prod_a \Gamma \left( \alpha_{ka} \right) }
@@ -91,6 +92,55 @@ \section{Fitting a mixture Dirichlet to counts}
 maximizer.  The implementation provides the negative log likelihood
 and the negative gradient to the CG routine.
 
+\subsection{Fitting a mixture Dirichlet with DNA strand symmetry}
 
-\end{document}
+Here, each count vector has $K=4$ counts of DNA bases.  For a
+strand-symmetric mixture Dirichlet, each component must be either
+self-symmetric: $\alpha_{ka} = \alpha_{ka'}$ (where $a'$ is the
+complement of base $a$), or paired with a symmetric component:
+$\alpha_{ka} = \alpha_{k'a'}$ and $q_k = q_{k'}$ (where $k$ and $k'$
+indicate paired components).  Define $R$ to be the number of pairs
+plus the number of unpaired components.  It's convenient to make these
+definitions:
+\[
+r_k = \begin{cases}
+  2 q_k & \text{for a paired component (i.e., the prior probability of the pair),} \\
+  q_k   & \text{for an unpaired component}
+\end{cases}
+\]
+\[
+\hat{P}(k \mid \theta, c_i) = \begin{cases}
+  P(k \mid \theta, c_i) + P(k' \mid \theta, c_i) & \text{for a paired component,}\\
+  P(k \mid \theta, c_i) & \text{for an unpaired component}
+\end{cases}
+\]
+Here, the number of $\lambda$ parameters is $R$, and the number of
+$\beta$ parameters is: $K \times \text{(number of components) / 2}$.
+We can define $\beta_{ka}$ as above, and $\lambda_k$ like this:
+\begin{eqnarray*}
+  r_k & = & \frac{ e^{\lambda_k} } { \sum_{m=1}^R e^{\lambda_m} } \,.
+\end{eqnarray*}
+Then,
+\begin{eqnarray*}
+\frac{\partial L}{\partial \lambda_k} &=& \sum_i \bigl( \hat{P}(k \mid \theta, c_i) - r_k \bigr) \\
+\frac{\partial L}{\partial \beta_{ka}} &=&
+ \begin{cases}
+\sum_i \alpha_{ka} \hat{P}(k \mid \theta, c_i)
+    (X_{ika} + Z_{ika}) & \text{for a paired component,} \\
+\sum_i \alpha_{ka} \hat{P}(k \mid \theta, c_i)
+    (Y_{ika} + 2 Z_{ika}) & \text{for an unpaired component}
+    \end{cases}
+\end{eqnarray*}
+where
+\begin{eqnarray*}
+X_{ika} & = &
+\frac{ \Psi \left( c_{ia} + \alpha_{ka} \right) \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) +
+       \Psi \left( c_{ia'} + \alpha_{ka} \right) \prod_a \Gamma \left( c_{ia'} + \alpha_{ka} \right) }
+     { \prod_a \Gamma \left( c_{ia} + \alpha_{ka} \right) + \prod_a \Gamma \left( c_{ia'} + \alpha_{ka} \right) } \,,\\
+Y_{ika} & = & \Psi \left( c_{ia} + \alpha_{ka} \right) + \Psi \left( c_{ia'} + \alpha_{ka} \right) \,,\\
+Z_{ika} & = & -  \Psi \left( | c_i | + | \alpha_k | \right)
+        +  \Psi \left( | \alpha_k | \right)
+        -  \Psi \left( \alpha_{ka} \right) \,.
+\end{eqnarray*}
 
+\end{document}
diff --git a/miniapps/Makefile.in b/miniapps/Makefile.in
index 4a8caab4..18584a42 100644
--- a/miniapps/Makefile.in
+++ b/miniapps/Makefile.in
@@ -41,6 +41,8 @@ LDFLAGS        = @LDFLAGS@
 DEFS           = @DEFS@
 LIBS           = -leasel @LIBGSL@ @LIBS@ @PTHREAD_LIBS@ -lm
 
+PROGS = esl-mixdchlet
+
 ## list of the miniapp commands to compile.
 #
 SUBCMDOBJS = \
@@ -97,6 +99,9 @@ check:  ${PROGS} easel
 easel:  % : %.c ../libeasel.a ${SUBCMDOBJS}
 	${QUIET_GEN}${CC} ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${DEFS} ${LDFLAGS} -L.. -I. -I.. -I${srcdir} -I${srcdir}/.. -o $@ $< ${SUBCMDOBJS} ${LIBS}
 
+${PROGS}: % : %.c ../libeasel.a
+	${QUIET_GEN}${CC} ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${DEFS} ${LDFLAGS} -L.. -I. -I.. -I${srcdir} -I${srcdir}/.. -o $@ $< ${LIBS}
+
 ${SUBCMDOBJS}: %.o : %.c ../libeasel.a
 	${QUIET_CC}${CC} -I. -I.. -I${srcdir} -I${srcdir}/.. ${CPPFLAGS} ${CFLAGS} ${PTHREAD_CFLAGS} ${SIMD_CFLAGS} ${DEFS} -c $<
 
diff --git a/miniapps/esl-mixdchlet.c b/miniapps/esl-mixdchlet.c
index fa38f8fb..23150c4a 100644
--- a/miniapps/esl-mixdchlet.c
+++ b/miniapps/esl-mixdchlet.c
@@ -111,6 +111,7 @@ static ESL_OPTIONS fit_options[] = {
   /* name           type      default  env  range toggles reqs incomp  help                                   docgroup*/
   { "-h",    eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "show brief help on version and usage",         0 },
   { "-s",    eslARG_INT,      "0",  NULL, NULL,  NULL,  NULL, NULL, "set random number seed to <n>",                0 },
+  { "-d",    eslARG_NONE,   FALSE,  NULL, NULL,  NULL,  NULL, NULL, "fit a DNA strand-symmetric mixture to A C G T counts", 0 },
   {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
 };
 
@@ -119,13 +120,14 @@ cmd_fit(const char *topcmd, const ESL_SUBCMD *sub, int argc, char **argv)
 {
   ESL_GETOPTS    *go      = esl_subcmd_CreateDefaultApp(topcmd, sub, fit_options, argc, argv, /*custom opthelp?:*/NULL);
   ESL_RANDOMNESS *rng     = esl_randomness_Create( esl_opt_GetInteger(go, "-s"));
+  int             isDna   = esl_opt_GetBoolean(go, "-d");              // a complement-symmetric mixture Dirichlet for DNA base counts?
   int             Q       = strtol(esl_opt_GetArg(go, 1), NULL, 10);   // number of mixture components
   int             K       = strtol(esl_opt_GetArg(go, 2), NULL, 10);   // size of probability/parameter vectors - length of count vectors
   char           *ctfile  = esl_opt_GetArg(go, 3);                     // count file to input
   char           *outfile = esl_opt_GetArg(go, 4);                     // mixture Dirichlet file output
   ESL_FILEPARSER *efp     = NULL;                                      // open fileparser for reading                                                
   FILE           *ofp     = NULL;                                      // open output file for writing
-  ESL_MIXDCHLET  *dchl    = esl_mixdchlet_Create(Q,K);                 // mixture Dirichlet being estimated
+  ESL_MIXDCHLET  *dchl    = esl_mixdchlet_CreateForFitting(Q,K,isDna); // mixture Dirichlet being estimated
   int             Nalloc  = 1024;                                      // initial allocation for ct[]
   double        **ct      = esl_mat_DCreate(Nalloc, K);                // count vectors, [0..N-1][0..K-1]
   int             N       = 0;                                         // number of count vectors so far