-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathDataSetModeling.c
More file actions
1167 lines (983 loc) · 47.4 KB
/
DataSetModeling.c
File metadata and controls
1167 lines (983 loc) · 47.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// DataSetModeling.c
// Automated_CSV_Data_Analysis
// DavidRichardson02
#include "DataSetModeling.h"
#include "CommonDefinitions.h"
#include "GeneralUtilities.h"
#include "StringUtilities.h"
#include "FileUtilities.h"
#include "DataExtraction.h"
#include "DataAnalysis.h"
/**
 * emit_adv_modeling_snippet_individual
 *
 * Appends the optional "advanced modeling" MATLAB segment to a per-field script.
 * Nothing is computed in C; this function only writes constant MATLAB text to `mf`.
 *
 * The emitted MATLAB, at runtime:
 *   1. looks for `<field>_samples.txt` under `analysisDir`, then relative to the
 *      current directory, and loads the first one that exists;
 *   2. if samples were loaded and both `fitdist` and `aicbic` exist, fits a Normal
 *      distribution and, when every sample is > 0, also Lognormal, Gamma and
 *      Exponential;
 *   3. picks the candidate with the smallest AIC and plots its PDF (scaled to the
 *      histogram peak) in magenta, then rewrites the legend;
 *   4. opens ECDF and QQ figures and saves them as PNG (and .fig when `savefig`
 *      exists) into `scriptDir`;
 *   5. turns any error raised inside the fitting block into a MATLAB warning.
 *
 * @note The emitted MATLAB reads `analysisDir`, `scriptDir`, `field`,
 *       `bin_centers`, and `counts`; the host script must define them first.
 * @note Every `%` destined for MATLAB is written as `%%` because the text is a
 *       printf format string.
 *
 * @param mf  Open, writable FILE stream for the per-field MATLAB script.
 * @return void.
 */
static void emit_adv_modeling_snippet_individual(FILE *mf)
{
    /* Phase 1: feature toggle and raw-sample discovery. */
    fprintf(mf,
        "%% --- Optional advanced modeling (raw samples + toolbox, no-risk) ---\n"
        "enableAdvancedFits = true;\n"
        "rawCandidates = {\n"
        "  fullfile(analysisDir, sprintf('%%s_samples.txt', field)),\n"
        "  sprintf('%%s_samples.txt', field)\n"
        "};\n"
        "rawLoaded = false;\n"
        "for rk=1:numel(rawCandidates)\n"
        "  if exist(rawCandidates{rk},'file'), try raw = load(rawCandidates{rk}); rawLoaded=true; break; end, end\n"
        "end\n");

    /* Phase 2: toolbox guard, then the candidate distribution fits. */
    fprintf(mf,
        "if enableAdvancedFits && rawLoaded && exist('fitdist','file') && exist('aicbic','file')\n"
        "  try\n"
        "    Cnames = {}; Cdist = {}; LL = []; K = []; %% names, dists, loglik, k-params\n"
        "    %% Normal (k=2)\n"
        "    d = fitdist(raw(:),'Normal'); Cnames{end+1}='Normal'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=2;\n"
        "    if all(raw(:)>0)\n"
        "      %% Lognormal (k=2)\n"
        "      d = fitdist(raw(:),'Lognormal'); Cnames{end+1}='Lognormal'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=2;\n"
        "      %% Gamma (k=2)\n"
        "      d = fitdist(raw(:),'Gamma'); Cnames{end+1}='Gamma'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=2;\n"
        "      %% Exponential (k=1)\n"
        "      d = fitdist(raw(:),'Exponential'); Cnames{end+1}='Exponential'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=1;\n"
        "    end\n");

    /* Phase 3: AIC ranking and overlay of the winning PDF on the histogram. */
    fprintf(mf,
        "    [AIC,~] = aicbic(LL, K, numel(raw)); [~,ix] = min(AIC);\n"
        "    bestName = Cnames{ix}; bestDist = Cdist{ix};\n"
        "    xfine = linspace(min(bin_centers), max(bin_centers), 300);\n"
        "    ypdf = pdf(bestDist, xfine); sf2 = max(counts) / max(ypdf + eps);\n"
        "    plot(xfine, ypdf*sf2, 'm-', 'LineWidth', 1.2);\n"
        "    legend('Histogram','Normal Fit','Mean',['Best: ' bestName],'Location','best');\n");

    /* Phase 4: ECDF and QQ figures, saved into scriptDir. */
    fprintf(mf,
        "    %% ECDF + QQ (saved next to script; guards keep Octave OK)\n"
        "    f_ecdf = figure('Name',[field ' ECDF']); ecdf(raw(:)); grid on;\n"
        "    saveas(f_ecdf, fullfile(scriptDir, sprintf('%%s_ecdf.png', field)));\n"
        "    if exist('savefig','file'), savefig(f_ecdf, fullfile(scriptDir, sprintf('%%s_ecdf.fig', field))); end\n"
        "    f_qq = figure('Name',[field ' QQ Normal']); qqplot(raw(:)); grid on;\n"
        "    saveas(f_qq, fullfile(scriptDir, sprintf('%%s_qq.png', field)));\n"
        "    if exist('savefig','file'), savefig(f_qq, fullfile(scriptDir, sprintf('%%s_qq.fig', field))); end\n");

    /* Phase 5: error handler and block terminators. */
    fprintf(mf,
        "  catch ME\n"
        "    warning('Advanced modeling failed for %%s: %%s', field, ME.message);\n"
        "  end\n"
        "end\n");
}
/**
 * emit_adv_modeling_snippet_comprehensive
 *
 * Appends the optional "advanced modeling" MATLAB segment to the comprehensive
 * (multi-field) script. This is the loop-variable flavour of the per-field
 * snippet: it refers to `fn`, `bc`, and `cnt` instead of `field`, `bin_centers`,
 * and `counts`. The function itself only writes constant MATLAB text to `comp`.
 *
 * The emitted MATLAB, at runtime:
 *   1. loads `<fn>_samples.txt` from `analysisDir` or the current directory;
 *   2. when samples, `fitdist`, and `aicbic` are all available, fits Normal and,
 *      for strictly positive data, Lognormal, Gamma and Exponential;
 *   3. selects the smallest-AIC candidate and overlays its PDF, scaled to the
 *      histogram peak, then rewrites the legend;
 *   4. creates ECDF and QQ figures and saves them into `scriptDir` (PNG always,
 *      .fig when `savefig` exists);
 *   5. reports any error from the fitting block as a MATLAB warning.
 *
 * @note The emitted MATLAB reads `analysisDir`, `scriptDir`, `fn`, `bc`, and
 *       `cnt`; the surrounding script loop must define them.
 *
 * @param comp  Open, writable FILE stream for the comprehensive MATLAB script.
 * @return void.
 */
static void emit_adv_modeling_snippet_comprehensive(FILE *comp)
{
    /* Phase 1: feature toggle and raw-sample discovery. */
    fprintf(comp,
        "%% --- Optional advanced modeling (raw samples + toolbox, no-risk) ---\n"
        "enableAdvancedFits = true;\n"
        "rawCandidates = {\n"
        "  fullfile(analysisDir, sprintf('%%s_samples.txt', fn)),\n"
        "  sprintf('%%s_samples.txt', fn)\n"
        "};\n"
        "rawLoaded = false;\n"
        "for rk=1:numel(rawCandidates)\n"
        "  if exist(rawCandidates{rk},'file'), try raw = load(rawCandidates{rk}); rawLoaded=true; break; end, end\n"
        "end\n");

    /* Phase 2: toolbox guard, then the candidate distribution fits. */
    fprintf(comp,
        "if enableAdvancedFits && rawLoaded && exist('fitdist','file') && exist('aicbic','file')\n"
        "  try\n"
        "    Cnames = {}; Cdist = {}; LL = []; K = [];\n"
        "    d = fitdist(raw(:),'Normal'); Cnames{end+1}='Normal'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=2;\n"
        "    if all(raw(:)>0)\n"
        "      d = fitdist(raw(:),'Lognormal'); Cnames{end+1}='Lognormal'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=2;\n"
        "      d = fitdist(raw(:),'Gamma'); Cnames{end+1}='Gamma'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=2;\n"
        "      d = fitdist(raw(:),'Exponential'); Cnames{end+1}='Exponential'; Cdist{end+1}=d; LL(end+1)=sum(log(pdf(d,raw(:))+eps)); K(end+1)=1;\n"
        "    end\n");

    /* Phase 3: AIC ranking and overlay of the winning PDF on the histogram. */
    fprintf(comp,
        "    [AIC,~] = aicbic(LL, K, numel(raw)); [~,ix] = min(AIC);\n"
        "    bestName=Cnames{ix}; bestDist=Cdist{ix};\n"
        "    xfine = linspace(min(bc), max(bc), 300);\n"
        "    ypdf = pdf(bestDist, xfine); sf2 = max(cnt)/max(ypdf+eps);\n"
        "    plot(xfine, ypdf*sf2, 'm-', 'LineWidth', 1.2);\n"
        "    legend('Histogram','Normal Fit','Mean',['Best: ' bestName],'Location','best');\n");

    /* Phase 4: ECDF and QQ figures, saved into scriptDir. */
    fprintf(comp,
        "    f_ecdf = figure('Name',[fn ' ECDF']); ecdf(raw(:)); grid on;\n"
        "    saveas(f_ecdf, fullfile(scriptDir, sprintf('%%s_ecdf.png', fn)));\n"
        "    if exist('savefig','file'), savefig(f_ecdf, fullfile(scriptDir, sprintf('%%s_ecdf.fig', fn))); end\n"
        "    f_qq = figure('Name',[fn ' QQ Normal']); qqplot(raw(:)); grid on;\n"
        "    saveas(f_qq, fullfile(scriptDir, sprintf('%%s_qq.png', fn)));\n"
        "    if exist('savefig','file'), savefig(f_qq, fullfile(scriptDir, sprintf('%%s_qq.fig', fn))); end\n");

    /* Phase 5: error handler and block terminators. */
    fprintf(comp,
        "  catch ME\n"
        "    warning('Advanced modeling failed for %%s: %%s', fn, ME.message);\n"
        "  end\n"
        "end\n");
}
/**
 * emit_kde_overlay_individual
 *
 * Appends an optional kernel-density-estimate (KDE) overlay section to a per-field
 * MATLAB script. The function only writes constant MATLAB text to `mf`.
 *
 * The emitted MATLAB runs only when `ksdensity` exists. It tries to load
 * `<field>_samples.txt` from `analysisDir` or the current directory; if no file
 * loads, it builds pseudo-samples by repeating each bin center `counts(ii)` times.
 * The density is evaluated on a 400-point grid spanning the bin centers, scaled so
 * its maximum matches the tallest histogram bar, plotted in green, and the legend
 * is rewritten. Any runtime error is caught and reported as a MATLAB warning.
 *
 * @note The emitted MATLAB reads `field`, `analysisDir`, `bin_centers`, and
 *       `counts`; the host script must define them first.
 * @note The text is a printf format string, so every `%` meant for MATLAB must be
 *       written `%%`. The `%s` inside the emitted `warning(...)` call was
 *       previously unescaped, which made fprintf read a nonexistent argument
 *       (undefined behaviour) and corrupted the generated line.
 *
 * @param mf  Open, writable FILE stream for the per-field MATLAB script.
 * @return void.
 */
static void emit_kde_overlay_individual(FILE *mf)
{
    fprintf(mf,
        "%%%% %% Section: KDE Overlay (optional)\n"
        "if exist('ksdensity','file')\n"
        "  try\n"
        "    %%%% If raw samples exist, prefer them; else approximate from histogram\n"
        "    haveRaw=false; rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',field)),sprintf('%%s_samples.txt',field)};\n"
        "    for rk=1:numel(rawCandidates)\n"
        "      if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "    end\n"
        "    if haveRaw\n"
        "      xk = linspace(min(bin_centers), max(bin_centers), 400);\n"
        "      [yk,~] = ksdensity(raw(:), xk);\n"
        "    else\n"
        "      %%%% expand counts into pseudo-samples (coarse but OK if counts small)\n"
        "      ps=[]; for ii=1:numel(bin_centers), ps=[ps; repmat(bin_centers(ii), counts(ii), 1)]; end\n"
        "      if ~isempty(ps)\n"
        "        xk = linspace(min(bin_centers), max(bin_centers), 400);\n"
        "        [yk,~] = ksdensity(ps, xk);\n"
        "      else, xk=[]; yk=[]; end\n"
        "    end\n"
        "    if ~isempty(yk)\n"
        "      sfK = max(counts) / max(yk + eps);\n"
        "      plot(xk, yk*sfK, 'g-', 'LineWidth', 1.2);\n"
        "      legend('Histogram','Normal Fit','Mean','KDE','Location','best');\n"
        "    end\n"
        "  catch ME\n"
        /* `%%s` so MATLAB's warning() receives a literal %s placeholder. */
        "    warning('KDE overlay skipped: %%s', ME.message);\n"
        "  end\n"
        "end\n\n"
    );
}
/**
 * emit_kde_overlay_comprehensive
 *
 * Appends an optional KDE overlay section to the comprehensive MATLAB script. It
 * is the loop-variable flavour of the per-field KDE snippet and only writes
 * constant MATLAB text to `comp`.
 *
 * The emitted MATLAB runs only when `ksdensity` exists. It prefers raw samples
 * loaded from `<fn>_samples.txt` (under `analysisDir` or the current directory)
 * and otherwise expands `(bc, cnt)` into pseudo-samples. The density is evaluated
 * on a 400-point grid, scaled to the tallest bar, drawn in green, and the legend
 * is rewritten. Runtime errors become a MATLAB warning.
 *
 * @note The emitted MATLAB reads `fn`, `analysisDir`, `bc`, and `cnt`; the
 *       surrounding script must define them.
 * @note The `%s` inside the emitted `warning(...)` call is written `%%s`. Left
 *       unescaped, fprintf would consume a missing argument (undefined behaviour).
 *
 * @param comp  Open, writable FILE stream for the comprehensive MATLAB script.
 * @return void.
 */
static void emit_kde_overlay_comprehensive(FILE *comp)
{
    fprintf(comp,
        "%%%% %% Section: KDE Overlay (optional)\n"
        "if exist('ksdensity','file')\n"
        "  try\n"
        "    haveRaw=false; rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',fn)),sprintf('%%s_samples.txt',fn)};\n"
        "    for rk=1:numel(rawCandidates)\n"
        "      if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "    end\n"
        "    if haveRaw\n"
        "      xk = linspace(min(bc), max(bc), 400);\n"
        "      [yk,~] = ksdensity(raw(:), xk);\n"
        "    else\n"
        "      ps=[]; for ii=1:numel(bc), ps=[ps; repmat(bc(ii), cnt(ii), 1)]; end\n"
        "      if ~isempty(ps)\n"
        "        xk = linspace(min(bc), max(bc), 400);\n"
        "        [yk,~] = ksdensity(ps, xk);\n"
        "      else, xk=[]; yk=[]; end\n"
        "    end\n"
        "    if ~isempty(yk)\n"
        "      sfK = max(cnt) / max(yk + eps);\n"
        "      plot(xk, yk*sfK, 'g-', 'LineWidth', 1.2);\n"
        "      legend('Histogram','Normal Fit','Mean','KDE','Location','best');\n"
        "    end\n"
        "  catch ME\n"
        /* `%%s` so MATLAB's warning() receives a literal %s placeholder. */
        "    warning('KDE overlay skipped: %%s', ME.message);\n"
        "  end\n"
        "end\n\n"
    );
}
/**
 * emit_gmm_overlay_individual
 *
 * Appends an optional Gaussian Mixture Model overlay section to a per-field
 * MATLAB script. The function only writes constant MATLAB text to `mf`.
 *
 * The emitted MATLAB runs only when `fitgmdist` exists. It loads
 * `<field>_samples.txt` (from `analysisDir` or the current directory) or, failing
 * that, expands the histogram bins into pseudo-samples. With more than 10 samples
 * it fits mixtures with k = 1, 2, 3 components (regularization 1e-6, at most 500
 * iterations), keeps the fit with the lowest BIC, plots its PDF scaled to the
 * tallest bar in magenta, and rewrites the legend. A failed individual fit is
 * silently skipped; any other runtime error becomes a MATLAB warning.
 *
 * @note The emitted MATLAB reads `analysisDir`, `field`, `bin_centers`, and
 *       `counts`; the host script must define them first.
 * @note The `%s` inside the emitted `warning(...)` call is written `%%s`. Left
 *       unescaped, fprintf would consume a missing argument (undefined behaviour).
 *
 * @param mf  Open, writable FILE stream for the per-field MATLAB script.
 * @return void.
 */
static void emit_gmm_overlay_individual(FILE *mf)
{
    fprintf(mf,
        "%%%% %% Section: Gaussian Mixture Model (BIC, 1..3 comps)\n"
        "if exist('fitgmdist','file')\n"
        "  try\n"
        "    haveRaw=false; rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',field)),sprintf('%%s_samples.txt',field)};\n"
        "    for rk=1:numel(rawCandidates)\n"
        "      if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "    end\n"
        "    if ~haveRaw\n"
        "      raw=[]; for ii=1:numel(bin_centers), raw=[raw; repmat(bin_centers(ii), counts(ii), 1)]; end\n"
        "    end\n"
        "    if numel(raw)>10\n"
        "      bestGM=[]; bestBIC=Inf;\n"
        "      for k=1:3\n"
        "        try\n"
        "          gm = fitgmdist(raw(:),k,'RegularizationValue',1e-6,'Options',statset('MaxIter',500));\n"
        "          BIC = gm.BIC;\n"
        "          if BIC<bestBIC, bestBIC=BIC; bestGM=gm; end\n"
        "        catch, end\n"
        "      end\n"
        "      if ~isempty(bestGM)\n"
        "        xg = linspace(min(bin_centers), max(bin_centers), 400);\n"
        "        yg = pdf(bestGM, xg'); yg = yg(:)';\n"
        "        sfG = max(counts)/max(yg+eps);\n"
        "        plot(xg, yg*sfG, 'm-', 'LineWidth', 1.2);\n"
        "        legend('Histogram','Normal Fit','Mean','KDE','BestParam','GMM','Location','best');\n"
        "      end\n"
        "    end\n"
        "  catch ME\n"
        /* `%%s` so MATLAB's warning() receives a literal %s placeholder. */
        "    warning('GMM overlay skipped: %%s', ME.message);\n"
        "  end\n"
        "end\n\n"
    );
}
/**
 * emit_gmm_overlay_comprehensive
 *
 * Appends an optional Gaussian Mixture Model overlay section to the comprehensive
 * MATLAB script. It is the loop-variable flavour of the per-field GMM snippet and
 * only writes constant MATLAB text to `comp`.
 *
 * The emitted MATLAB runs only when `fitgmdist` exists. It loads
 * `<fn>_samples.txt` (from `analysisDir` or the current directory) or expands
 * `(bc, cnt)` into pseudo-samples. With more than 10 samples it fits mixtures
 * with k = 1, 2, 3 components (regularization 1e-6, at most 500 iterations),
 * keeps the lowest-BIC fit, overlays its PDF scaled to the tallest bar in
 * magenta, and rewrites the legend. A failed individual fit is silently skipped;
 * any other runtime error becomes a MATLAB warning.
 *
 * @note The emitted MATLAB reads `analysisDir`, `fn`, `bc`, and `cnt`; the
 *       surrounding script must define them.
 * @note The `%s` inside the emitted `warning(...)` call is written `%%s`. Left
 *       unescaped, fprintf would consume a missing argument (undefined behaviour).
 *
 * @param comp  Open, writable FILE stream for the comprehensive MATLAB script.
 * @return void.
 */
static void emit_gmm_overlay_comprehensive(FILE *comp)
{
    fprintf(comp,
        "%%%% %% Section: Gaussian Mixture Model (BIC, 1..3 comps)\n"
        "if exist('fitgmdist','file')\n"
        "  try\n"
        "    haveRaw=false; rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',fn)),sprintf('%%s_samples.txt',fn)};\n"
        "    for rk=1:numel(rawCandidates)\n"
        "      if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "    end\n"
        "    if ~haveRaw\n"
        "      raw=[]; for ii=1:numel(bc), raw=[raw; repmat(bc(ii), cnt(ii), 1)]; end\n"
        "    end\n"
        "    if numel(raw)>10\n"
        "      bestGM=[]; bestBIC=Inf;\n"
        "      for k=1:3\n"
        "        try\n"
        "          gm = fitgmdist(raw(:),k,'RegularizationValue',1e-6,'Options',statset('MaxIter',500));\n"
        "          BIC = gm.BIC;\n"
        "          if BIC<bestBIC, bestBIC=BIC; bestGM=gm; end\n"
        "        catch, end\n"
        "      end\n"
        "      if ~isempty(bestGM)\n"
        "        xg = linspace(min(bc), max(bc), 400);\n"
        "        yg = pdf(bestGM, xg'); yg = yg(:)';\n"
        "        sfG = max(cnt)/max(yg+eps);\n"
        "        plot(xg, yg*sfG, 'm-', 'LineWidth', 1.2);\n"
        "        legend('Histogram','Normal Fit','Mean','KDE','BestParam','GMM','Location','best');\n"
        "      end\n"
        "    end\n"
        "  catch ME\n"
        /* `%%s` so MATLAB's warning() receives a literal %s placeholder. */
        "    warning('GMM overlay skipped: %%s', ME.message);\n"
        "  end\n"
        "end\n\n"
    );
}
/**
 * emit_boxcox_scan_individual
 *
 * Appends a Box-Cox lambda scan section to a per-field MATLAB script. The
 * function only writes constant MATLAB text to `mf`.
 *
 * The emitted MATLAB loads `<field>_samples.txt` (from `analysisDir` or the
 * current directory) or expands the histogram bins into pseudo-samples. When the
 * samples are non-empty and strictly positive it sweeps lambda over -2:0.1:2,
 * applies the Box-Cox transform, and scores each lambda with the first available
 * of: the `adtest` statistic, `1 - p` from `kstest`, or `var(y)`. The lambda with
 * the lowest score is printed with `disp`; no figure is touched.
 *
 * @note The emitted MATLAB reads `analysisDir`, `field`, `bin_centers`, and
 *       `counts`; the host script must define them first.
 * @note The transform is emitted as an explicit branch. The earlier masked form
 *       `(L==0).*log(raw) + (L~=0).*((raw.^L-1)./L)` evaluates `0.*(0./0)` at
 *       lambda = 0, which is NaN in IEEE arithmetic, so the log transform could
 *       never be scored. The small tolerance guards against the colon operator
 *       producing a near-zero rather than exactly-zero grid point.
 *
 * @param mf  Open, writable FILE stream for the per-field MATLAB script.
 * @return void.
 */
static void emit_boxcox_scan_individual(FILE *mf)
{
    fprintf(mf,
        "%%%% %% Section: Box-Cox scan (report best lambda)\n"
        "bestLam=NaN; bestScore=Inf; haveRaw=false;\n"
        "rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',field)),sprintf('%%s_samples.txt',field)};\n"
        "for rk=1:numel(rawCandidates)\n"
        "  if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "end\n"
        "if ~haveRaw\n"
        "  raw=[]; for ii=1:numel(bin_centers), raw=[raw; repmat(bin_centers(ii), counts(ii), 1)]; end\n"
        "end\n"
        "raw = raw(:);\n"
        "if ~isempty(raw) && all(raw>0)\n"
        "  lam= -2:0.1:2;\n"
        "  for L=lam\n"
        /* Branch instead of masking: 0 .* NaN is NaN, so masking poisons lambda=0. */
        "    if abs(L) < 1e-12, y = log(raw); else, y = (raw.^L - 1)./L; end\n"
        "    try\n"
        "      if exist('adtest','file'), [~,p,stat]=adtest((y-mean(y))/std(y)); score=stat; else\n"
        "        if exist('kstest','file'), [~,p]=kstest((y-mean(y))/std(y)); score=1-p; else, score=var(y); end\n"
        "      end\n"
        "      if score<bestScore, bestScore=score; bestLam=L; end\n"
        "    catch, end\n"
        "  end\n"
        "  if ~isnan(bestLam)\n"
        "    disp(sprintf('Box-Cox best lambda for %%s: %%g', field, bestLam));\n"
        "  end\n"
        "end\n\n"
    );
}
/**
 * emit_boxcox_scan_comprehensive
 *
 * Appends a Box-Cox lambda scan section to the comprehensive MATLAB script. It is
 * the loop-variable flavour of the per-field scan and only writes constant MATLAB
 * text to `comp`.
 *
 * The emitted MATLAB loads `<fn>_samples.txt` (from `analysisDir` or the current
 * directory) or expands `(bc, cnt)` into pseudo-samples. For non-empty, strictly
 * positive data it sweeps lambda over -2:0.1:2, applies the Box-Cox transform,
 * and scores each lambda with the first available of: the `adtest` statistic,
 * `1 - p` from `kstest`, or `var(y)`. The lowest-scoring lambda is printed with
 * `disp`; no figure is modified.
 *
 * @note The emitted MATLAB reads `analysisDir`, `fn`, `bc`, and `cnt`; the
 *       surrounding script must define them.
 * @note The transform is emitted as an explicit branch. The earlier masked form
 *       `(L==0).*log(raw) + (L~=0).*((raw.^L-1)./L)` evaluates `0.*(0./0)` at
 *       lambda = 0, which is NaN in IEEE arithmetic, so the log transform could
 *       never be scored.
 *
 * @param comp  Open, writable FILE stream for the comprehensive MATLAB script.
 * @return void.
 */
static void emit_boxcox_scan_comprehensive(FILE *comp)
{
    fprintf(comp,
        "%%%% %% Section: Box-Cox scan (report best lambda)\n"
        "bestLam=NaN; bestScore=Inf; haveRaw=false;\n"
        "rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',fn)),sprintf('%%s_samples.txt',fn)};\n"
        "for rk=1:numel(rawCandidates)\n"
        "  if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "end\n"
        "if ~haveRaw\n"
        "  raw=[]; for ii=1:numel(bc), raw=[raw; repmat(bc(ii), cnt(ii), 1)]; end\n"
        "end\n"
        "raw = raw(:);\n"
        "if ~isempty(raw) && all(raw>0)\n"
        "  lam= -2:0.1:2;\n"
        "  for L=lam\n"
        /* Branch instead of masking: 0 .* NaN is NaN, so masking poisons lambda=0. */
        "    if abs(L) < 1e-12, y = log(raw); else, y = (raw.^L - 1)./L; end\n"
        "    try\n"
        "      if exist('adtest','file'), [~,p,stat]=adtest((y-mean(y))/std(y)); score=stat; else\n"
        "        if exist('kstest','file'), [~,p]=kstest((y-mean(y))/std(y)); score=1-p; else, score=var(y); end\n"
        "      end\n"
        "      if score<bestScore, bestScore=score; bestLam=L; end\n"
        "    catch, end\n"
        "  end\n"
        "  if ~isnan(bestLam)\n"
        "    disp(sprintf('Box-Cox best lambda for %%s: %%g', fn, bestLam));\n"
        "  end\n"
        "end\n\n"
    );
}
/**
 * emit_gof_bootstrap_robust_individual
 *
 * Appends a goodness-of-fit / bootstrap / robust-markers section to a per-field
 * MATLAB script. The function only writes constant MATLAB text to `mf`.
 *
 * The emitted MATLAB loads `<field>_samples.txt` (from `analysisDir` or the
 * current directory) or expands the histogram bins into pseudo-samples. For
 * non-empty data it:
 *   - standardizes with `meanVal`/`stdVal` and prints KS and AD p-values (each
 *     stays NaN when `kstest`/`adtest` is unavailable);
 *   - when there are more than 10 samples, draws 1000 bootstrap resamples and
 *     prints 2.5%/97.5% quantile intervals for the mean and the std;
 *   - draws an interquartile-range band with `patch` and a dash-dot line at the
 *     median on the current axes.
 * Any runtime error is caught and reported as a MATLAB warning.
 *
 * @note The emitted MATLAB reads `analysisDir`, `field`, `bin_centers`, `counts`,
 *       `meanVal`, and `stdVal`; the host script must define them first.
 * @note The `%s` inside the emitted `warning(...)` call is written `%%s`. Left
 *       unescaped, fprintf would consume a missing argument (undefined behaviour).
 *
 * @param mf  Open, writable FILE stream for the per-field MATLAB script.
 * @return void.
 */
static void emit_gof_bootstrap_robust_individual(FILE *mf)
{
    fprintf(mf,
        "%%%% %% Section: GoF + Bootstrap + Robust\n"
        "%% Normality tests (if available)\n"
        "try\n"
        "  haveRaw=false; rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',field)),sprintf('%%s_samples.txt',field)};\n"
        "  for rk=1:numel(rawCandidates)\n"
        "    if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "  end\n"
        "  if ~haveRaw\n"
        "    raw=[]; for ii=1:numel(bin_centers), raw=[raw; repmat(bin_centers(ii), counts(ii), 1)]; end\n"
        "  end\n"
        "  raw=raw(:);\n"
        "  if ~isempty(raw)\n"
        "    Z = (raw-meanVal)/stdVal;\n"
        "    pKS=NaN; pAD=NaN;\n"
        "    if exist('kstest','file'), [~,pKS]=kstest(Z); end\n"
        "    if exist('adtest','file'), [~,pAD]=adtest(Z); end\n"
        "    disp(sprintf('GoF (normality) KS p=%%g  AD p=%%g', pKS, pAD));\n"
        "    %% Bootstrap CIs\n"
        "    B=1000; if numel(raw)>10\n"
        "      M=zeros(B,1); S=zeros(B,1);\n"
        "      for b=1:B\n"
        "        idx = randi(numel(raw), numel(raw), 1); rb = raw(idx);\n"
        "        M(b)=mean(rb); S(b)=std(rb);\n"
        "      end\n"
        "      ciM=quantile(M,[0.025 0.975]); ciS=quantile(S,[0.025 0.975]);\n"
        "      disp(sprintf('Bootstrap mean CI [%%g, %%g], std CI [%%g, %%g]', ciM(1),ciM(2),ciS(1),ciS(2)));\n"
        "    end\n"
        "    %% Robust markers\n"
        "    med = median(raw); q = quantile(raw,[0.25 0.75]);\n"
        "    yl = ylim; patch([q(1) q(2) q(2) q(1)],[0 0 yl(2) yl(2)], [0.9 0.9 1.0], 'FaceAlpha',0.15, 'EdgeColor','none');\n"
        "    plot([med med],[0 yl(2)],'b-.','LineWidth',1.0);\n"
        "  end\n"
        "catch ME\n"
        /* `%%s` so MATLAB's warning() receives a literal %s placeholder. */
        "  warning('GoF/Bootstrap skipped: %%s',ME.message);\n"
        "end\n\n"
    );
}
/**
 * emit_gof_bootstrap_robust_comprehensive
 *
 * Appends a goodness-of-fit / bootstrap / robust-markers section to the
 * comprehensive MATLAB script. It is the loop-variable flavour of the per-field
 * snippet and only writes constant MATLAB text to `comp`.
 *
 * The emitted MATLAB loads `<fn>_samples.txt` (from `analysisDir` or the current
 * directory) or expands `(bc, cnt)` into pseudo-samples. For non-empty data it:
 *   - standardizes with `mu`/`sig` and prints KS and AD p-values (each stays NaN
 *     when `kstest`/`adtest` is unavailable);
 *   - when there are more than 10 samples, draws 1000 bootstrap resamples and
 *     prints 2.5%/97.5% quantile intervals for the mean and the std;
 *   - draws an interquartile-range band with `patch` and a dash-dot line at the
 *     median on the current axes.
 * Any runtime error is caught and reported as a MATLAB warning.
 *
 * @note The emitted MATLAB reads `analysisDir`, `fn`, `bc`, `cnt`, `mu`, and
 *       `sig`; the surrounding script must define them.
 * @note The `%s` inside the emitted `warning(...)` call is written `%%s`. Left
 *       unescaped, fprintf would consume a missing argument (undefined behaviour).
 *
 * @param comp  Open, writable FILE stream for the comprehensive MATLAB script.
 * @return void.
 */
static void emit_gof_bootstrap_robust_comprehensive(FILE *comp)
{
    fprintf(comp,
        "%%%% %% Section: GoF + Bootstrap + Robust\n"
        "try\n"
        "  haveRaw=false; rawCandidates={fullfile(analysisDir,sprintf('%%s_samples.txt',fn)),sprintf('%%s_samples.txt',fn)};\n"
        "  for rk=1:numel(rawCandidates)\n"
        "    if exist(rawCandidates{rk},'file'), try raw=load(rawCandidates{rk}); haveRaw=true; break; end, end\n"
        "  end\n"
        "  if ~haveRaw\n"
        "    raw=[]; for ii=1:numel(bc), raw=[raw; repmat(bc(ii), cnt(ii), 1)]; end\n"
        "  end\n"
        "  raw=raw(:);\n"
        "  if ~isempty(raw)\n"
        "    Z = (raw-mu)/sig;\n"
        "    pKS=NaN; pAD=NaN;\n"
        "    if exist('kstest','file'), [~,pKS]=kstest(Z); end\n"
        "    if exist('adtest','file'), [~,pAD]=adtest(Z); end\n"
        "    disp(sprintf('GoF (normality) KS p=%%g  AD p=%%g', pKS, pAD));\n"
        "    B=1000; if numel(raw)>10\n"
        "      M=zeros(B,1); S=zeros(B,1);\n"
        "      for b=1:B\n"
        "        idx = randi(numel(raw), numel(raw), 1); rb = raw(idx);\n"
        "        M(b)=mean(rb); S(b)=std(rb);\n"
        "      end\n"
        "      ciM=quantile(M,[0.025 0.975]); ciS=quantile(S,[0.025 0.975]);\n"
        "      disp(sprintf('Bootstrap mean CI [%%g, %%g], std CI [%%g, %%g]', ciM(1),ciM(2),ciS(1),ciS(2)));\n"
        "    end\n"
        "    med=median(raw); q=quantile(raw,[0.25 0.75]); yl=ylim;\n"
        "    patch([q(1) q(2) q(2) q(1)],[0 0 yl(2) yl(2)],[0.9 0.9 1.0],'FaceAlpha',0.15,'EdgeColor','none');\n"
        "    plot([med med],[0 yl(2)],'b-.','LineWidth',1.0);\n"
        "  end\n"
        "catch ME\n"
        /* `%%s` so MATLAB's warning() receives a literal %s placeholder. */
        "  warning('GoF/Bootstrap skipped: %%s',ME.message);\n"
        "end\n\n"
    );
}
/* ========================================================================
* Internal helpers (static)
* ===================================================================== */
/**
 * resolve_scripts_dir
 *
 * Writes into `dst` the directory in which generated MATLAB scripts should be
 * placed.
 *
 * - If `scriptsSubdir` is NULL or the empty string, `dst` receives a copy of
 *   `analysisDir` and nothing else happens.
 * - Otherwise `dst` receives `analysisDir/scriptsSubdir` and that path is passed
 *   to `create_directory(dst, "")`.
 *
 * Both copies go through snprintf bounded by `dstSize`, so an over-long path is
 * truncated rather than overflowing `dst`; truncation is not reported.
 *
 * @note Whatever `create_directory` reports is not inspected here, so a failure
 *       to create the subdirectory only shows up when a later file open fails.
 *       `create_directory` is defined elsewhere in the project.
 * @note `analysisDir` is formatted with `%s` and is not checked for NULL.
 *
 * @param dst            Destination buffer for the resolved, NUL-terminated path.
 * @param dstSize        Capacity of `dst` in bytes.
 * @param analysisDir    Base directory; assumed by the caller to exist already.
 * @param scriptsSubdir  Optional subdirectory name under `analysisDir`.
 * @return void.
 */
static void resolve_scripts_dir(char *dst, size_t dstSize,
                                const char *analysisDir,
                                const char *scriptsSubdir)
{
    const int wantsSubdir = (scriptsSubdir != NULL) && (*scriptsSubdir != '\0');

    /* No subdirectory requested: the base directory is the answer. */
    if (!wantsSubdir) {
        snprintf(dst, dstSize, "%s", analysisDir);
        return;
    }

    /* Compose <analysisDir>/<scriptsSubdir>, then ask for it to be created. */
    snprintf(dst, dstSize, "%s/%s", analysisDir, scriptsSubdir);
    create_directory(dst, "");
}
/**
 * emit_individual_script_for_field
 *
 * Writes a per-field MATLAB script `<outDir>/<fieldName>_plot.m`.
 *
 * The emitted MATLAB code:
 *   (1) looks for `<field>_histogram.txt` in analysisDir, then in the script's own
 *       directory, then in the current directory, and raises a MATLAB error if no
 *       candidate with at least three columns can be loaded;
 *   (2) looks for `<field>_stats.txt`, `<field>_full_analysis.txt` or
 *       `<field>_analysis.txt` in the same three locations and parses Mean, Std Dev,
 *       Skewness and Anderson-Darling values from it (defaults: mean 0, std 1);
 *   (3) falls back to std = 1 when the parsed standard deviation is not a finite
 *       positive number, matching the guard used by the comprehensive script;
 *   (4) draws the histogram as a bar chart, overlays a Normal curve scaled to the
 *       histogram peak plus a mean marker, and prints a console summary;
 *   (5) appends whatever the emit_*_individual helpers (defined elsewhere in this
 *       file) write, then saves the current figure as `<field>_plot.png` beside
 *       the script.
 *
 * On the C side, a path that does not fit the local buffer or a file that cannot be
 * opened is reported on stderr and nothing is emitted; a failing fclose() is also
 * reported.
 *
 * @note `fieldName` and `analysisDir` are interpolated verbatim into MATLAB
 *       single-quoted strings; a value containing a single quote would produce an
 *       invalid script.
 *
 * @param outDir            Directory where the MATLAB script is created.
 * @param analysisDir       Directory embedded in the script as the first place to look
 *                          for input files; NULL is emitted as an empty string.
 * @param fieldName         Field name used for file names, titles and labels.
 * @param indexForFigSuffix Integer appended to the MATLAB figure variable name (`f<N>`).
 * @return void.
 */
static void emit_individual_script_for_field(const char *outDir,
                                             const char *analysisDir,
                                             const char *fieldName,
                                             int indexForFigSuffix)
{
    char scriptPath[4096];
    int pathLen = snprintf(scriptPath, sizeof(scriptPath), "%s/%s_plot.m", outDir, fieldName);
    if (pathLen < 0 || (size_t)pathLen >= sizeof(scriptPath)) {
        /* Refuse to open a truncated path: it would name the wrong file. */
        fprintf(stderr, "Error creating MATLAB plot script: path too long for field %s\n", fieldName);
        return;
    }

    FILE *mf = fopen(scriptPath, "w");
    if (!mf) {
        fprintf(stderr, "Error creating MATLAB plot script: %s\n", scriptPath);
        return;
    }

    /* Header + core inputs */
    fprintf(mf, "%%%% Auto-generated: per-field modeling for \"%s\"\n", fieldName);
    fprintf(mf, "field = '%s';\n", fieldName);
    fprintf(mf, "analysisDir = '%s';\n", analysisDir ? analysisDir : "");
    /* Directory the script lives in, so the PNG lands next to the .m file */
    fprintf(mf, "scriptDir = fileparts(mfilename('fullpath'));\n\n");

    /* --- Histogram discovery: analysisDir, then scriptDir, then CWD --- */
    fprintf(mf, "histCandidates = { ...\n");
    fprintf(mf, " fullfile(analysisDir, sprintf('%%s_histogram.txt', field)), ...\n");
    fprintf(mf, " fullfile(scriptDir, sprintf('%%s_histogram.txt', field)), ...\n");
    fprintf(mf, " sprintf('%%s_histogram.txt', field) ...\n");
    fprintf(mf, "};\n");
    fprintf(mf, "histLoaded = false;\n");
    fprintf(mf, "for k = 1:numel(histCandidates)\n");
    fprintf(mf, " p = histCandidates{k};\n");
    fprintf(mf, " if exist(p,'file')\n");
    fprintf(mf, " try\n");
    fprintf(mf, " hist_data = load(p);\n");
    fprintf(mf, " if size(hist_data,2) >= 3\n");
    fprintf(mf, " bin_centers = (hist_data(:,1) + hist_data(:,2))/2;\n");
    fprintf(mf, " counts = hist_data(:,3);\n");
    fprintf(mf, " histLoaded = true; break;\n");
    fprintf(mf, " end\n");
    fprintf(mf, " catch, end\n");
    fprintf(mf, " end\n");
    fprintf(mf, "end\n");
    fprintf(mf, "if ~histLoaded, error('Histogram not found for %%s', field); end\n\n");

    /* Figure + histogram; the figure variable is f<indexForFigSuffix> */
    fprintf(mf, "f%d = figure('Name', '%s Histogram');\n", indexForFigSuffix, fieldName);
    fprintf(mf, "bar(bin_centers, counts, 1.0, 'FaceColor',[0.7 0.7 0.7]); grid on; hold on;\n");
    fprintf(mf, "title('%s Histogram'); xlabel('%s Values'); ylabel('Frequency');\n\n", fieldName, fieldName);

    /* --- Stats discovery: *_stats.txt, *_full_analysis.txt, *_analysis.txt
           in analysisDir, then scriptDir, then CWD --- */
    fprintf(mf, "statNames = { '_stats.txt', '_full_analysis.txt', '_analysis.txt' };\n");
    fprintf(mf, "statRoots = { analysisDir, scriptDir, '' };\n");
    fprintf(mf, "statsTxt = '';\n");
    fprintf(mf, "for r = 1:numel(statRoots)\n");
    fprintf(mf, " for n = 1:numel(statNames)\n");
    fprintf(mf, " if isempty(statRoots{r}), p = sprintf('%%s%%s', field, statNames{n});\n");
    fprintf(mf, " else, p = fullfile(statRoots{r}, sprintf('%%s%%s', field, statNames{n})); end\n");
    fprintf(mf, " if exist(p,'file'), statsTxt = fileread(p); break; end\n");
    fprintf(mf, " end\n");
    fprintf(mf, " if ~isempty(statsTxt), break; end\n");
    fprintf(mf, "end\n");

    /* Parse mean/std plus optional skewness and Anderson-Darling values */
    fprintf(mf, "meanVal = 0; stdVal = 1; skewVal = NaN; adVal = NaN;\n");
    fprintf(mf, "if ~isempty(statsTxt)\n");
    fprintf(mf, " tokens_mean = regexp(statsTxt, 'Mean:\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(mf, " if ~isempty(tokens_mean), meanVal = str2double(tokens_mean{1}{1}); end\n");
    fprintf(mf, " tstd = regexp(statsTxt, '(?:Std Dev|Standard Deviation):\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(mf, " if ~isempty(tstd), stdVal = str2double(tstd{1}{1}); end\n");
    fprintf(mf, " tsk = regexp(statsTxt, 'Skewness:\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(mf, " if ~isempty(tsk), skewVal = str2double(tsk{1}{1}); end\n");
    fprintf(mf, " tad = regexp(statsTxt, 'Anderson-Darling A\\^2:\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(mf, " if ~isempty(tad), adVal = str2double(tad{1}{1}); end\n");
    fprintf(mf, "end\n");
    /* A zero, negative, NaN or Inf std would break the Normal overlay below
       (division by stdVal); use the same fallback as the comprehensive script. */
    fprintf(mf, "if ~(isfinite(stdVal) && stdVal > 0), stdVal = 1; end\n\n");

    /* Normal overlay scaled to the histogram peak + mean marker + legend */
    fprintf(mf, "x = linspace(min(bin_centers), max(bin_centers), 200);\n");
    fprintf(mf, "y = (1./(stdVal.*sqrt(2*pi))) .* exp(-0.5*((x-meanVal)./stdVal).^2);\n");
    fprintf(mf, "sf = max(counts) / max(y + eps);\n");
    fprintf(mf, "plot(x, y*sf, 'r-', 'LineWidth', 1.5);\n");
    fprintf(mf, "plot([meanVal meanVal], [0 max(counts)], 'k--', 'LineWidth', 1.2);\n");
    fprintf(mf, "legend('Histogram','Normal Fit','Mean');\n\n");

    /* Console summary; skewness and A-D lines only when they were parsed */
    fprintf(mf, "disp('======================');\n");
    fprintf(mf, "disp(['Field: ', field]);\n");
    fprintf(mf, "disp(['Mean = ', num2str(meanVal)]);\n");
    fprintf(mf, "disp(['Std = ', num2str(stdVal)]);\n");
    fprintf(mf, "if ~isnan(skewVal), disp(['Skew = ', num2str(skewVal)]); end\n");
    fprintf(mf, "if ~isnan(adVal), disp(['Anderson-Darling = ', num2str(adVal)]); end\n");
    fprintf(mf, "disp('======================');\n\n");

    /* Optional sections; each helper writes its own MATLAB snippet into mf */
    emit_adv_modeling_snippet_individual(mf);
    emit_kde_overlay_individual(mf);
    emit_gmm_overlay_individual(mf);
    emit_boxcox_scan_individual(mf);
    emit_gof_bootstrap_robust_individual(mf);

    /* Save PNG next to the .m script */
    fprintf(mf, "saveas(gcf, fullfile(scriptDir, sprintf('%%s_plot.png', field)));\n");

    /* fclose flushes buffered output, so a failure here means an incomplete script. */
    if (fclose(mf) != 0) {
        fprintf(stderr, "Error writing MATLAB plot script: %s\n", scriptPath);
    }
}
/**
 * emit_comprehensive_script
 *
 * Writes a single MATLAB script `<outDir>/comprehensive_plots.m` that loops over
 * every non-empty entry of `fieldNames`.
 *
 * For each field the emitted MATLAB code:
 *   (1) looks for `<field>_histogram.txt` in analysisDir, then the script's own
 *       directory, then the current directory; a field with no loadable histogram
 *       produces a warning and is skipped;
 *   (2) looks for `<field>_stats.txt`, `<field>_full_analysis.txt` or
 *       `<field>_analysis.txt` in the same locations and parses Mean, Std Dev,
 *       Skewness and Anderson-Darling values (defaults: mean 0, std 1; a
 *       non-finite or non-positive std is reset to 1);
 *   (3) draws the histogram, a Normal curve scaled to the histogram peak and a mean
 *       marker, then prints a console summary;
 *   (4) appends whatever the emit_*_comprehensive helpers (defined elsewhere in
 *       this file) write, and saves the figure as `<field>_comprehensive.png`
 *       beside the script.
 * The per-field body is wrapped in try/catch so an error in one field is reported
 * as a warning and the loop moves on.
 *
 * NULL or empty entries in `fieldNames` are left out of the emitted `fields` cell
 * array. On the C side, a path that does not fit the local buffer or a file that
 * cannot be opened is reported on stderr and nothing is emitted; a failing fclose()
 * is also reported.
 *
 * @note Field names and `analysisDir` are interpolated verbatim into MATLAB
 *       single-quoted strings; a value containing a single quote would produce an
 *       invalid script.
 *
 * @param outDir      Directory where the comprehensive MATLAB script is written.
 * @param analysisDir Directory embedded in the script as the first place to look for
 *                    input files; NULL is emitted as an empty string.
 * @param fieldNames  Array of field name strings to enumerate in the script.
 * @param numFields   Number of entries in `fieldNames`.
 * @return void.
 */
static void emit_comprehensive_script(const char *outDir,
                                      const char *analysisDir,
                                      char **fieldNames,
                                      int numFields)
{
    char compPath[4096];
    int pathLen = snprintf(compPath, sizeof(compPath), "%s/comprehensive_plots.m", outDir);
    if (pathLen < 0 || (size_t)pathLen >= sizeof(compPath)) {
        /* Refuse to open a truncated path: it would name the wrong file. */
        fprintf(stderr, "Error creating comprehensive_plots.m: output path too long\n");
        return;
    }

    FILE *comp = fopen(compPath, "w");
    if (!comp) {
        fprintf(stderr, "Error creating comprehensive_plots.m\n");
        return;
    }

    fprintf(comp, "%%%% Auto-generated: comprehensive modeling over all fields\n");
    fprintf(comp, "close all; clear; clc;\n\n");

    /* Script-local paths */
    fprintf(comp, "analysisDir = '%s';\n", analysisDir ? analysisDir : "");
    fprintf(comp, "scriptDir = fileparts(mfilename('fullpath'));\n\n");

    /* Emit the fields cell array. The separator is written BEFORE every item
       except the first one actually emitted, so skipped (NULL/empty) entries
       never leave a dangling ", " behind. */
    fprintf(comp, "fields = {");
    int emitted = 0;
    for (int i = 0; i < numFields; i++) {
        const char *name = fieldNames[i];
        if (!name || name[0] == '\0') {
            continue;
        }
        fprintf(comp, "%s'%s'", emitted ? ", " : "", name);
        emitted++;
    }
    fprintf(comp, "};\n\n");

    /* Unified discovery & plotting per field (non-fatal on missing data) */
    fprintf(comp, "for i = 1:numel(fields)\n");
    fprintf(comp, " fn = fields{i};\n");
    fprintf(comp, " try\n");
    fprintf(comp, " %% --- Histogram discovery (analysisDir → scriptDir → CWD) ---\n");
    fprintf(comp, " histCandidates = {\n");
    fprintf(comp, " fullfile(analysisDir, sprintf('%%s_histogram.txt', fn)),\n");
    fprintf(comp, " fullfile(scriptDir, sprintf('%%s_histogram.txt', fn)),\n");
    fprintf(comp, " sprintf('%%s_histogram.txt', fn)\n");
    fprintf(comp, " };\n");
    fprintf(comp, " histLoaded = false;\n");
    fprintf(comp, " for k = 1:numel(histCandidates)\n");
    fprintf(comp, " p = histCandidates{k};\n");
    fprintf(comp, " if exist(p,'file')\n");
    fprintf(comp, " try\n");
    fprintf(comp, " H = load(p);\n");
    fprintf(comp, " if size(H,2) >= 3\n");
    fprintf(comp, " bc = (H(:,1)+H(:,2))/2;\n");
    fprintf(comp, " cnt = H(:,3);\n");
    fprintf(comp, " histLoaded = true; break;\n");
    fprintf(comp, " end\n");
    fprintf(comp, " catch, end\n");
    fprintf(comp, " end\n");
    fprintf(comp, " end\n");
    fprintf(comp, " if ~histLoaded\n");
    fprintf(comp, " warning('Histogram not found for %%s. Skipping.', fn);\n");
    fprintf(comp, " continue;\n");
    fprintf(comp, " end\n\n");

    fprintf(comp, " %% --- Stats discovery: *_stats|_full_analysis|_analysis in analysisDir/scriptDir/CWD ---\n");
    fprintf(comp, " statNames = { '_stats.txt', '_full_analysis.txt', '_analysis.txt' };\n");
    fprintf(comp, " statRoots = { analysisDir, scriptDir, '' };\n");
    fprintf(comp, " statsTxt = '';\n");
    fprintf(comp, " for r = 1:numel(statRoots)\n");
    fprintf(comp, " for n = 1:numel(statNames)\n");
    fprintf(comp, " if isempty(statRoots{r})\n");
    fprintf(comp, " p = sprintf('%%s%%s', fn, statNames{n});\n");
    fprintf(comp, " else\n");
    fprintf(comp, " p = fullfile(statRoots{r}, sprintf('%%s%%s', fn, statNames{n}));\n");
    fprintf(comp, " end\n");
    fprintf(comp, " if exist(p,'file'), statsTxt = fileread(p); break; end\n");
    fprintf(comp, " end\n");
    fprintf(comp, " if ~isempty(statsTxt), break; end\n");
    fprintf(comp, " end\n");
    fprintf(comp, " mu = 0; sig = 1; skewVal = NaN; adVal = NaN;\n");
    fprintf(comp, " if ~isempty(statsTxt)\n");
    fprintf(comp, " tm = regexp(statsTxt, 'Mean:\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(comp, " if ~isempty(tm), mu = str2double(tm{1}{1}); end\n");
    fprintf(comp, " ts = regexp(statsTxt, '(?:Std Dev|Standard Deviation):\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(comp, " if ~isempty(ts), sig = str2double(ts{1}{1}); end\n");
    fprintf(comp, " tsk = regexp(statsTxt, 'Skewness:\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(comp, " if ~isempty(tsk), skewVal = str2double(tsk{1}{1}); end\n");
    fprintf(comp, " tad = regexp(statsTxt, 'Anderson-Darling A\\^2:\\s*([-+0-9eE\\.]+)', 'tokens');\n");
    fprintf(comp, " if ~isempty(tad), adVal = str2double(tad{1}{1}); end\n");
    fprintf(comp, " end\n");
    /* Guard against a std that would break the Normal overlay (division by sig). */
    fprintf(comp, " if ~(isfinite(sig) && sig > 0), sig = 1; end\n\n");

    fprintf(comp, " %% --- Plot: histogram + scaled normal + mean marker ---\n");
    fprintf(comp, " f = figure('Name',[fn ' Histogram']);\n");
    fprintf(comp, " bar(bc, cnt, 1.0, 'FaceColor',[0.7 0.7 0.7]); grid on; hold on;\n");
    fprintf(comp, " title([fn ' Histogram']); xlabel([fn ' Values']); ylabel('Frequency');\n");
    fprintf(comp, " x = linspace(min(bc), max(bc), 200);\n");
    fprintf(comp, " y = (1./(sig.*sqrt(2*pi))) .* exp(-0.5*((x-mu)./sig).^2);\n");
    fprintf(comp, " sf = max(cnt) / max(y + eps);\n");
    fprintf(comp, " plot(x, y*sf, 'r-', 'LineWidth', 1.5);\n");
    fprintf(comp, " plot([mu mu], [0 max(cnt)], 'k--', 'LineWidth', 1.2);\n");
    fprintf(comp, " legend('Histogram','Normal Fit','Mean','Location','best');\n\n");

    fprintf(comp, " %% --- Console summary ---\n");
    fprintf(comp, " disp('======================');\n");
    fprintf(comp, " disp(['Field: ', fn]);\n");
    fprintf(comp, " disp(['Mean = ', num2str(mu)]);\n");
    fprintf(comp, " disp(['Std = ', num2str(sig)]);\n");
    fprintf(comp, " if ~isnan(skewVal), disp(['Skew = ', num2str(skewVal)]); end\n");
    fprintf(comp, " if ~isnan(adVal), disp(['Anderson-Darling = ', num2str(adVal)]); end\n");
    fprintf(comp, " disp('======================');\n\n");

    /* Optional sections; each helper writes its own MATLAB snippet into comp */
    emit_adv_modeling_snippet_comprehensive(comp);
    emit_kde_overlay_comprehensive(comp);
    emit_gmm_overlay_comprehensive(comp);
    emit_boxcox_scan_comprehensive(comp);
    emit_gof_bootstrap_robust_comprehensive(comp);

    fprintf(comp, " %% --- Save PNG beside this script ---\n");
    fprintf(comp, " saveas(f, fullfile(scriptDir, sprintf('%%s_comprehensive.png', fn)));\n");
    fprintf(comp, " catch ME\n");
    fprintf(comp, " warning('Error processing %%s: %%s', fn, ME.message);\n");
    fprintf(comp, " end\n");
    fprintf(comp, "end\n");

    /* fclose flushes buffered output, so a failure here means an incomplete script. */
    if (fclose(comp) != 0) {
        fprintf(stderr, "Error writing comprehensive_plots.m\n");
    }
}
/**
 * generate_matlab_scripts_unified
 *
 * Single public entry point for MATLAB script generation. The `flavor` bitmask
 * selects which outputs are produced: one `<field>_plot.m` per non-empty field
 * name (MATLAB_SCRIPTS_INDIVIDUAL), the aggregate `comprehensive_plots.m`
 * (MATLAB_SCRIPTS_COMPREHENSIVE), or both.
 *
 * Steps: reject invalid arguments with a message on stderr, resolve the output
 * directory through resolve_scripts_dir() (either `analysisDir` itself or
 * `analysisDir/<scriptsSubdir>`), then call the individual and/or comprehensive
 * emitters. Each emitter reports its own file-creation failure and returns, so
 * one failed file does not stop the remaining ones from being generated.
 *
 * @param analysisDir   Base directory; also embedded in the scripts for input discovery.
 * @param fieldNames    Array of field name strings; NULL or empty entries are skipped
 *                      when emitting individual scripts.
 * @param numFields     Number of entries in `fieldNames`; must be positive.
 * @param flavor        Bitmask selecting INDIVIDUAL and/or COMPREHENSIVE outputs.
 * @param scriptsSubdir Optional subfolder under `analysisDir` for script outputs.
 * @return void.
 */
void generate_matlab_scripts_unified(const char *analysisDir,
                                     char **fieldNames,
                                     int numFields,
                                     MatlabScriptFlavor flavor,
                                     const char *scriptsSubdir)
{
    const int argsUsable = (analysisDir != NULL) && (fieldNames != NULL) && (numFields > 0);
    if (!argsUsable) {
        fprintf(stderr, "generate_matlab_scripts_unified: invalid arguments.\n");
        return;
    }

    /* Where every generated .m file will be written. */
    char scriptsDir[4096];
    resolve_scripts_dir(scriptsDir, sizeof(scriptsDir), analysisDir, scriptsSubdir);

    const int wantIndividual = (flavor & MATLAB_SCRIPTS_INDIVIDUAL) != 0;
    const int wantComprehensive = (flavor & MATLAB_SCRIPTS_COMPREHENSIVE) != 0;

    if (wantIndividual) {
        for (int idx = 0; idx < numFields; ++idx) {
            const char *name = fieldNames[idx];
            if (name == NULL || *name == '\0') {
                continue; /* nothing to name the script after */
            }
            /* The array position doubles as the figure-handle suffix. */
            emit_individual_script_for_field(scriptsDir, analysisDir, name, idx);
        }
    }

    if (wantComprehensive) {
        emit_comprehensive_script(scriptsDir, analysisDir, fieldNames, numFields);
    }
}