From a00a89935957bc1fb8e02aa3e36733fedbee48bb Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:20:24 +0100 Subject: [PATCH 1/5] Added flag for single json output --- utils/flag_utils.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/utils/flag_utils.go b/utils/flag_utils.go index a2fd59e..e88dad6 100644 --- a/utils/flag_utils.go +++ b/utils/flag_utils.go @@ -25,6 +25,7 @@ type CLIFlags struct { DependencyDirectory string NumberOfThreads int NumberOfPackages int + SingleJsonOutput bool PerformIntegrityCheck bool PerformValidityCheck bool PerformCleanup bool @@ -77,6 +78,14 @@ func ParseFlags() (CLIFlags, bool) { zip packaging and output .json directly to drive.`, ) + singleJsonOutput := flag.Bool( + "single_json_output", + false, + `Flag specifying if the output should be a single JSON file instead of multiple zip packages. + If set to true, the output will be a single JSON file containing an array of all processed replays. 
+ In such case the "number_of_packages" flag will be ignored and all processed replays will be saved in a single JSON file.`, + ) + // Boolean Flags: help := flag.Bool( "help", @@ -138,12 +147,12 @@ func ParseFlags() (CLIFlags, bool) { // Misc flags: logLevelFlag := flag.Int( "log_level", - 4, - `Specifies a log level from 1-7: - Panic - 1, Fatal - 2, - Error - 3, Warn - 4, - Info - 5, Debug - 6, - Trace - 7`, + 3, + `Specifies a log level from 0-6: + Panic - 0, Fatal - 1, + Error - 2, Warn - 3, + Info - 4, Debug - 5, + Trace - 6`, ) logDirectoryFlag := flag.String( "log_dir", @@ -197,6 +206,7 @@ func ParseFlags() (CLIFlags, bool) { SkipDependencyDownload: *skipDependencyDownload, DependencyDirectory: absolutePathDependencyDirectory, NumberOfPackages: *numberOfPackagesFlag, + SingleJsonOutput: *singleJsonOutput, PerformIntegrityCheck: *performIntegrityCheckFlag, PerformValidityCheck: *performValidityCheckFlag, PerformCleanup: *performCleanupFlag, From fc6e6e643659847e1b0a31bf3a60c08e5f60671f Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:20:51 +0100 Subject: [PATCH 2/5] Compacted stringifyReplay output --- dataproc/stringify_replay.go | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/dataproc/stringify_replay.go b/dataproc/stringify_replay.go index 71380d2..21201d3 100644 --- a/dataproc/stringify_replay.go +++ b/dataproc/stringify_replay.go @@ -1,6 +1,7 @@ package dataproc import ( + "bytes" "encoding/json" "github.com/Kaszanas/SC2InfoExtractorGo/datastruct/replay_data" @@ -12,12 +13,22 @@ func stringifyReplay(replayData *replay_data.CleanedReplay) (bool, string) { log.Debug("Entered stringifyReplay()") - replayDataString, marshalErr := json.MarshalIndent(replayData, "", " ") + + replayDataStringBytes, marshalErr := json.Marshal(replayData) if marshalErr != nil { log.Error("Error while marshaling the string representation of cleanReplayData.") return false, "" } + 
compactedOutput := new(bytes.Buffer) + compactErr := json.Compact(compactedOutput, replayDataStringBytes) + if compactErr != nil { + log.Error("Error while compacting the string representation of cleanReplayData.") + return false, "" + } + + compactedString := compactedOutput.String() + log.Debug("Finished stringifyReplay()") - return true, string(replayDataString) + return true, compactedString } From 56c8848a38b523ce9d39cc50b823b60ca40a2a0d Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:21:55 +0100 Subject: [PATCH 3/5] Added writeResultsToSingleJSON, added goroutine for single json --- dataproc/dataproc_pipeline.go | 68 +++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/dataproc/dataproc_pipeline.go b/dataproc/dataproc_pipeline.go index 721b967..681ccab 100644 --- a/dataproc/dataproc_pipeline.go +++ b/dataproc/dataproc_pipeline.go @@ -26,6 +26,37 @@ type ReplayProcessingChannelContents struct { ChunkOfFiles []string } + +// writeResultsToSingleJSON handles stream writing to a single file +func writeResultsToSingleJSON(outputDir string, input <-chan string) { + outputPath := filepath.Join(outputDir, "all_replays.json") + f, err := os.Create(outputPath) + if err != nil { + log.Error("Failed to create output JSON file:", err) + // Drain channel to prevent blocking workers if file fails + for range input {} + return + } + defer f.Close() + + // Start the JSON array + f.WriteString("[\n") + + first := true + for jsonString := range input { + if !first { + f.WriteString(",\n") + } + f.WriteString(jsonString) + first = false + } + + // End the JSON array + f.WriteString("\n]") + log.Info("Successfully wrote combined JSON to ", outputPath) +} + + // PipelineWrapper is an orchestrator that distributes work // among available workers (threads) func PipelineWrapper( @@ -51,6 +82,21 @@ func PipelineWrapper( ) defer progressBar.Close() + + // 1. 
Create the results channel + singleJsonResultChan := make(chan string, cliFlags.NumberOfThreads*4) + + // 2. Start the single writer goroutine + var writerWg sync.WaitGroup + // Creating a single writer goroutine that will create a single JSON file + // with all of the replays as a JSON array. + writerWg.Add(1) + go func() { + defer writerWg.Done() + writeResultsToSingleJSON(cliFlags.OutputDirectory, singleJsonResultChan) + }() + + // If it is specified by the user to perform the processing without // multiprocessing GOMAXPROCS needs to be set to 1 in order to allow 1 thread: runtime.GOMAXPROCS(cliFlags.NumberOfThreads) @@ -59,13 +105,14 @@ func PipelineWrapper( // Adding a task for each of the supplied chunks to speed up the processing: wg.Add(cliFlags.NumberOfThreads) + // Spin up workers waiting for chunks to process: for i := 0; i < cliFlags.NumberOfThreads; i++ { go func() { + defer wg.Done() for { channelContents, ok := <-channel if !ok { - wg.Done() return } MultiprocessingChunkPipeline( @@ -76,6 +123,7 @@ func PipelineWrapper( foreignToEnglishMapping, progressBar, cliFlags, + singleJsonResultChan, ) } }() @@ -91,6 +139,8 @@ func PipelineWrapper( close(channel) wg.Wait() + close(singleJsonResultChan) + writerWg.Wait() progressBar.Close() log.Debug("Finished PipelineWrapper()") @@ -107,6 +157,7 @@ func MultiprocessingChunkPipeline( englishToForeignMapping map[string]string, progressBar *progressbar.ProgressBar, cliFlags utils.CLIFlags, + singleJsonResultChan chan<- string, ) { // Letting the orchestrator know that this processing task was finished: @@ -204,8 +255,17 @@ func MultiprocessingChunkPipeline( return } + if cliFlags.SingleJsonOutput { + singleJsonResultChan <- replayString + processedCounter++ + processingInfoStruct.AddToProcessed(replayFile) + log.Info("Sent file to writer for single JSON output.") + return + } + + // Saving output to zip archive: - if packageToZipBool { + if packageToZipBool{ // Append it to a list and when a package is created 
create a package summary and clear the list for next iterations persistent_data.AddReplaySummToPackageSumm( &replaySummary, @@ -217,7 +277,8 @@ func MultiprocessingChunkPipeline( replayString, replayFile, compressionMethod, - writer) + writer, + ) if !savedSuccess { compressionErrorCounter++ log.WithFields(log.Fields{ @@ -233,6 +294,7 @@ func MultiprocessingChunkPipeline( return } + okSaveToDrive := file_utils.SaveReplayJSONFileToDrive( replayString, replayFile, From 21a59e86ebe78f071a79c6739916bfcd4565eeea Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:22:25 +0100 Subject: [PATCH 4/5] Added logging for new single json flag --- main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/main.go b/main.go index b505edb..76c21e4 100644 --- a/main.go +++ b/main.go @@ -48,6 +48,7 @@ func mainReturnWithCode() int { "CLIflags.SkipDependencyDownload": CLIflags.SkipDependencyDownload, "CLIflags.DependencyDirectory": CLIflags.DependencyDirectory, "CLIflags.NumberOfPackages": CLIflags.NumberOfPackages, + "CLIflags.SingleJsonOutput": CLIflags.SingleJsonOutput, "CLIflags.PerformIntegrityCheck": CLIflags.PerformIntegrityCheck, "CLIflags.PerformValidityCheck": CLIflags.PerformValidityCheck, "CLIflags.PerformCleanup": CLIflags.PerformCleanup, From 3372244cda2835ac084c84bfc18d3910e036cd2f Mon Sep 17 00:00:00 2001 From: Kaszanas <34846245+Kaszanas@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:22:50 +0100 Subject: [PATCH 5/5] Logging the log level --- utils/logging_utils.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/utils/logging_utils.go b/utils/logging_utils.go index b8b0865..97c5c14 100644 --- a/utils/logging_utils.go +++ b/utils/logging_utils.go @@ -37,6 +37,10 @@ func SetLogging(logPath string, logLevel int) (*os.File, bool) { log.SetOutput(logFile) log.Info("Set logging format, defined log file.") + logLevelString := log.Level(logLevel).String() + log.Info("Log level set to: " + logLevelString) + + 
log.SetLevel(log.Level(logLevel)) log.Info("Set logging level.")