diff --git a/Apps.OpenAI/Apps.OpenAI.csproj b/Apps.OpenAI/Apps.OpenAI.csproj index 7762921..9b0a4c9 100644 --- a/Apps.OpenAI/Apps.OpenAI.csproj +++ b/Apps.OpenAI/Apps.OpenAI.csproj @@ -4,7 +4,7 @@ net8.0 OpenAI Creating safe artificial general intelligence that benefits all of humanity - 2.8.8 + 2.8.9 Apps.OpenAI diff --git a/Apps.OpenAI/Services/ContentGlossaryService.cs b/Apps.OpenAI/Services/ContentGlossaryService.cs index 9f2a363..33a5d82 100644 --- a/Apps.OpenAI/Services/ContentGlossaryService.cs +++ b/Apps.OpenAI/Services/ContentGlossaryService.cs @@ -1,4 +1,6 @@ +using System; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Text; using System.Text.RegularExpressions; @@ -33,7 +35,10 @@ private class GlossaryJson } var glossaryStream = await fileManagementClient.DownloadAsync(glossary); - var blackbirdGlossary = await glossaryStream.ConvertFromTbx(); + + using var sanitizedGlossaryStream = await glossaryStream.SanitizeTbxXmlAsync(); + + var blackbirdGlossary = await sanitizedGlossaryStream.ConvertFromTbx(); var jsonGlossary = new GlossaryJson(); var entriesIncluded = false; @@ -111,3 +116,34 @@ private bool IsEntryRelevantToSources(IEnumerable terms, IEnumerable SanitizeTbxXmlAsync(this Stream original) + { + if (original == null) + throw new ArgumentNullException(nameof(original)); + + if (original.CanSeek) + original.Position = 0; + + using var reader = new StreamReader( + original, + Encoding.UTF8, + detectEncodingFromByteOrderMarks: true); + + var text = await reader.ReadToEndAsync(); + + text = text.TrimStart( + '\uFEFF', + '\u200B', + '\u0000', + '\u00A0', + ' ', '\t', '\r', '\n'); + + var bytes = Encoding.UTF8.GetBytes(text); + var ms = new MemoryStream(bytes); + ms.Position = 0; + return ms; + } +} diff --git a/Apps.OpenAI/Services/PostEditService.cs b/Apps.OpenAI/Services/PostEditService.cs index 0e83353..dbe9e9e 100644 --- a/Apps.OpenAI/Services/PostEditService.cs +++ b/Apps.OpenAI/Services/PostEditService.cs @@ -161,7 +161,7 @@ private async Task ProcessAllBatchesAsync( errors.AddRange(batchResult.ErrorMessages); usages.Add(batchResult.Usage); - if (!batchResult.IsSuccess && !neverFail) + if (!neverFail && (!batchResult.IsSuccess || batchResult.ErrorMessages.Any())) { throw new PluginApplicationException( $"Failed to process batch {batchCounter} (size: {batchSize}). Errors: {string.Join(", ", batchResult.ErrorMessages)}"); diff --git a/Tests.OpenAI/Input/glossary.tbx b/Tests.OpenAI/Input/glossary.tbx deleted file mode 100644 index 358efe5..0000000 --- a/Tests.OpenAI/Input/glossary.tbx +++ /dev/null @@ -1,77 +0,0 @@ - - - - - - Glossary from XLIFF (English-French) - - - TBX file generated from selected phrases in the provided XLIFF - - - - - - - - - Hooray, you're here! - - - - - Hourra, vous êtes là ! - - - - - - - text segment - - - - - segment de texte - - - - - - - Speedy Gonzales - - - - - Speedy Gonzales - - - - - - - LingoChecks - - - - - vérifications linguistiques - - - - - - - Made with ❤ - - - - - Fait avec ❤ - - - - - - diff --git a/Tests.OpenAI/TranslationActionsTests.cs b/Tests.OpenAI/TranslationActionsTests.cs index bca3d13..1acce73 100644 --- a/Tests.OpenAI/TranslationActionsTests.cs +++ b/Tests.OpenAI/TranslationActionsTests.cs @@ -14,26 +14,24 @@ namespace Tests.OpenAI; [TestClass] public class TranslationActionsTests : TestBaseWithContext { - [TestMethod, ContextDataSource] + [TestMethod, ContextDataSource(ConnectionTypes.OpenAi)] public async Task Translate_html(InvocationContext context) { var actions = new TranslationActions(context, FileManagementClient); - var modelIdentifier = new TextChatModelIdentifier { ModelId = "gpt-5" }; + var modelIdentifier = new TextChatModelIdentifier { ModelId = "gpt-5-mini" }; var translateRequest = new TranslateContentRequest { - File = new FileReference { Name = "contentful.html" }, - TargetLanguage = "nl" - }; - var reasoningEffortRequest = new ReasoningEffortRequest - { - ReasoningEffort = "low" + File = new FileReference { Name = "" }, + TargetLanguage = "zh-Hans-CN", + OutputFileHandling = "original" }; - string? systemMessage = null; - var glossaryRequest = new GlossaryRequest(); + var reasoningEffortRequest = new ReasoningEffortRequest(); + string systemMessage = ""; + var glossaryRequest = new GlossaryRequest { Glossary = new FileReference { Name = "Glossary.tbx" } }; var result = await actions.TranslateContent(modelIdentifier, translateRequest, systemMessage, glossaryRequest, reasoningEffortRequest); Assert.IsNotNull(result); - Assert.Contains("contentful", result.File.Name); + //Assert.Contains("contentful", result.File.Name); Console.WriteLine(JsonConvert.SerializeObject(result, Formatting.Indented)); }
TBX file generated from selected phrases in the provided XLIFF