diff --git a/Source/AssetRipper.IO.Files.Tests/FileUtilsTests.cs b/Source/AssetRipper.IO.Files.Tests/FileUtilsTests.cs new file mode 100644 index 000000000..ae712146c --- /dev/null +++ b/Source/AssetRipper.IO.Files.Tests/FileUtilsTests.cs @@ -0,0 +1,45 @@ +using AssetRipper.IO.Files.Utils; + +namespace AssetRipper.IO.Files.Tests; + +public static class FileUtilsTests +{ + [Test] + public static void FilenameTruncationMultibyteCharacter() + { + Assert.Multiple(() => + { + // A length 3 cont cont cont + // 01000001 11100110 10010110 10000111 00001010 + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 4), Is.EqualTo(".ext")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 5), Is.EqualTo("A.ext")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 6), Is.EqualTo("A.ext")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 7), Is.EqualTo("A.ext")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 8), Is.EqualTo("A文.ext")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 9), Is.EqualTo("A文.ext")); + }); + } + + [Test] + public static void ExtensionLength() + { + Assert.Multiple(() => + { + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 7), Is.EqualTo("A.ext")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 8), Is.EqualTo("A文.ext")); + + Assert.That(FileUtils.GetUniqueName("/dir", "A文.exte", 8), Is.EqualTo("A.exte")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文.exte", 9), Is.EqualTo("A文.exte")); + }); + } + + [Test] + public static void WithoutExtension() + { + Assert.Multiple(() => + { + Assert.That(FileUtils.GetUniqueName("/dir", "A文", 3), Is.EqualTo("A")); + Assert.That(FileUtils.GetUniqueName("/dir", "A文", 4), Is.EqualTo("A文")); + }); + } +} diff --git a/Source/AssetRipper.IO.Files/Utils/FileUtils.cs b/Source/AssetRipper.IO.Files/Utils/FileUtils.cs index f273fa9f5..ba5452e37 100644 --- a/Source/AssetRipper.IO.Files/Utils/FileUtils.cs +++ b/Source/AssetRipper.IO.Files/Utils/FileUtils.cs @@ -1,4 +1,4 @@ -using System.Runtime.InteropServices; +using System.Text; using System.Text.RegularExpressions; namespace AssetRipper.IO.Files.Utils @@ -37,12 +37,11 @@ namespace AssetRipper.IO.Files.Utils { string? ext = null; string? name = null; - int maxLength = maxNameLength - 4; string validFileName = fileName; - if (validFileName.Length > maxLength) + if (Encoding.UTF8.GetByteCount(fileName) > maxNameLength) { ext = Path.GetExtension(validFileName); - name = validFileName.Substring(0, maxLength - ext.Length); + name = TruncateToUTF8ByteLength(fileName, maxNameLength - Encoding.UTF8.GetByteCount(ext)); validFileName = name + ext; } @@ -120,5 +119,59 @@ namespace AssetRipper.IO.Files.Utils "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", ]; private static readonly Regex FileNameRegex = GenerateFileNameRegex(); + + private static string TruncateToUTF8ByteLength(string str, int maxLength) + { + byte[] bytes = Encoding.UTF8.GetBytes(str); + int validLength = FindValidByteLength(bytes, maxLength); + return Encoding.UTF8.GetString(bytes[..validLength]); + } + + private static int FindValidByteLength(byte[] bytes, int maxLength) + { + int validLength = maxLength; + + // ascii char: 0_ + // two-byte char: 110_ 10_ + // three-byte char: 1110_ 10_ _10_ + // four-byte char : 11110_ 10_ _10_ _10 + + if (maxLength >= bytes.Length) + { + return bytes.Length; + } + + // next byte is a beginning, so we can safely truncate to maxLength + byte nextByte = bytes[maxLength]; + if ((nextByte & 0b11_000000) != 0b10_000000) + { + return maxLength; + } + + // move to end of the last full sequence + for (int i = maxLength - 1; i >= 0; i--) + { + byte currentByte = bytes[i]; + + if ((currentByte & 0b11_000000) == 0b10_000000) + { + // continuation byte + validLength--; + } + else if ((currentByte & 0b10000000) == 0b10000000) + { + // start of multi-byte sequence + validLength--; + break; + } + else + { + // ascii char + break; + } + } + + return validLength; + } } }