mirror of
https://github.com/AssetRipper/AssetRipper.git
synced 2025-12-11 20:15:29 +01:00
fix file name truncation in presence of non-ascii characters (#1402)
* fix file name truncation in presence of non-ascii characters * review: remove unnecessary -4 of maxNameLength * review: count byte length of extension * review: fix typo three -> four * review: rename byteLength -> maxLength * review: fix case where last character is incorrectly truncated * Add some unit tests
This commit is contained in:
parent
07804fc215
commit
716a07f50b
45
Source/AssetRipper.IO.Files.Tests/FileUtilsTests.cs
Normal file
45
Source/AssetRipper.IO.Files.Tests/FileUtilsTests.cs
Normal file
@ -0,0 +1,45 @@
|
||||
using AssetRipper.IO.Files.Utils;
|
||||
|
||||
namespace AssetRipper.IO.Files.Tests;
|
||||
|
||||
public static class FileUtilsTests
|
||||
{
|
||||
[Test]
|
||||
public static void FilenameTruncationMultibyteCharacter()
|
||||
{
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
// A length 3 cont cont cont
|
||||
// 01000001 11100110 10010110 10000111 00001010
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 4), Is.EqualTo(".ext"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 5), Is.EqualTo("A.ext"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 6), Is.EqualTo("A.ext"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 7), Is.EqualTo("A.ext"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 8), Is.EqualTo("A文.ext"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 9), Is.EqualTo("A文.ext"));
|
||||
});
|
||||
}
|
||||
|
||||
[Test]
|
||||
public static void ExtensionLength()
|
||||
{
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 7), Is.EqualTo("A.ext"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.ext", 8), Is.EqualTo("A文.ext"));
|
||||
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.exte", 8), Is.EqualTo("A.exte"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文.exte", 9), Is.EqualTo("A文.exte"));
|
||||
});
|
||||
}
|
||||
|
||||
[Test]
|
||||
public static void WithoutExtension()
|
||||
{
|
||||
Assert.Multiple(() =>
|
||||
{
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文", 3), Is.EqualTo("A"));
|
||||
Assert.That(FileUtils.GetUniqueName("/dir", "A文", 4), Is.EqualTo("A文"));
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -1,4 +1,4 @@
|
||||
using System.Runtime.InteropServices;
|
||||
using System.Text;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace AssetRipper.IO.Files.Utils
|
||||
@ -37,12 +37,11 @@ namespace AssetRipper.IO.Files.Utils
|
||||
{
|
||||
string? ext = null;
|
||||
string? name = null;
|
||||
int maxLength = maxNameLength - 4;
|
||||
string validFileName = fileName;
|
||||
if (validFileName.Length > maxLength)
|
||||
if (Encoding.UTF8.GetByteCount(fileName) > maxNameLength)
|
||||
{
|
||||
ext = Path.GetExtension(validFileName);
|
||||
name = validFileName.Substring(0, maxLength - ext.Length);
|
||||
name = TruncateToUTF8ByteLength(fileName, maxNameLength - Encoding.UTF8.GetByteCount(ext));
|
||||
validFileName = name + ext;
|
||||
}
|
||||
|
||||
@ -120,5 +119,59 @@ namespace AssetRipper.IO.Files.Utils
|
||||
"lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9",
|
||||
];
|
||||
private static readonly Regex FileNameRegex = GenerateFileNameRegex();
|
||||
|
||||
private static string TruncateToUTF8ByteLength(string str, int maxLength)
|
||||
{
|
||||
byte[] bytes = Encoding.UTF8.GetBytes(str);
|
||||
int validLength = FindValidByteLength(bytes, maxLength);
|
||||
return Encoding.UTF8.GetString(bytes[..validLength]);
|
||||
}
|
||||
|
||||
private static int FindValidByteLength(byte[] bytes, int maxLength)
|
||||
{
|
||||
int validLength = maxLength;
|
||||
|
||||
// ascii char: 0_
|
||||
// two-byte char: 110_ 10_
|
||||
// three-byte char: 1110_ 10_ _10_
|
||||
// four-byte char : 11110_ 10_ _10_ _10
|
||||
|
||||
if (maxLength >= bytes.Length)
|
||||
{
|
||||
return bytes.Length;
|
||||
}
|
||||
|
||||
// next byte is a beginning, so we can safely truncate to maxLength
|
||||
byte nextByte = bytes[maxLength];
|
||||
if ((nextByte & 0b11_000000) != 0b10_000000)
|
||||
{
|
||||
return maxLength;
|
||||
}
|
||||
|
||||
// move to end of the last full sequence
|
||||
for (int i = maxLength - 1; i >= 0; i--)
|
||||
{
|
||||
byte currentByte = bytes[i];
|
||||
|
||||
if ((currentByte & 0b11_000000) == 0b10_000000)
|
||||
{
|
||||
// continuation byte
|
||||
validLength--;
|
||||
}
|
||||
else if ((currentByte & 0b10000000) == 0b10000000)
|
||||
{
|
||||
// start of multi-byte sequence
|
||||
validLength--;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// ascii char
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return validLength;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user