Skip to content

Commit 2ceb207

Browse files
Strip UTF-8 BOM in M2dReader and add test
Move UTF-8 BOM removal to the IO layer: M2dReader.GetString() now strips a leading U+FEFF if present. Remove the now-unused Sanitizer.RemoveUtf8Bom method and delete calls to it in several parsers (ItemOptionParser, TableParser). Add SanitizerBomTest to verify XML data with and without a BOM deserializes correctly. This centralizes BOM handling at read-time and removes duplicate sanitizer logic.
1 parent 6e0ed1d commit 2ceb207

5 files changed

Lines changed: 67 additions & 13 deletions

File tree

Maple2.File.IO/M2dReader.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,12 @@ public byte[] GetBytes(PackFileEntry entry) {
6363

6464
/// <summary>
/// Decrypts the data for <paramref name="entry"/> and decodes it to text with
/// <see cref="Encoding.Default"/>, omitting a single leading UTF-8 byte order mark.
/// </summary>
/// <param name="entry">Pack file entry whose <c>FileHeader</c> is passed to the decryptor.</param>
/// <returns>The decoded text, without a leading UTF-8 BOM if one was present.</returns>
public string GetString(PackFileEntry entry) {
    byte[] data = CryptoManager.DecryptData(entry.FileHeader, m2dFile);

    // Detect the UTF-8 BOM (EF BB BF) on the raw bytes and skip it while decoding.
    // This avoids decoding the whole payload and then copying it again just to drop
    // the first character, and it does not rely on the decoder turning the BOM
    // bytes into a single U+FEFF character.
    bool hasUtf8Bom = data.Length >= 3
        && data[0] == 0xEF
        && data[1] == 0xBB
        && data[2] == 0xBF;

    return hasUtf8Bom
        ? Encoding.Default.GetString(data, 3, data.Length - 3)
        : Encoding.Default.GetString(data);
}
6873

6974
public void Dispose() {

Maple2.File.Parser/ItemOptionParser.cs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,6 @@ public IEnumerable<ItemOptionConstantData> ParseConstant() {
131131

132132
public IEnumerable<ItemOptionConstant> ParseConstantNew() {
133133
string xml = Sanitizer.RemoveEmpty(xmlReader.GetString(xmlReader.GetEntry("table/itemoptionconstant.xml")));
134-
xml = Sanitizer.RemoveUtf8Bom(xml);
135134
var reader = XmlReader.Create(new StringReader(xml));
136135
var root = itemOptionConstantNewSerializer.Deserialize(reader) as ItemOptionConstantRootNew;
137136
Debug.Assert(root != null);
@@ -161,7 +160,6 @@ public IEnumerable<ItemOptionData> ParseRandom() {
161160

162161
public IEnumerable<ItemOptionRandomNew> ParseRandomNew() {
163162
string xml = Sanitizer.RemoveEmpty(xmlReader.GetString(xmlReader.GetEntry("table/itemoptionrandom.xml")));
164-
xml = Sanitizer.RemoveUtf8Bom(xml);
165163
var reader = XmlReader.Create(new StringReader(xml));
166164
var root = itemOptionNewSerializer.Deserialize(reader) as ItemOptionRandomRootNew;
167165
Debug.Assert(root != null);
@@ -192,7 +190,6 @@ public IEnumerable<ItemOptionData> ParseStatic() {
192190

193191
public IEnumerable<MergeOptionNew> ParseMergeOptionBaseNew() {
194192
string xml = Sanitizer.RemoveEmpty(xmlReader.GetString(xmlReader.GetEntry("table/itemmergeoptionbase.xml")));
195-
xml = Sanitizer.RemoveUtf8Bom(xml);
196193
var reader = XmlReader.Create(new StringReader(xml));
197194
var root = itemMergeOptionNewSerializer.Deserialize(reader) as ItemMergeOptionRootNew;
198195
Debug.Assert(root != null);

Maple2.File.Parser/TableParser.cs

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,6 @@ public IEnumerable<JobTableNew> ParseJobTableNew() {
815815
public IEnumerable<(int Id, IDictionary<byte, List<IndividualItemDropItem>>)> ParseIndividualItemDropFinal() {
816816
string xml = Sanitizer.RemoveEmpty(xmlReader.GetString(xmlReader.GetEntry("table/individualitemdrop_final.xml")));
817817
xml = Sanitizer.SanitizeBool(xml);
818-
xml = Sanitizer.RemoveUtf8Bom(xml);
819818
var reader = XmlReader.Create(new StringReader(xml));
820819
var data = individualItemDropNewSerializer.Deserialize(reader) as IndividualItemDropRootNew;
821820
Debug.Assert(data != null);
@@ -1000,7 +999,6 @@ public IEnumerable<JobTableNew> ParseJobTableNew() {
1000999

10011000
public IEnumerable<(int ItemId, ShopFurnishing UgcItem)> ParseFurnishingShopUgcAll() {
10021001
string xml = Sanitizer.RemoveEmpty(xmlReader.GetString(xmlReader.GetEntry($"table/{locale}/shop_ugcall.xml")));
1003-
xml = Sanitizer.RemoveUtf8Bom(xml);
10041002

10051003
var reader = XmlReader.Create(new StringReader(xml));
10061004
var data = shopFurnishingSerializer.Deserialize(reader) as ShopFurnishingRoot;

Maple2.File.Parser/Tools/Sanitizer.cs

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -95,11 +95,4 @@ private static string FixCommaFloats(string xml, params string[] attributes) {
9595
string pattern = $"({string.Join('|', attributes)})=\"(-?\\d+)(?:,(\\d+))\"";
9696
return Regex.Replace(xml, pattern, "$1=\"$2.$3\"");
9797
}
98-
99-
public static string RemoveUtf8Bom(string xml) {
100-
if (!string.IsNullOrEmpty(xml) && xml[0] == '\uFEFF') {
101-
return xml[1..];
102-
}
103-
return xml;
104-
}
10598
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
using System.Text;
2+
using System.Xml.Serialization;
3+
using Microsoft.VisualStudio.TestTools.UnitTesting;
4+
5+
namespace Maple2.File.Tests;
6+
7+
/// <summary>
/// Verifies that XML payloads deserialize correctly whether or not the raw bytes
/// start with a UTF-8 byte order mark, once a leading U+FEFF is stripped after decoding.
/// </summary>
[TestClass]
public class SanitizerBomTest {
    private const string XML_CONTENT = "<?xml version=\"1.0\" encoding=\"utf-8\"?><ms2><item id=\"1\" name=\"TestItem\" /></ms2>";

    /// <summary>Root <c>ms2</c> element of the test document.</summary>
    [XmlRoot("ms2")]
    public class Ms2Root {
        [XmlElement("item")]
        public ItemElement? Item { get; set; }
    }

    /// <summary>The single <c>item</c> element of the test document.</summary>
    public class ItemElement {
        [XmlAttribute("id")]
        public int Id { get; set; }

        [XmlAttribute("name")]
        public string Name { get; set; } = string.Empty;
    }

    [TestMethod]
    public void TestParseXmlWithoutBom() {
        byte[] data = Encoding.UTF8.GetBytes(XML_CONTENT);

        string xml = DecodeAndStripBom(data);

        // Stripping must be a no-op when there is no BOM.
        Assert.AreEqual(XML_CONTENT, xml);
        AssertDeserializesTestItem(xml);
    }

    [TestMethod]
    public void TestParseXmlWithBom() {
        byte[] bom = [0xEF, 0xBB, 0xBF];
        byte[] content = Encoding.UTF8.GetBytes(XML_CONTENT);
        byte[] data = [..bom, ..content];

        // Precondition: the BOM must survive decoding as U+FEFF, otherwise this
        // test would pass without ever exercising the stripping branch.
        Assert.AreEqual('\uFEFF', Encoding.Default.GetString(data)[0]);

        string xml = DecodeAndStripBom(data);

        // Exactly the BOM, and nothing else, must have been removed.
        Assert.AreEqual(XML_CONTENT, xml);
        AssertDeserializesTestItem(xml);
    }

    // Decodes the bytes with Encoding.Default and drops one leading U+FEFF, simulating
    // the BOM handling that M2dReader.GetString() applies at read time.
    private static string DecodeAndStripBom(byte[] data) {
        string xml = Encoding.Default.GetString(data);
        if (xml.Length > 0 && xml[0] == '\uFEFF') {
            xml = xml[1..];
        }
        return xml;
    }

    // Deserializes the XML into Ms2Root and checks the id and name of the single test item.
    private static void AssertDeserializesTestItem(string xml) {
        var serializer = new XmlSerializer(typeof(Ms2Root));
        using var reader = new StringReader(xml);
        var result = (Ms2Root?) serializer.Deserialize(reader);

        Assert.IsNotNull(result);
        Assert.IsNotNull(result.Item);
        Assert.AreEqual(1, result.Item.Id);
        Assert.AreEqual("TestItem", result.Item.Name);
    }
}

0 commit comments

Comments
 (0)