diff --git a/SharpNBT.Tests/Data/bigtest.snbt b/SharpNBT.Tests/Data/bigtest.snbt
new file mode 100644
index 0000000..e4a523d
--- /dev/null
+++ b/SharpNBT.Tests/Data/bigtest.snbt
@@ -0,0 +1,41 @@
+{
+ "test case": 90,
+ noQuotes: "HELLO WORLD THIS IS A TEST STRING ÅÄÖ!",
+ "test with \" escaped quote": 90,
+ 'single quoted with inner "double quotes"': 90,
+ "double quoted with inner 'single quote' in text": -45.0f,
+ shortTest: 32767s,
+ longTest: 9223372036854775807L,
+ byteTest: 127b,
+ byteArrayTest: [B; 0B, 62B, 34B, 16B, 8B, 10B, 22B, 44B, 76B, 18B, 70B, 32B, 4B, 86B, 78B, 80B, 92B, 14B, 46B, 48B],
+ "listTest (long)": [
+ -11L,
+ 12L,
+ 13L,
+ -14L,
+ 15L
+ ],
+ floatTest: 0.49823147f,
+ doubleTest: 0.4931287132182315d,
+ intTest: 2147483647,
+ "listTest (compound)": [
+ {
+ created-on: 1264099775885L,
+ name: "Compound tag #0"
+ },
+ {
+ created-on: 1264099775885L,
+ name: "Compound tag #1"
+ }
+ ],
+ "nested compound test": {
+ egg: {
+ name: "Eggbert",
+ value: 0.5f
+ },
+ ham: {
+ name: "Hampus",
+ value: 0.75f
+ }
+ }
+}
\ No newline at end of file
diff --git a/SharpNBT.Tests/SharpNBT.Tests.csproj b/SharpNBT.Tests/SharpNBT.Tests.csproj
index f8b48cf..15aafb5 100644
--- a/SharpNBT.Tests/SharpNBT.Tests.csproj
+++ b/SharpNBT.Tests/SharpNBT.Tests.csproj
@@ -32,6 +32,8 @@
PreserveNewest
+
+
diff --git a/SharpNBT.Tests/StringifiedTest.cs b/SharpNBT.Tests/StringifiedTest.cs
index 05b7adc..8b60ab9 100644
--- a/SharpNBT.Tests/StringifiedTest.cs
+++ b/SharpNBT.Tests/StringifiedTest.cs
@@ -1,4 +1,6 @@
using System.IO;
+using System.Runtime.InteropServices;
+using System.Text;
using Microsoft.VisualStudio.TestPlatform.ObjectModel;
using SharpNBT.SNBT;
using Xunit;
@@ -34,22 +36,19 @@ namespace SharpNBT.Tests
public void ParseSmall()
{
const string testString = "{name1:123,name2:\"sometext1\",name3:{subname1:456,subname2:\"sometext2\"}}";
- var lexer = new Lexer();
- foreach (var token in lexer.Tokenize(testString))
- {
- output.WriteLine($"{token.Type}: \"{token.Match.Trim()}\"");
- }
+ var tag = StringNbt.Parse(testString);
+ output.WriteLine(tag.PrettyPrinted());
}
[Fact]
public void ParseBig()
{
- var testString = File.ReadAllText("/code/ruby/craftbook-nbt/test/bigtest.snbt");
- var lexer = new Lexer();
- foreach (var token in lexer.Tokenize(testString))
- {
- output.WriteLine($"{token.Type}: \"{token.Match.Trim()}\"");
- }
+ using var stream = TestHelper.GetFile("bigtest.snbt", CompressionType.None);
+ using var reader = new StreamReader(stream, Encoding.UTF8);
+ var testString = reader.ReadToEnd();
+
+ var tag = StringNbt.Parse(testString);
+ output.WriteLine(tag.PrettyPrinted());
}
}
}
\ No newline at end of file
diff --git a/SharpNBT/SNBT/Lexer.cs b/SharpNBT/SNBT/Lexer.cs
index 7f620f9..03fb767 100644
--- a/SharpNBT/SNBT/Lexer.cs
+++ b/SharpNBT/SNBT/Lexer.cs
@@ -1,180 +1,50 @@
-using System;
using System.Collections.Generic;
-using System.IO;
-using System.Reflection.Emit;
-using System.Security;
-using System.Text.RegularExpressions;
-using JetBrains.Annotations;
+using System.Data;
namespace SharpNBT.SNBT
{
- internal enum TokenType
+ internal sealed class Lexer
{
- None,
- CompoundBegin,
- CompoundEnd,
- Identifier,
- String,
- Separator,
- Comma,
- ByteArray,
- IntArray,
- LongArray,
- ListArray,
- EndArray,
- Float,
- Double,
- Byte,
- Short,
- Long,
- Int,
- WhiteSpace,
- Char,
- EscapedChar
- }
-
-
- internal sealed class LexerRule
- {
-
- internal delegate string PostProcessHandler(Match match);
+ private readonly List ruleList;
- public Regex Matcher { get; }
-
- public TokenType Type { get; }
-
- public string Description { get; }
-
- public string PostProcess(Match match) => handler?.Invoke(match) ?? match.Value;
-
- private readonly PostProcessHandler handler;
-
- public LexerRule(TokenType type, string description, string pattern, [CanBeNull] PostProcessHandler process)
- {
- Type = type;
- Description = description;
- Matcher = new Regex(pattern, RegexOptions.Multiline | RegexOptions.CultureInvariant);
- handler = process;
- }
-
- // public LexerRule(TokenType type, string description, Regex regex)
- // {
- // Description = description;
- // Type = type;
- //
- // }
- }
-
- internal sealed class Token
- {
- public TokenType Type { get; }
-
- public string Match { get; }
-
- public Token(TokenType type, string match)
- {
- Type = type;
- Type = type;
- Match = match;
- }
- }
-
- internal class Lexer
- {
- private static readonly string DoubleQuoteIdentifier = "\"(.*?)\"\\s*(?=:)";
-
- private static readonly List rules;
-
-
- private const string IDENTIFIER_DOUBLE_QUOTES = "\".*?\"\\s*(?>:)";
- private const string IDENTIFIER_SINGLE_QUOTES = "'.*?'\\s*(?>:)";
- private const string IDENTIFIER_NO_QUOTES = @"[A-Za-z0-9_-]+\s*(?=:)";
-
- private const string STRING_DOUBLE_QUOTED = "^\\s*\".*?\"";
- private const string STRING_SINGLE_QUOTED = "^\\s*'.*?'";
-
- private const string COMPOUND_START = "\\s*{\\s*";
- private const string COMPOUND_END = @"\}";
-
-
- private const string SEPARATOR = "^\\s*:\\s*";
- private const string COMMA = "^\\s*,\\s*";
-
-
- static Lexer()
- {
- rules = new List
- {
- new LexerRule(TokenType.CompoundBegin, "Opening Compound brace", "^{", null),
- new LexerRule(TokenType.WhiteSpace, "Useless whitespace", @"^[\s]+", null),
-
- new LexerRule(TokenType.Identifier, "Single-quoted name", "^\\s*'(.*?)'\\s*(?=:)", m => m.Groups[1].Value),
- new LexerRule(TokenType.Identifier, "Double-quoted name", "^\\s*\"(.*?)\"\\s*(?=:)", m => m.Groups[1].Value),
- new LexerRule(TokenType.Identifier, "Unquoted name", "^\\s*([A-Za-z0-9_-]+)\\s*(?=:)", m => m.Groups[1].Value),
-
-
- new LexerRule(TokenType.String, "Double-quoted string value", "^\"(.*?)\"", null),
- new LexerRule(TokenType.String, "Single-quoted string value", "^'(.*?)'", null)
-
- // new LexerRule(TokenType.CompoundBegin, COMPOUND_START),
- // new LexerRule(TokenType.CompoundEnd, COMPOUND_END),
- // new LexerRule(TokenType.Identifier, IDENTIFIER_DOUBLE_QUOTES),
- // new LexerRule(TokenType.Identifier, IDENTIFIER_SINGLE_QUOTES),
- // new LexerRule(TokenType.Identifier, IDENTIFIER_NO_QUOTES),
- // new LexerRule(TokenType.String, STRING_DOUBLE_QUOTED),
- // new LexerRule(TokenType.String, STRING_SINGLE_QUOTED),
- // new LexerRule(TokenType.Separator, SEPARATOR),
- // new LexerRule(TokenType.Comma, COMMA),
- // new LexerRule(TokenType.ByteArray, @"\[B;[\s]*?"),
- // new LexerRule(TokenType.IntArray, @"\[I;[\s]*?"),
- // new LexerRule(TokenType.LongArray, @"\[L;[\s]*?"),
- // new LexerRule(TokenType.ListArray, @"\[[\s]*?"),
- // new LexerRule(TokenType.EndArray, @"[\s]*\]"),
- // new LexerRule(TokenType.Float, @"-?[0-9]*\.[0-9]+[Ff]"),
- // new LexerRule(TokenType.Double, @"-?[0-9]*\.[0-9]+[Dd]?"),
- // new LexerRule(TokenType.Byte, "-?([0-9]+)[Bb]"),
- // new LexerRule(TokenType.Short, "-?([0-9]+)[Ss]"),
- // new LexerRule(TokenType.Long, "-?([0-9]+)[Ll]"),
- // new LexerRule(TokenType.Int, "-?([0-9]+)"),
- // new LexerRule(TokenType.WhiteSpace, @"[\s]+"),
- // new LexerRule(TokenType.String, @"[\S]+"),
- // new LexerRule(TokenType.Char, ".")
- };
- }
-
- private static string Process(Match match)
- {
- throw new NotImplementedException();
- }
-
public Lexer()
{
-
+ ruleList = new List();
}
- public IEnumerable Tokenize(string input)
- {
- string.Create(input.Length, input, (span, i) =>
- {
-
- });
- var pos = 0;
+ public void AddRule(TokenType type, string pattern, bool skipped = false) => ruleList.Add(new LexerRule(type, pattern, null, skipped));
- do
- {
- Label:
- foreach (var rule in rules)
- {
- var match = rule.Matcher.Match(input, pos);
- if (match.Success)
- {
- yield return new Token(rule.Type, rule.PostProcess(match));
- pos = match.Index + match.Length - 1;
- break;
- }
- }
- } while (++pos < input.Length);
-
+ public void AddRule(TokenType type, string pattern, ResultHandler handler, bool skipped = false)
+ {
+ ruleList.Add(new LexerRule(type, pattern, handler, skipped));
}
+
+ public IEnumerable Tokenize(string source)
+ {
+ var index = 0;
+ while (index < source.Length)
+ {
+ var success = false;
+
+ foreach (var rule in ruleList)
+ {
+ var match = rule.Pattern.Match(source, index);
+ if (!match.Success || match.Index - index != 0)
+ continue;
+
+ if (!rule.IsSkipped)
+ yield return new Token(rule.Type, rule.Process(source, index, match));
+
+ index += match.Length;
+ success = true;
+ break;
+ }
+
+ if (!success)
+ throw new SyntaxErrorException($"Unrecognized sequence at index {index}: '{source[index]}'");
+ }
+ }
+
+
}
}
\ No newline at end of file
diff --git a/SharpNBT/SNBT/LexerRule.cs b/SharpNBT/SNBT/LexerRule.cs
new file mode 100644
index 0000000..68e21c3
--- /dev/null
+++ b/SharpNBT/SNBT/LexerRule.cs
@@ -0,0 +1,37 @@
+using System;
+using System.Text.RegularExpressions;
+
+namespace SharpNBT.SNBT
+{
+
+ internal delegate string ResultHandler(Match match);
+
+ internal class LexerRule
+ {
+ private readonly ResultHandler processResult;
+
+ public TokenType Type { get; }
+
+ public Regex Pattern { get; }
+
+ public bool IsSkipped { get; }
+
+
+ public LexerRule(TokenType type, string pattern, bool skipped = false) : this(type, pattern, null, skipped)
+ {
+ }
+
+ public LexerRule(TokenType type, string pattern, ResultHandler handler, bool skipped = false)
+ {
+ Type = type;
+ Pattern = new Regex(pattern);
+ IsSkipped = skipped;
+ processResult = handler;
+ }
+
+ public string Process(string source, int index, Match match)
+ {
+ return processResult is null ? source.Substring(index, match.Length) : processResult.Invoke(match);
+ }
+ }
+}
\ No newline at end of file
diff --git a/SharpNBT/SNBT/StringNbt.cs b/SharpNBT/SNBT/StringNbt.cs
new file mode 100644
index 0000000..53e77ba
--- /dev/null
+++ b/SharpNBT/SNBT/StringNbt.cs
@@ -0,0 +1,174 @@
+using System;
+using System.Collections.Generic;
+using System.Data;
+using System.Text.RegularExpressions;
+using JetBrains.Annotations;
+
+namespace SharpNBT.SNBT
+{
+ public static class StringNbt
+ {
+ private static readonly Lexer lexer;
+
+ static StringNbt()
+ {
+ lexer = new Lexer();
+ lexer.AddRule(TokenType.Whitespace, @"(\r|\t|\v|\f|\s)+?", true);
+ lexer.AddRule(TokenType.Separator, ",", true);
+ lexer.AddRule(TokenType.Compound, @"{");
+ lexer.AddRule(TokenType.EndCompound, @"}");
+ lexer.AddRule(TokenType.Identifier, "\"(.*?)\"\\s*(?>:)", FirstGroupValue);
+ lexer.AddRule(TokenType.Identifier, "'(.*?)'\\s*(?>:)", FirstGroupValue);
+ lexer.AddRule(TokenType.Identifier, "([A-Za-z0-9_-]+)\\s*(?>:)", FirstGroupValue);
+ lexer.AddRule(TokenType.String, "\"(.*?)\"", FirstGroupValue);
+ lexer.AddRule(TokenType.String, "'(.*?)'", FirstGroupValue);
+ lexer.AddRule(TokenType.ByteArray, @"\[B;");
+ lexer.AddRule(TokenType.IntArray, @"\[I;");
+ lexer.AddRule(TokenType.LongArray, @"\[L;");
+ lexer.AddRule(TokenType.List, @"\[");
+ lexer.AddRule(TokenType.EndArray, @"\]");
+ lexer.AddRule(TokenType.Float, @"(-?[0-9]*\.[0-9]+)[Ff]", FirstGroupValue);
+ lexer.AddRule(TokenType.Double, @"(-?[0-9]*\.[0-9]+)[Dd]?", FirstGroupValue);
+ lexer.AddRule(TokenType.Byte, "(-?[0-9]+)[Bb]", FirstGroupValue);
+ lexer.AddRule(TokenType.Short, "(-?[0-9]+)[Ss]", FirstGroupValue);
+ lexer.AddRule(TokenType.Long, "(-?[0-9]+)[Ll]", FirstGroupValue);
+ lexer.AddRule(TokenType.Int, "(-?[0-9]+)", FirstGroupValue);
+ }
+
+ private static string FirstGroupValue(Match match) => match.Groups[1].Value;
+
+ public static CompoundTag Parse([NotNull] string source)
+ {
+ if (source is null)
+ throw new ArgumentNullException(nameof(source));
+
+ if (string.IsNullOrWhiteSpace(source))
+ return new CompoundTag(null);
+
+ var queue = new Queue(lexer.Tokenize(source));
+ return Parse(queue);
+ }
+
+ private static T Parse(Queue queue) where T : Tag => (T)Parse(queue);
+
+ private static Tag Parse(Queue queue)
+ {
+ string name = null;
+ var token = MoveNext(queue);
+
+ if (token.Type == TokenType.Identifier)
+ {
+ name = token.Value;
+ token = MoveNext(queue);
+ }
+
+ return token.Type switch
+ {
+ TokenType.Compound => ParseCompound(name, queue),
+ TokenType.String => new StringTag(name, token.Value),
+ TokenType.ByteArray => ParseByteArray(name, queue),
+ TokenType.IntArray => ParseIntArray(name, queue),
+ TokenType.LongArray => ParseLongArray(name, queue),
+ TokenType.List => ParseList(name, queue),
+ TokenType.Byte => new ByteTag(name, sbyte.Parse(token.Value)),
+ TokenType.Short => new ShortTag(name, short.Parse(token.Value)),
+ TokenType.Int => new IntTag(name, int.Parse(token.Value)),
+ TokenType.Long => new LongTag(name, long.Parse(token.Value)),
+ TokenType.Float => new FloatTag(name, float.Parse(token.Value)),
+ TokenType.Double => new DoubleTag(name, double.Parse(token.Value)),
+ _ => throw new SyntaxErrorException()
+ };
+ }
+
+ [NotNull]
+ private static Token MoveNext(Queue queue)
+ {
+ if (queue.TryDequeue(out var token))
+ return token;
+
+ throw new SyntaxErrorException("Unexpected end-of-input");
+ }
+
+ private static void MoveNext(Queue queue, TokenType assertType)
+ {
+ var token = MoveNext(queue);
+ if (token.Type != assertType)
+ throw new SyntaxErrorException($"Expected token of type {assertType}, but encountered {token.Type}.");
+ }
+
+ private static CompoundTag ParseCompound(string name, Queue queue)
+ {
+ var compound = new CompoundTag(name);
+ while (queue.TryPeek(out var token) && token.Type != TokenType.EndCompound)
+ {
+ compound.Add(Parse(queue));
+ }
+ MoveNext(queue, TokenType.EndCompound);
+ return compound;
+ }
+
+ private static ListTag ParseList(string name, Queue queue)
+ {
+ var values = new List();
+ while (queue.TryPeek(out var token) && token.Type != TokenType.EndArray)
+ {
+ values.Add(Parse(queue));
+ }
+
+ MoveNext(queue, TokenType.EndArray);
+ if (values.Count > 0)
+ {
+ var type = values[0].Type;
+ return new ListTag(name, type, values);
+ }
+ return new ListTag(name, TagType.End);
+ }
+
+ private static ByteArrayTag ParseByteArray(string name, Queue queue)
+ {
+ var values = new List();
+ foreach (var token in DequeueUntil(queue, TokenType.EndArray))
+ {
+ if (token.Type != TokenType.Byte)
+ throw new SyntaxErrorException($"Invalid token type in array, expected {TokenType.Byte}, got {token.Type}.");
+ values.Add(unchecked((byte) sbyte.Parse(token.Value)));
+ }
+ return new ByteArrayTag(name, values);
+ }
+
+ private static IntArrayTag ParseIntArray(string name, Queue queue)
+ {
+ var values = new List();
+ foreach (var token in DequeueUntil(queue, TokenType.EndArray))
+ {
+ if (token.Type != TokenType.Int)
+ throw new SyntaxErrorException($"Invalid token type in array, expected {TokenType.Int}, got {token.Type}.");
+ values.Add(int.Parse(token.Value));
+ }
+ return new IntArrayTag(name, values);
+ }
+
+ private static LongArrayTag ParseLongArray(string name, Queue queue)
+ {
+ var values = new List();
+ foreach (var token in DequeueUntil(queue, TokenType.EndArray))
+ {
+ if (token.Type != TokenType.Long)
+ throw new SyntaxErrorException($"Invalid token type in array, expected {TokenType.Long}, got {token.Type}.");
+ values.Add(long.Parse(token.Value));
+ }
+ return new LongArrayTag(name, values);
+ }
+
+ private static IEnumerable DequeueUntil(Queue queue, TokenType type)
+ {
+ while (true)
+ {
+ var token = MoveNext(queue);
+ if (token.Type == type)
+ yield break;
+ yield return token;
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/SharpNBT/SNBT/Token.cs b/SharpNBT/SNBT/Token.cs
new file mode 100644
index 0000000..c915f5d
--- /dev/null
+++ b/SharpNBT/SNBT/Token.cs
@@ -0,0 +1,35 @@
+using JetBrains.Annotations;
+
+namespace SharpNBT.SNBT
+{
+ ///
+ /// An object emitted by the lexer to describe a logical fragment of code that can be parsed.
+ ///
+ [PublicAPI]
+ public sealed class Token
+ {
+ ///
+        /// Gets a value describing the general type of code fragment this represents.
+ ///
+ public TokenType Type { get; }
+
+ ///
+ /// Gets a value of this fragment, which can vary depending on context and the .
+ ///
+ public string Value { get; }
+
+ ///
+ /// Creates a new instance of the class.
+ ///
+        /// A value describing the general type of code fragment this represents.
+        /// The value of this code fragment.
+ public Token(TokenType type, [NotNull] string value)
+ {
+ Type = type;
+ Value = value;
+ }
+
+ ///
+ public override string ToString() => $"[{Type}] \"{Value}\"";
+ }
+}
\ No newline at end of file
diff --git a/SharpNBT/SNBT/TokenType.cs b/SharpNBT/SNBT/TokenType.cs
new file mode 100644
index 0000000..1d08a68
--- /dev/null
+++ b/SharpNBT/SNBT/TokenType.cs
@@ -0,0 +1,98 @@
+using JetBrains.Annotations;
+
+namespace SharpNBT.SNBT
+{
+ ///
+ /// Describes types of tokens that the SNBT lexer can emit.
+ ///
+ [PublicAPI]
+ public enum TokenType
+ {
+ ///
+ /// Any whitespace/newline not found within a string or identifier.
+ ///
+ /// This type is not yielded during tokenization.
+ Whitespace,
+
+ ///
+ /// A separator between objects and array elements.
+ ///
+ /// This type is not yielded during tokenization.
+ Separator,
+
+ ///
+ /// The beginning of new object.
+ ///
+ Compound,
+
+ ///
+ /// The end of a .
+ ///
+ EndCompound,
+
+ ///
+        /// The name of a tag.
+ ///
+ Identifier,
+
+ ///
+        /// A value, which may contain escaped quotes.
+ ///
+ String,
+
+ ///
+ /// The beginning of a .
+ ///
+ ByteArray,
+
+ ///
+ /// The beginning of a .
+ ///
+ IntArray,
+
+ ///
+ /// The beginning of a .
+ ///
+ LongArray,
+
+ ///
+ /// The beginning of a .
+ ///
+ List,
+
+ ///
+ /// The end of a , , or .
+ ///
+ EndArray,
+
+ ///
+ /// A value or element of a depending on context.
+ ///
+ Byte,
+
+ ///
+ /// A value.
+ ///
+ Short,
+
+ ///
+ /// A value or element of a depending on context.
+ ///
+ Int,
+
+ ///
+ /// A value or element of a depending on context.
+ ///
+ Long,
+
+ ///
+ /// A value.
+ ///
+ Float,
+
+ ///
+ /// A value.
+ ///
+ Double
+ }
+}
\ No newline at end of file