Implemented a new SNBT parser that does not use regex

2023-08-26 23:33:08 -04:00 · 2023-08-26 23:33:08 -04:00 · f46d240767
parent 7f99b388d7
commit f46d240767
8 changed files with 456 additions and 432 deletions
--- a/SharpNBT/SNBT/Lexer.cs
+++ b/SharpNBT/SNBT/Lexer.cs
@ -1,49 +0,0 @@
-using System.Collections.Generic;
-using System.Data;
-
-namespace SharpNBT.SNBT;
-
-internal sealed class Lexer
-{
-    private readonly List<LexerRule> ruleList;
-        
-    public Lexer()
-    {
-        ruleList = new List<LexerRule>();
-    }
-        
-    public void AddRule(TokenType type, string pattern, bool skipped = false) => ruleList.Add(new LexerRule(type, pattern, null, skipped));
-
-    public void AddRule(TokenType type, string pattern, ResultHandler handler, bool skipped = false)
-    {
-        ruleList.Add(new LexerRule(type, pattern, handler, skipped));
-    }
-        
-    public IEnumerable<Token> Tokenize(string source)
-    {
-        var index = 0;
-        while (index < source.Length)
-        {
-            var success = false;
-
-            foreach (var rule in ruleList)
-            {
-                var match = rule.Pattern.Match(source, index);
-                if (!match.Success || match.Index - index != 0) 
-                    continue;
-                    
-                if (!rule.IsSkipped)
-                    yield return new Token(rule.Type, rule.Process(source, index, match));
-
-                index += match.Length;
-                success = true;
-                break;
-            }
-
-            if (!success)
-                throw new SyntaxErrorException($"Unrecognized sequence at index {index}: '{source[index]}'");
-        }
-    }
-        
-        
-}
--- a/SharpNBT/SNBT/LexerRule.cs
+++ b/SharpNBT/SNBT/LexerRule.cs
@ -1,35 +0,0 @@
-using System;
-using System.Text.RegularExpressions;
-
-namespace SharpNBT.SNBT;
-
-internal delegate string ResultHandler(Match match);
-
-internal class LexerRule
-{
-    private readonly ResultHandler processResult;
-        
-    public TokenType Type { get; }
-        
-    public Regex Pattern { get; }
-        
-    public bool IsSkipped { get; }
-        
-        
-    public LexerRule(TokenType type, string pattern, bool skipped = false) : this(type, pattern, null, skipped)
-    {
-    }
-        
-    public LexerRule(TokenType type, string pattern, ResultHandler handler, bool skipped = false)
-    {
-        Type = type;
-        Pattern = new Regex(pattern, RegexOptions.Compiled);
-        IsSkipped = skipped;
-        processResult = handler;
-    }
-
-    public string Process(string source, int index, Match match)
-    {
-        return processResult is null ? source.Substring(index, match.Length) : processResult.Invoke(match);
-    }
-}
--- a/SharpNBT/SNBT/Scanner.cs
+++ b/SharpNBT/SNBT/Scanner.cs
@ -0,0 +1,64 @@
+using System;
+using System.Data;
+using System.Diagnostics.CodeAnalysis;
+using System.Text;
+
+namespace SharpNBT.SNBT;
+
+/// <summary>
+/// A forward-only character cursor over SNBT source text.
+/// </summary>
+/// <remarks>
+/// <see cref="Position"/> starts at <c>-1</c> (before the first character), so <see cref="MoveNext"/> has to be
+/// called once before <see cref="Current"/> is read.
+/// </remarks>
+internal ref struct Scanner
+{
+    /// <summary>The decoded characters being scanned.</summary>
+    public ReadOnlySpan<char> Source;
+
+    /// <summary>Index of the current character within <see cref="Source"/>.</summary>
+    public int Position;
+    
+    /// <summary>The character at <see cref="Position"/>. Performs no bounds check of its own.</summary>
+    public char Current => Source[Position];
+
+    /// <summary><see langword="true"/> once <see cref="Position"/> has moved past the last character.</summary>
+    public bool IsEndOfInput => Position >= Source.Length;
+    
+    /// <summary>
+    /// Creates a scanner over encoded text.
+    /// </summary>
+    /// <param name="utf8Bytes">The encoded bytes; they are decoded with <paramref name="encoding"/>.</param>
+    /// <param name="encoding">The encoding used to decode <paramref name="utf8Bytes"/>.</param>
+    public Scanner(ReadOnlySpan<byte> utf8Bytes, Encoding encoding)
+    {
+        Position = -1;
+        var count = encoding.GetCharCount(utf8Bytes);
+        var chars = new char[count];
+        encoding.GetChars(utf8Bytes, chars);
+        Source = new ReadOnlySpan<char>(chars);
+    }
+
+    /// <summary>
+    /// Creates a scanner over <paramref name="text"/> by encoding it with <paramref name="encoding"/> and
+    /// decoding the resulting bytes again.
+    /// </summary>
+    public Scanner(string text, Encoding encoding) : this(encoding.GetBytes(text), encoding)
+    {
+    }
+
+    /// <summary>
+    /// Returns the character <paramref name="numChars"/> positions ahead without moving the cursor.
+    /// </summary>
+    /// <exception cref="SyntaxErrorException">When the requested position lies beyond the end of the input.</exception>
+    public char Peek(int numChars = 1)
+    {
+        if (Position + numChars >= Source.Length)
+            SyntaxError("Unexpected end of input.");
+        return Source[Position + numChars];
+    }
+    
+    /// <summary>
+    /// Advances the cursor by one character, optionally continuing past whitespace.
+    /// </summary>
+    /// <param name="skipWhitespace">When <see langword="true"/>, keeps advancing while the current character is whitespace.</param>
+    /// <param name="fail">When <see langword="true"/>, running out of input raises a syntax error instead of returning <see langword="false"/>.</param>
+    /// <returns><see langword="true"/> if the cursor rests on a character, <see langword="false"/> at the end of the input.</returns>
+    /// <exception cref="SyntaxErrorException">When the input ends and <paramref name="fail"/> is <see langword="true"/>.</exception>
+    public bool MoveNext(bool skipWhitespace, bool fail)
+    {
+        ReadChar:
+        Position++;
+        if (Position >= Source.Length)
+        {
+            if (fail)
+                SyntaxError("Unexpected end of input.");
+            return false;
+        }
+        
+        if (skipWhitespace && char.IsWhiteSpace(Current))
+            goto ReadChar;
+        return true;
+    }
+
+    /// <summary>
+    /// Verifies that the current character is <paramref name="c"/>.
+    /// </summary>
+    /// <exception cref="SyntaxErrorException">When the input has ended or the current character differs.</exception>
+    public void AssertChar(char c)
+    {
+        // Parsers may leave the cursor one past the last character (e.g. after a closing quote). Reading
+        // Current there would throw IndexOutOfRangeException, so report truncated input as a syntax error.
+        if (IsEndOfInput)
+            SyntaxError($"Expected \"{c}\", got end of input.");
+        if (Current != c)
+            SyntaxError($"Expected \"{c}\", got \"{Current}\".");
+    }
+
+    /// <summary>
+    /// Throws a <see cref="SyntaxErrorException"/> whose message is prefixed with the current index.
+    /// </summary>
+    /// <remarks>Declared as returning <see cref="Exception"/> so it can be used in <c>throw</c> expressions; it never returns.</remarks>
+    [DoesNotReturn]
+    public Exception SyntaxError(string message)
+    {
+        throw new SyntaxErrorException($"Syntax error at index {Position}: {message}");
+    }
+}
--- a/SharpNBT/SNBT/StringNbt.cs
+++ b/SharpNBT/SNBT/StringNbt.cs
@ -1,10 +1,9 @@
 using System;
 using System.Collections.Generic;
 using System.Data;
-using System.IO;
+using System.Globalization;
+using System.Numerics;
 using System.Text;
-using System.Text.RegularExpressions;
-using System.Threading.Tasks;
 using JetBrains.Annotations;

 namespace SharpNBT.SNBT;
@ -15,91 +14,6 @@ namespace SharpNBT.SNBT;
 [PublicAPI]
 public static class StringNbt
 {
-    private static readonly Lexer lexer;
-
-    static StringNbt()
-    {
-        lexer = new Lexer();
-        lexer.AddRule(TokenType.Whitespace, @"(\r|\t|\v|\f|\s)+?", true);
-        lexer.AddRule(TokenType.Separator, ",", true);
-        lexer.AddRule(TokenType.Compound, @"{");
-        lexer.AddRule(TokenType.EndCompound, @"}");
-        lexer.AddRule(TokenType.Identifier, "\"(.*?)\"\\s*(?>:)", FirstGroupValue);
-        lexer.AddRule(TokenType.Identifier, "'(.*?)'\\s*(?>:)", FirstGroupValue);
-        lexer.AddRule(TokenType.Identifier, "([A-Za-z0-9_-]+)\\s*(?>:)", FirstGroupValue);
-        lexer.AddRule(TokenType.String, "\"(.*?)\"", FirstGroupValue);
-        lexer.AddRule(TokenType.String, "'(.*?)'", FirstGroupValue);
-        lexer.AddRule(TokenType.ByteArray, @"\[B;");
-        lexer.AddRule(TokenType.IntArray, @"\[I;");
-        lexer.AddRule(TokenType.LongArray, @"\[L;");
-        lexer.AddRule(TokenType.List, @"\[");
-        lexer.AddRule(TokenType.EndArray, @"\]");
-        lexer.AddRule(TokenType.Float, @"(-?[0-9]*\.[0-9]+)[Ff]", FirstGroupValue);
-        lexer.AddRule(TokenType.Double, @"(-?[0-9]*\.[0-9]+)[Dd]?", FirstGroupValue);
-        lexer.AddRule(TokenType.Bool, "(true|false)", FirstGroupValue);
-        lexer.AddRule(TokenType.Byte, "(-?[0-9]+)[Bb]", FirstGroupValue);
-        lexer.AddRule(TokenType.Short, "(-?[0-9]+)[Ss]", FirstGroupValue);
-        lexer.AddRule(TokenType.Long, "(-?[0-9]+)[Ll]", FirstGroupValue);
-        lexer.AddRule(TokenType.Int, "(-?[0-9]+)", FirstGroupValue);
-    }
-
-    /// <summary>
-    /// Parse the text in the given <paramref name="stream"/> into a <see cref="CompoundTag"/>.
-    /// </summary>
-    /// <param name="stream">A <see cref="Stream"/> containing the SNBT data.</param>
-    /// <param name="length">The number of bytes to read from the <paramref name="stream"/>, advancing its position.</param>
-    /// <returns>The <see cref="CompoundTag"/> instance described in the source text.</returns>
-    /// <exception cref="ArgumentNullException">When <paramref name="stream"/> is <see langword="null"/>.</exception>
-    /// <exception cref="IOException">When <paramref name="stream"/> is not opened for reading.</exception>
-    /// <exception cref="ArgumentException">When <paramref name="length"/> is negative.</exception>
-    /// <exception cref="SyntaxErrorException">When <paramref name="stream"/> contains invalid SNBT code.</exception>
-    public static CompoundTag Parse(Stream stream, int length)
-    {
-        Validate(stream, length);
-        if (length == 0)
-            return new CompoundTag(null);
-            
-        var buffer = new byte[length];
-        stream.Read(buffer, 0, length);
-        var str = Encoding.UTF8.GetString(buffer, 0, buffer.Length);
-            
-        return Parse(str);
-    }
-
-    /// <summary>
-    /// Asynchronously parses the text in the given <paramref name="stream"/> into a <see cref="CompoundTag"/>.
-    /// </summary>
-    /// <param name="stream">A <see cref="Stream"/> containing the SNBT data.</param>
-    /// <param name="length">The number of bytes to read from the <paramref name="stream"/>, advancing its position.</param>
-    /// <returns>The <see cref="CompoundTag"/> instance described in the source text.</returns>
-    /// <exception cref="ArgumentNullException">When <paramref name="stream"/> is <see langword="null"/>.</exception>
-    /// <exception cref="IOException">When <paramref name="stream"/> is not opened for reading.</exception>
-    /// <exception cref="ArgumentException">When <paramref name="length"/> is negative.</exception>
-    /// <exception cref="SyntaxErrorException">When <paramref name="stream"/> contains invalid SNBT code.</exception>
-    public static async Task<CompoundTag> ParseAsync(Stream stream, int length)
-    {
-        Validate(stream, length);
-        if (length == 0)
-            return new CompoundTag(null);
-            
-        var buffer = new byte[length];
-        await stream.ReadAsync(buffer, 0, length);
-        var str = Encoding.UTF8.GetString(buffer, 0, buffer.Length);
-            
-        return Parse(str);
-    }
-
-    private static void Validate(Stream stream, int length)
-    {
-        if (stream is null)
-            throw new ArgumentNullException(nameof(stream));
-        if (!stream.CanRead)
-            throw new IOException("Stream is not opened for reading.");
-            
-        if (length < 0)
-            throw new ArgumentException(Strings.NegativeLengthSpecified, nameof(length));
-    }
-        
    /// <summary>
    /// Parse the given <paramref name="source"/> text into a <see cref="CompoundTag"/>.
    /// </summary>
@ -109,137 +23,314 @@ public static class StringNbt
    /// <exception cref="SyntaxErrorException">When <paramref name="source"/> is invalid SNBT code.</exception>
    public static CompoundTag Parse(string source)
    {
-        if (source is null)
-            throw new ArgumentNullException(nameof(source));
-
-        if (string.IsNullOrWhiteSpace(source))
-            return new CompoundTag(null);
-
-        var queue = new Queue<Token>(lexer.Tokenize(source));
-        return Parse<CompoundTag>(queue);
-    }
-
-    private static T Parse<T>(Queue<Token> queue) where T : Tag => (T)Parse(queue);
+        // Reject null up front so the exception names this method's own parameter.
+        ArgumentNullException.ThrowIfNull(source);
+
+        var bytes = Encoding.UTF8.GetBytes(source);
+        var scanner = new Scanner(bytes, Encoding.UTF8);
        
-    private static Tag Parse(Queue<Token> queue)
-    {
-        string name = null;
-        var token = MoveNext(queue);
-
-        if (token.Type == TokenType.Identifier)
-        {
-            name = token.Value;
-            token = MoveNext(queue);
-        }
-
-        return token.Type switch
-        {
-            TokenType.Compound => ParseCompound(name, queue),
-            TokenType.String => new StringTag(name, token.Value),
-            TokenType.ByteArray => ParseByteArray(name, queue),
-            TokenType.IntArray => ParseIntArray(name, queue),
-            TokenType.LongArray => ParseLongArray(name, queue),
-            TokenType.List => ParseList(name, queue),
-            TokenType.Bool => new BoolTag(name, bool.Parse(token.Value)),
-            TokenType.Byte => new ByteTag(name, sbyte.Parse(token.Value)),
-            TokenType.Short => new ShortTag(name, short.Parse(token.Value)),
-            TokenType.Int => new IntTag(name, int.Parse(token.Value)),
-            TokenType.Long => new LongTag(name, long.Parse(token.Value)),
-            TokenType.Float => new FloatTag(name, float.Parse(token.Value)),
-            TokenType.Double => new DoubleTag(name, double.Parse(token.Value)),
-            _ => throw new SyntaxErrorException()
-        };
+        // Move onto the first significant character; it must open the root compound.
+        scanner.MoveNext(true, true);
+        scanner.AssertChar('{');
+        return ParseCompound(null, ref scanner);
    }
+
+    /// <summary>
+    /// Parses a compound tag. On entry the scanner rests on the opening '{'; on return it rests on the first
+    /// significant character after the matching '}', or at the end of the input.
+    /// </summary>
+    /// <param name="name">The name given to the resulting tag, or <see langword="null"/>.</param>
+    /// <param name="scanner">The scanner to read from.</param>
+    /// <exception cref="SyntaxErrorException">When the input is truncated or a delimiter is missing.</exception>
+    private static CompoundTag ParseCompound(string? name, ref Scanner scanner)
+    {
+        // Moves onto the next significant character. Child parsers may stop one past the last character,
+        // so truncated input is reported as a syntax error instead of indexing out of range.
+        static void SkipWhitespace(ref Scanner s)
+        {
+            if (s.IsEndOfInput)
+                s.SyntaxError("Unexpected end of input.");
+            if (char.IsWhiteSpace(s.Current))
+                s.MoveNext(true, true);
+        }
+
+        scanner.MoveNext(true, true);
+
-    private static Token MoveNext(Queue<Token> queue)
-    {
-        if (queue.TryDequeue(out var token))
-            return token;
-            
-        throw new SyntaxErrorException("Unexpected end-of-input");
-    }
-        
-    private static void MoveNext(Queue<Token> queue, TokenType assertType)
-    {
-        var token = MoveNext(queue);
-        if (token.Type != assertType)
-            throw new SyntaxErrorException($"Expected token of type {assertType}, but encountered {token.Type}.");
-    }
+        var result = new CompoundTag(name);
+
+        // For the case of "{}": consume the closing brace, exactly as the non-empty path below does.
+        // Otherwise an enclosing compound would mistake this brace for its own terminator.
+        if (scanner.Current == '}')
+        {
+            scanner.MoveNext(true, false);
+            return result;
+        }

-    private static CompoundTag ParseCompound(string name, Queue<Token> queue)
-    {
-        var compound = new CompoundTag(name);
-        while (queue.TryPeek(out var token) && token.Type != TokenType.EndCompound)
-        {
-            compound.Add(Parse(queue));
-        }
-        MoveNext(queue, TokenType.EndCompound);
-        return compound;
-    }
-        
-    private static ListTag ParseList(string name, Queue<Token> queue)
-    {
-        var values = new List<Tag>();
-        while (queue.TryPeek(out var token) && token.Type != TokenType.EndArray)
-        {
-            values.Add(Parse(queue));
-        }
-
-        MoveNext(queue, TokenType.EndArray);
-        if (values.Count > 0)
-        {
-            var type = values[0].Type;
-            return new ListTag(name, type, values);
-        }
-        return new ListTag(name, TagType.End);
-    }
-        
-    private static ByteArrayTag ParseByteArray(string name, Queue<Token> queue)
-    {
-        var values = new List<byte>();
-        foreach (var token in DequeueUntil(queue, TokenType.EndArray))
-        {
-            if (token.Type != TokenType.Byte)
-                throw new SyntaxErrorException($"Invalid token type in array, expected {TokenType.Byte}, got {token.Type}.");
-            values.Add(unchecked((byte) sbyte.Parse(token.Value)));
-        }
-        return new ByteArrayTag(name, values);
-    }
-
-    private static IntArrayTag ParseIntArray(string name, Queue<Token> queue)
-    {
-        var values = new List<int>();
-        foreach (var token in DequeueUntil(queue, TokenType.EndArray))
-        {
-            if (token.Type != TokenType.Int)
-                throw new SyntaxErrorException($"Invalid token type in array, expected {TokenType.Int}, got {token.Type}.");
-            values.Add(int.Parse(token.Value));
-        }
-        return new IntArrayTag(name, values);
-    }
-        
-    private static LongArrayTag ParseLongArray(string name, Queue<Token> queue)
-    {
-        var values = new List<long>();
-        foreach (var token in DequeueUntil(queue, TokenType.EndArray))
-        {
-            if (token.Type != TokenType.Long)
-                throw new SyntaxErrorException($"Invalid token type in array, expected {TokenType.Long}, got {token.Type}.");
-            values.Add(long.Parse(token.Value));
-        }
-        return new LongArrayTag(name, values);
-    }
-
-    private static IEnumerable<Token> DequeueUntil(Queue<Token> queue, TokenType type)
-    {
        while (true)
        {
-            var token = MoveNext(queue);
-            if (token.Type == type)
-                yield break;
-            yield return token;
+            // Read the name of the tag
+            var childName = ParseString(ref scanner, out _);
+
+            // Whitespace may separate the name from the ':' delimiter; move past it, then assert the delimiter.
+            SkipWhitespace(ref scanner);
+            scanner.AssertChar(':');
+
+            // Move to and parse the tag value
+            scanner.MoveNext(true, true);
+            var tag = ParseTag(childName, ref scanner);
+            result.Add(tag);
+
+            // Move to the separator or terminator that follows the value.
+            SkipWhitespace(ref scanner);
+
+            // Comma encountered, read another tag.
+            if (scanner.Current == ',')
+            {
+                scanner.MoveNext(true, true);
+                continue;
+            }
+
+            // Closing brace encountered, consume it and break the loop.
+            if (scanner.Current == '}')
+            {
+                scanner.MoveNext(true, false);
+                break;
+            }
+
+            // Invalid character
+            scanner.SyntaxError($"Expected ',' or '}}', got '{scanner.Current}'.");
        }
+
+        return result;
    }
+
+    /// <summary>
+    /// Reads a string starting at the scanner's current character.
+    /// </summary>
+    /// <remarks>
+    /// A leading <c>"</c> or <c>'</c> starts a quoted string that runs to the next unescaped occurrence of the
+    /// same quote. The character following a backslash is appended as-is. On success the scanner is left one
+    /// position past the closing quote. Anything else is handed to <see cref="ParseUnquotedString"/>.
+    /// </remarks>
+    /// <param name="scanner">The scanner to read from.</param>
+    /// <param name="quoted"><see langword="true"/> when the string was enclosed in quotes.</param>
+    /// <returns>The string contents without the surrounding quotes.</returns>
+    private static string ParseString(ref Scanner scanner, out bool quoted)
+    {
+        var delimiter = scanner.Current;
+        quoted = delimiter is '"' or '\'';
+        if (!quoted)
+            return ParseUnquotedString(ref scanner);
+
+        var text = new StringBuilder();
+        var pendingEscape = false;
        
-    private static string FirstGroupValue(Match match) => match.Groups[1].Value;
+        while (scanner.MoveNext(false, false))
+        {
+            var c = scanner.Current;
+
+            // An unescaped matching quote terminates the string; step past it and finish.
+            if (!pendingEscape && c == delimiter)
+            {
+                scanner.Position++;
+                return text.ToString();
+            }
+
+            // An unescaped backslash only marks the next character as literal.
+            // TODO: Control characters like \r \n, \t, etc.
+            if (!pendingEscape && c == '\\')
+            {
+                pendingEscape = true;
+                continue;
+            }
+
+            text.Append(c);
+            pendingEscape = false;
+        }
+
+        // The input ended before the closing quote was seen.
+        throw scanner.SyntaxError("Improperly terminated string.");
+    }
+    
+    /// <summary>
+    /// Reads an unquoted string that begins at the scanner's current character and ends just before the first
+    /// following character rejected by <see cref="AllowedInUnquoted"/>. The scanner is left on that character.
+    /// </summary>
+    /// <remarks>The character the scanner starts on is always included; only the ones after it are checked.</remarks>
+    private static string ParseUnquotedString(ref Scanner scanner)
+    {
+        var begin = scanner.Position;
+        while (scanner.MoveNext(false, true))
+        {
+            if (AllowedInUnquoted(scanner.Current))
+                continue;
+
+            // Everything from the starting character up to (not including) the current one.
+            return scanner.Source[begin..scanner.Position].ToString();
+        }
+
+        return string.Empty;
+    }
+
+    /// <summary>
+    /// Dispatches on the current character: '{' starts a compound, '[' starts a list or array, and anything
+    /// else is parsed as a literal.
+    /// </summary>
+    private static Tag ParseTag(string? name, ref Scanner scanner)
+    {
+        switch (scanner.Current)
+        {
+            case '{':
+                return ParseCompound(name, ref scanner);
+            case '[':
+                return ParseArray(name, ref scanner);
+            default:
+                return ParseLiteral(name, ref scanner);
+        }
+    }
+
+    /// <summary>
+    /// Reads a scalar value and wraps it in the first tag type its text matches.
+    /// </summary>
+    /// <remarks>
+    /// Checks run in this order: quoted or empty text (string), <c>true</c>/<c>false</c> (byte), a value ending
+    /// in a number character (double when it contains '.', otherwise int), a suffixed number, a <c>0x</c>
+    /// hexadecimal int, and finally an unquoted string.
+    /// </remarks>
+    private static Tag ParseLiteral(string? name, ref Scanner scanner)
+    {
+        var text = ParseString(ref scanner, out var wasQuoted);
+
+        // Quoted or empty input is always a string.
+        if (wasQuoted || text.Length == 0)
+            return new StringTag(name, text);
+
+        // Boolean words are stored in a byte tag.
+        if (bool.TryParse(text, out var flag))
+            return new ByteTag(name, flag);
+
+        var invariant = NumberFormatInfo.InvariantInfo;
+        var last = text[^1];
+
+        if (!char.IsNumber(last))
+        {
+            // The trailing character may be a type suffix.
+            if (TryParseNumber(name, text, last, out var suffixed))
+                return suffixed;
+        }
+        else
+        {
+            // int and double do not require a suffix
+            var hasPoint = text.Contains('.');
+            if (hasPoint && double.TryParse(text, NumberStyles.Float, invariant, out var f64))
+                return new DoubleTag(name, f64);
+
+            if (int.TryParse(text, NumberStyles.Integer, invariant, out var i32))
+                return new IntTag(name, i32);
+        }
+
+        // TODO: The "official" spec doesn't seem to support hexadecimal numbers
+        var looksHex = text.Length > 2 && text[0] == '0' && char.ToLowerInvariant(text[1]) == 'x';
+        if (looksHex && int.TryParse(text[2..], NumberStyles.HexNumber, invariant, out var hex))
+            return new IntTag(name, hex);
+
+        // When all else fails, assume it is an unquoted string
+        return new StringTag(name, text);
+    }
+
+
+    /// <summary>
+    /// Tries to interpret <paramref name="value"/> as a number whose last character is a type suffix.
+    /// </summary>
+    /// <param name="name">The name given to the resulting tag.</param>
+    /// <param name="value">The full literal text, including the suffix character.</param>
+    /// <param name="suffix">The last character of <paramref name="value"/>; B, S, L, F and D are recognised in either case.</param>
+    /// <param name="tag">The parsed tag on success; a null reference otherwise.</param>
+    /// <returns><see langword="true"/> when the text before the suffix parsed as the suffix's numeric type.</returns>
+    private static bool TryParseNumber(string? name, string value, char suffix, out Tag tag)
+    {
+        // Cheap upper-casing: every character at or above 'a' is shifted down by 32.
+        var kind = suffix >= 'a' ? (char)(suffix - 32) : suffix;
+        var invariant = NumberFormatInfo.InvariantInfo;
+
+        tag = null!;
+        switch (kind)
+        {
+            case 'B' when int.TryParse(value[..^1], NumberStyles.Integer, invariant, out var u8):
+                tag = new ByteTag(name, u8);
+                break;
+            case 'S' when short.TryParse(value[..^1], NumberStyles.Integer, invariant, out var i16):
+                tag = new ShortTag(name, i16);
+                break;
+            case 'L' when long.TryParse(value[..^1], NumberStyles.Integer, invariant, out var i64):
+                tag = new LongTag(name, i64);
+                break;
+            case 'F' when float.TryParse(value[..^1], NumberStyles.Float, invariant, out var f32):
+                tag = new FloatTag(name, f32);
+                break;
+            case 'D' when double.TryParse(value[..^1], NumberStyles.Float, invariant, out var f64):
+                tag = new DoubleTag(name, f64);
+                break;
+        }
+
+        // A tag was assigned only when one of the cases above matched and parsed.
+        return tag is not null;
+    }
+    
+    /// <summary>
+    /// Parses what follows an opening '[': an empty list, a typed array (<c>[B;…]</c>, <c>[I;…]</c>,
+    /// <c>[L;…]</c>), or a list of tags. On entry the scanner rests on the '['.
+    /// </summary>
+    /// <param name="name">The name given to the resulting tag, or <see langword="null"/>.</param>
+    /// <param name="scanner">The scanner to read from.</param>
+    /// <exception cref="SyntaxErrorException">When the input is truncated or the type specifier is not B, I or L.</exception>
+    private static Tag ParseArray(string? name, ref Scanner scanner)
+    {
+        scanner.MoveNext(true, true);
+        if (scanner.Current == ']')
+        {
+            // Consume the closing bracket so the enclosing compound or list sees its own separator next.
+            scanner.MoveNext(true, false);
+            return new ListTag(name, TagType.End);
+        }
+
+        // A quote here starts a string element (which may itself begin with ';'), never a type specifier.
+        var startsString = scanner.Current is '"' or '\'';
+        if (!startsString && scanner.Peek() == ';')
+        {
+            // This is an array of integer values
+            var prefix = scanner.Current;
+            if (prefix is not ('B' or 'I' or 'L'))
+                scanner.SyntaxError($"Invalid type specifier. Expected 'B', 'I', or 'L', got '{prefix}'.");
+
+            // Step over the specifier and the ';'. Nothing may follow them in truncated input.
+            scanner.Position += 2;
+            if (scanner.IsEndOfInput)
+                scanner.SyntaxError("Unexpected end of input.");
+
+            return prefix switch
+            {
+                'B' => new ByteArrayTag(name, ParseArrayValues<byte>(ref scanner)),
+                'I' => new IntArrayTag(name, ParseArrayValues<int>(ref scanner)),
+                'L' => new LongArrayTag(name, ParseArrayValues<long>(ref scanner)),
+                _ => throw scanner.SyntaxError($"Invalid type specifier. Expected 'B', 'I', or 'L', got '{prefix}'.")
+            };
+        }
+
+        // No prefix, so this must be a list of tags if valid
+        return ParseList(name, ref scanner);
+    }
+
+    /// <summary>
+    /// Parses the comma-separated elements of a list up to and including the closing ']'. On entry the scanner
+    /// rests on the first character of the first element.
+    /// </summary>
+    /// <param name="name">The name given to the resulting tag, or <see langword="null"/>.</param>
+    /// <param name="scanner">The scanner to read from.</param>
+    /// <returns>A list tag whose child type is taken from its first element.</returns>
+    /// <exception cref="SyntaxErrorException">When the input is truncated or an element is not followed by ',' or ']'.</exception>
+    private static Tag ParseList(string? name, ref Scanner scanner)
+    {
+        var list = new List<Tag>();
+        while (true)
+        {
+            var child = ParseTag(null, ref scanner);
+            list.Add(child);
+
+            // A child parser may stop one past the last character (for example after a closing quote);
+            // report that as truncated input instead of indexing out of range.
+            if (scanner.IsEndOfInput)
+                scanner.SyntaxError("Unexpected end of input.");
+
+            if (char.IsWhiteSpace(scanner.Current))
+                scanner.MoveNext(true, true);
+
+            // Comma encountered, read another tag.
+            if (scanner.Current == ',')
+            {
+                scanner.MoveNext(true, true);
+                continue;
+            }
+
+            // Closing bracket encountered, consume it and break the loop.
+            if (scanner.Current == ']')
+            {
+                scanner.MoveNext(true, false);
+                break;
+            }
+
+            // Invalid character
+            scanner.SyntaxError($"Expected ',' or ']', got '{scanner.Current}'.");
+        }
+
+        var childType = list.Count > 0 ? list[0].Type : TagType.End;
+        return new ListTag(name, childType, list);
+    }
+
+    /// <summary>
+    /// Parses the values of a typed array. On entry the scanner rests on the first character after the
+    /// <c>;</c> of the type specifier; on return it rests one position past the closing ']'.
+    /// </summary>
+    /// <typeparam name="T">The element type of the array.</typeparam>
+    /// <param name="scanner">The scanner to read from.</param>
+    /// <returns>The parsed values, in source order.</returns>
+    /// <exception cref="SyntaxErrorException">
+    /// When the input is truncated, contains a character that cannot appear in an integer array, or contains
+    /// a value that does not fit <typeparamref name="T"/>.
+    /// </exception>
+    private static T[] ParseArrayValues<T>(ref Scanner scanner) where T : INumber<T>, IParsable<T>
+    {
+        if (scanner.IsEndOfInput)
+            scanner.SyntaxError("Unexpected end of input.");
+
+        // Early-out for []
+        if (scanner.Current == ']')
+        {
+            scanner.Position++;
+            return Array.Empty<T>();
+        }
+
+        // Validate every character up to the closing bracket, including the one the scanner starts on.
+        // MoveNext skips whitespace, so whitespace is only ever seen on that first character.
+        var start = scanner.Position;
+        do
+        {
+            var c = char.ToLowerInvariant(scanner.Current);
+            if (c == ']')
+                break;
+
+            var allowed = c is (>= '0' and <= '9') or ',' or '-' or '+' or 'b' or 'l' || char.IsWhiteSpace(c);
+            if (!allowed)
+                scanner.SyntaxError($"Invalid character '{c}' in integer array.");
+        } while (scanner.MoveNext(true, true));
+
+        var span = scanner.Source.Slice(start, scanner.Position - start);
+        var strings = new string(span).Split(SplitSeparators, SplitOpts);
+
+        var values = new T[strings.Length];
+        for (var i = 0; i < values.Length; i++)
+        {
+            if (T.TryParse(strings[i], NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out var parsed))
+            {
+                values[i] = parsed;
+                continue;
+            }
+
+            // Byte arrays are stored unsigned here, but a negative byte in the text is still legitimate:
+            // keep its bit pattern, as the previous implementation did with unchecked((byte) sbyte).
+            if (typeof(T) == typeof(byte) &&
+                sbyte.TryParse(strings[i], NumberStyles.Integer, NumberFormatInfo.InvariantInfo, out var signed))
+            {
+                values[i] = T.CreateTruncating(signed);
+                continue;
+            }
+
+            scanner.SyntaxError($"Invalid value '{strings[i]}' in integer array.");
+        }
+
+        scanner.Position++; // Consume the closing ']'
+        return values;
+    }
+    
+    /// <summary>
+    /// Tells whether <paramref name="c"/> may appear in an unquoted string: ASCII letters and digits plus
+    /// '_', '-', '.' and '+'.
+    /// </summary>
+    private static bool AllowedInUnquoted(char c)
+    {
+        return c switch
+        {
+            '_' or '-' or '.' or '+' => true,
+            >= '0' and <= '9' => true,
+            >= 'A' and <= 'Z' => true,
+            >= 'a' and <= 'z' => true,
+            _ => false
+        };
+    }
+    
+    private const StringSplitOptions SplitOpts = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;
+    private static readonly char[] SplitSeparators = new[] { ',', 'b', 'B', 'l', 'L' };
 }
--- a/SharpNBT/SNBT/Token.cs
+++ b/SharpNBT/SNBT/Token.cs
@ -1,34 +0,0 @@
-using JetBrains.Annotations;
-
-namespace SharpNBT.SNBT;
-
-/// <summary>
-/// An object emitted by the lexer to describe a logical fragment of code that can be parsed.
-/// </summary>
-[PublicAPI]
-public sealed class Token
-{
-    /// <summary>
-    /// Gets a value describing the general type code fragment this <see cref="Token"/> represents.
-    /// </summary>
-    public TokenType Type { get; }
-
-    /// <summary>
-    /// Gets a value of this fragment, which can vary depending on context and the <see cref="Type"/>.
-    /// </summary>
-    public string Value { get; }
-
-    /// <summary>
-    /// Creates a new instance of the <see cref="Token"/> class.
-    /// </summary>
-    /// <param name="type">A value describing the general type code fragment this <see cref="Token"/> represents.</param>
-    /// <param name="value">Ahe value of this code fragment.</param>
-    public Token(TokenType type,  string value)
-    {
-        Type = type;
-        Value = value;
-    }
-
-    /// <inheritdoc />
-    public override string ToString() => $"[{Type}] \"{Value}\"";
-}
--- a/SharpNBT/SNBT/TokenType.cs
+++ b/SharpNBT/SNBT/TokenType.cs
@ -1,102 +0,0 @@
-using JetBrains.Annotations;
-
-namespace SharpNBT.SNBT;
-
-/// <summary>
-/// Describes types of tokens that the SNBT lexer can emit.
-/// </summary>
-[PublicAPI]
-public enum TokenType
-{
-    /// <summary>
-    /// Any whitespace/newline not found within a string or identifier.
-    /// </summary>
-    /// <remarks>This type is not yielded during tokenization.</remarks>
-    Whitespace,
-        
-    /// <summary>
-    /// A separator between objects and array elements.
-    /// </summary>
-    /// <remarks>This type is not yielded during tokenization.</remarks>
-    Separator,
-        
-    /// <summary>
-    /// The beginning of new <see cref="CompoundTag"/> object.
-    /// </summary>
-    Compound,
-        
-    /// <summary>
-    /// The end of a <see cref="CompoundTag"/>.
-    /// </summary>
-    EndCompound,
-        
-    /// <summary>
-    /// The name of an tag.
-    /// </summary>
-    Identifier,
-        
-    /// <summary>
-    /// A <see cref="StringTag"/> value, which may be contain escaped quotes.
-    /// </summary>
-    String,
-        
-    /// <summary>
-    /// The beginning of a <see cref="ByteArrayTag"/>.
-    /// </summary>
-    ByteArray,
-        
-    /// <summary>
-    /// The beginning of a <see cref="IntArrayTag"/>.
-    /// </summary>
-    IntArray,
-        
-    /// <summary>
-    /// The beginning of a <see cref="LongArrayTag"/>.
-    /// </summary>
-    LongArray,
-        
-    /// <summary>
-    /// The beginning of a <see cref="ListTag"/>.
-    /// </summary>
-    List,
-        
-    /// <summary>
-    /// The end of a <see cref="ByteArrayTag"/>, <see cref="IntArrayTag"/>, <see cref="LongArrayTag"/> or <see cref="ListTag"/>.
-    /// </summary>
-    EndArray,
-        
-    /// <summary>
-    /// A <see cref="ByteTag"/> value or element of a <see cref="ByteArrayTag"/> depending on context.
-    /// </summary>
-    Byte,
-        
-    /// <summary>
-    /// A <see cref="BoolTag"/> value.
-    /// </summary>
-    Bool,
-        
-    /// <summary>
-    /// A <see cref="ShortTag"/> value.
-    /// </summary>
-    Short,
-        
-    /// <summary>
-    /// A <see cref="IntTag"/> value or element of a <see cref="IntArrayTag"/> depending on context.
-    /// </summary>
-    Int,
-        
-    /// <summary>
-    /// A <see cref="LongTag"/> value or element of a <see cref="LongArrayTag"/> depending on context.
-    /// </summary>
-    Long,
-        
-    /// <summary>
-    /// A <see cref="FloatTag"/> value.
-    /// </summary>
-    Float,
-        
-    /// <summary>
-    /// A <see cref="DoubleTag"/> value.
-    /// </summary>
-    Double
-}
--- a/SharpNBT/Tags/ArrayTag.cs
+++ b/SharpNBT/Tags/ArrayTag.cs
@ -0,0 +1,6 @@
+namespace SharpNBT;
+
+/// <summary>
+/// Placeholder type; it currently declares no members.
+/// </summary>
+public class ArrayTag
+{
+}
--- a/SharpNBT/Tags/NumericTag.cs
+++ b/SharpNBT/Tags/NumericTag.cs
@ -0,0 +1,83 @@
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+using System.Runtime.Serialization;
+using JetBrains.Annotations;
+
+namespace SharpNBT;
+
+/// <summary>
+/// Abstract base class for <see cref="Tag"/> types that contain a single numeric value.
+/// </summary>
+/// <typeparam name="T">A value type that implements <see cref="INumber{TSelf}"/>.</typeparam>
+[PublicAPI][Serializable]
+public abstract class NumericTag<T> : Tag, IEquatable<NumericTag<T>>, IComparable<NumericTag<T>>, IComparable where T : unmanaged, INumber<T>
+{
+    /// <summary>
+    /// Gets or sets the numeric value held by this tag.
+    /// </summary>
+    public T Value { get; set; }
+
+    /// <summary>
+    /// Initializes the tag with its type, name and value.
+    /// </summary>
+    /// <param name="type">The tag type passed on to the base constructor.</param>
+    /// <param name="name">The tag name passed on to the base constructor; may be <see langword="null"/>.</param>
+    /// <param name="value">The initial value.</param>
+    protected NumericTag(TagType type, string? name, T value) : base(type, name)
+    {
+        Value = value;
+    }
+
+    /// <summary>
+    /// Initializes the tag from serialized data, reading the entry named <c>"value"</c>.
+    /// </summary>
+    /// <param name="info">The serialization data to read from.</param>
+    /// <param name="context">The streaming context passed on to the base constructor.</param>
+    protected NumericTag(SerializationInfo info, StreamingContext context) : base(info, context)
+    {
+        // A missing entry falls back to default(T).
+        var stored = info.GetValue("value", typeof(T));
+        if (stored is null)
+            Value = default;
+        else
+            Value = (T)stored;
+    }
+
+    /// <summary>
+    /// Two numeric tags are equal when the base comparison succeeds and their values are equal.
+    /// </summary>
+    public bool Equals(NumericTag<T>? other)
+    {
+        if (other is null)
+            return false;
+
+        return ReferenceEquals(this, other) || (base.Equals(other) && Value.Equals(other.Value));
+    }
+
+    /// <summary>
+    /// Compares against an arbitrary object; only an instance of exactly the same runtime type can be equal.
+    /// </summary>
+    public override bool Equals(object? obj)
+    {
+        if (obj is null)
+            return false;
+        if (ReferenceEquals(this, obj))
+            return true;
+
+        return obj.GetType() == GetType() && Equals((NumericTag<T>)obj);
+    }
+
+    /// <summary>
+    /// Returns the hash code computed by the base class.
+    /// </summary>
+    public override int GetHashCode()
+    {
+        return base.GetHashCode();
+    }
+
+    /// <summary>
+    /// Orders tags by <see cref="Value"/>; any instance sorts after <see langword="null"/>.
+    /// </summary>
+    public int CompareTo(NumericTag<T>? other)
+    {
+        if (ReferenceEquals(this, other))
+            return 0;
+
+        return other is null ? 1 : Value.CompareTo(other.Value);
+    }
+
+    /// <summary>
+    /// Non-generic ordering; see <see cref="CompareTo(NumericTag{T})"/>.
+    /// </summary>
+    /// <exception cref="ArgumentException">When <paramref name="obj"/> is not a <see cref="NumericTag{T}"/>.</exception>
+    public int CompareTo(object? obj)
+    {
+        return obj switch
+        {
+            null => 1,
+            _ when ReferenceEquals(this, obj) => 0,
+            NumericTag<T> other => CompareTo(other),
+            _ => throw new ArgumentException($"Object must be of type {nameof(NumericTag<T>)}")
+        };
+    }
+
+    /// <summary>Equality operator; defers to <see cref="object.Equals(object, object)"/>.</summary>
+    public static bool operator ==(NumericTag<T>? left, NumericTag<T>? right)
+    {
+        return Equals(left, right);
+    }
+
+    /// <summary>Inequality operator; the negation of <c>==</c>.</summary>
+    public static bool operator !=(NumericTag<T>? left, NumericTag<T>? right)
+    {
+        return !Equals(left, right);
+    }
+
+    /// <summary>Less-than, evaluated through the default comparer for <see cref="NumericTag{T}"/>.</summary>
+    public static bool operator <(NumericTag<T>? left, NumericTag<T>? right) =>
+        Comparer<NumericTag<T>>.Default.Compare(left, right) < 0;
+
+    /// <summary>Greater-than, evaluated through the default comparer for <see cref="NumericTag{T}"/>.</summary>
+    public static bool operator >(NumericTag<T>? left, NumericTag<T>? right) =>
+        Comparer<NumericTag<T>>.Default.Compare(left, right) > 0;
+
+    /// <summary>Less-than-or-equal, evaluated through the default comparer for <see cref="NumericTag{T}"/>.</summary>
+    public static bool operator <=(NumericTag<T>? left, NumericTag<T>? right) =>
+        Comparer<NumericTag<T>>.Default.Compare(left, right) <= 0;
+
+    /// <summary>Greater-than-or-equal, evaluated through the default comparer for <see cref="NumericTag{T}"/>.</summary>
+    public static bool operator >=(NumericTag<T>? left, NumericTag<T>? right) =>
+        Comparer<NumericTag<T>>.Default.Compare(left, right) >= 0;
+}