From 31aac628b6604d00db9690e0faaccbf116fbfb09 Mon Sep 17 00:00:00 2001
From: ForeverZer0 <Zvezda2013!Svitlana>
Date: Tue, 31 Aug 2021 19:33:57 -0400
Subject: [PATCH] Fixed formatting with ByteArray and LongArray

---
 SharpNBT.Tests/StringifiedTest.cs |  25 +++++
 SharpNBT/SNBT/Lexer.cs            | 153 +++++++++++++++++++++++-------
 SharpNBT/Tags/ByteArrayTag.cs     |   8 +-
 SharpNBT/Tags/LongArrayTag.cs     |  10 +-
 SharpNBT/Tags/Tag.cs              |   2 +
 5 files changed, 161 insertions(+), 37 deletions(-)

diff --git a/SharpNBT.Tests/StringifiedTest.cs b/SharpNBT.Tests/StringifiedTest.cs
index 4ec3584..05b7adc 100644
--- a/SharpNBT.Tests/StringifiedTest.cs
+++ b/SharpNBT.Tests/StringifiedTest.cs
@@ -1,3 +1,6 @@
+using System.IO;
+using Microsoft.VisualStudio.TestPlatform.ObjectModel;
+using SharpNBT.SNBT;
 using Xunit;
 using Xunit.Abstractions;
 
@@ -26,5 +29,27 @@ namespace SharpNBT.Tests
             var tag = TestHelper.GetTag("hello_world.nbt", CompressionType.None);
             output.WriteLine(tag.Stringify(true));
         }
+
+        /// <summary>Prints every token the lexer yields for a small hand-written SNBT compound (no assertions).</summary>
+        [Fact]
+        public void ParseSmall()
+        {
+            const string testString = "{name1:123,name2:\"sometext1\",name3:{subname1:456,subname2:\"sometext2\"}}";
+            foreach (var tok in new Lexer().Tokenize(testString))
+            {
+                output.WriteLine($"{tok.Type}: \"{tok.Match.Trim()}\"");
+            }
+        }
+
+        /// <summary>Prints every token the lexer yields for the bigtest.snbt sample file (no assertions).</summary>
+        [Fact]
+        public void ParseBig()
+        {
+            var testString = File.ReadAllText("/code/ruby/craftbook-nbt/test/bigtest.snbt"); // NOTE(review): absolute path, presumably only valid on the author's machine
+            foreach (var tok in new Lexer().Tokenize(testString))
+            {
+                output.WriteLine($"{tok.Type}: \"{tok.Match.Trim()}\"");
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/SharpNBT/SNBT/Lexer.cs b/SharpNBT/SNBT/Lexer.cs
index c6b40aa..7f620f9 100644
--- a/SharpNBT/SNBT/Lexer.cs
+++ b/SharpNBT/SNBT/Lexer.cs
@@ -1,7 +1,10 @@
 using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Reflection.Emit;
+using System.Security;
 using System.Text.RegularExpressions;
+using JetBrains.Annotations;
 
 namespace SharpNBT.SNBT
 {
@@ -26,70 +29,152 @@ namespace SharpNBT.SNBT
         Long,
         Int,
         WhiteSpace,
-        Char
+        Char,
+        EscapedChar
     }
 
+
+    /// <summary>Pairs a token type with the regular expression that recognises it and an optional post-processor.</summary>
     internal sealed class LexerRule
     {
-        internal delegate string PostProcessHandler(string input);
+
+        /// <summary>Converts a successful match into the text stored on the resulting token.</summary>
+        internal delegate string PostProcessHandler(Match match);
         
-        private readonly Regex regex;
+        public Regex Matcher { get; }
         
         public TokenType Type { get; }
         
-        public LexerRule(TokenType type, string pattern) : this(type, new Regex(pattern, RegexOptions.Multiline | RegexOptions.CultureInvariant))
-        {
-        }
+        /// <summary>Human-readable label for this rule.</summary>
+        public string Description { get; }
 
-        public LexerRule(TokenType type, Regex regex)
+        /// <summary>Returns the handler's result, or the raw matched text when there is no handler or it returns null.</summary>
+        public string PostProcess(Match match) => handler?.Invoke(match) ?? match.Value;
+
+        private readonly PostProcessHandler handler;
+        
+        /// <summary>Creates a rule; <paramref name="pattern"/> is built into a Regex with the Multiline and CultureInvariant options.</summary>
+        /// <param name="process">Optional post-processor; may be null, in which case PostProcess returns the matched text.</param>
+        public LexerRule(TokenType type, string description, string pattern, [CanBeNull] PostProcessHandler process)
         {
             Type = type;
-            this.regex = regex ?? throw new ArgumentNullException(nameof(regex));
+            Description = description;
+            Matcher = new Regex(pattern, RegexOptions.Multiline | RegexOptions.CultureInvariant);
+            handler = process;
         }
 
-        public LexerRule(TokenType type, Regex regex, PostProcessHandler handler)
+    }
+    
+    /// <summary>A token produced by the lexer: its category and the (post-processed) matched text.</summary>
+    internal sealed class Token
+    {
+        public TokenType Type { get; }
+        public string Match { get; }
+
+        public Token(TokenType type, string match)
         {
-            
+            // Plain value holder: both arguments are stored as given, without validation.
+            Type = type;
+            Match = match;
         }
     }
 
     internal class Lexer
     {
+        private static readonly string DoubleQuoteIdentifier = "\"(.*?)\"\\s*(?=:)";
+
         private static readonly List<LexerRule> rules;
         
+
+        // NOTE(review): in the code visible here these constants are referenced only by commented-out rules.
+        // NOTE(review): the quoted identifier patterns end in (?>:), an atomic group that consumes ':', while the unquoted one uses the (?=:) lookahead - confirm which is intended.
+        private const string IDENTIFIER_DOUBLE_QUOTES = "\".*?\"\\s*(?>:)";
+        private const string IDENTIFIER_SINGLE_QUOTES = "'.*?'\\s*(?>:)";
+        private const string IDENTIFIER_NO_QUOTES = @"[A-Za-z0-9_-]+\s*(?=:)";
+
+        private const string STRING_DOUBLE_QUOTED = "^\\s*\".*?\"";
+        private const string STRING_SINGLE_QUOTED = "^\\s*'.*?'";
+        private const string COMPOUND_START = "\\s*{\\s*";
+        private const string COMPOUND_END = @"\}";
+
+        private const string SEPARATOR = "^\\s*:\\s*";
+        private const string COMMA = "^\\s*,\\s*";
+        
+        
         static Lexer()
         {
             rules = new List<LexerRule>
             {
-                new LexerRule(TokenType.CompoundBegin, @"\{[\s]*"),
-                new LexerRule(TokenType.CompoundEnd, @"[\s]*\}"),
-                new LexerRule(TokenType.Identifier, "\".+?\"(?=:)"),
-                new LexerRule(TokenType.Identifier, "'.+?'(?=:) "),
-                new LexerRule(TokenType.Identifier, "A-Za-z0-9_-]+?(?=:) "),
-                new LexerRule(TokenType.String, "\".*?\""),
-                new LexerRule(TokenType.String, "'.*?'"),
-                new LexerRule(TokenType.Separator, @"[\s]*:[\s]*"),
-                new LexerRule(TokenType.Comma, @"[\s]*,[\s]*"),
-                new LexerRule(TokenType.ByteArray, @"\[B;[\s]*?"),
-                new LexerRule(TokenType.IntArray, @"\[I;[\s]*?"),
-                new LexerRule(TokenType.LongArray, @"\[L;[\s]*?"),
-                new LexerRule(TokenType.ListArray, @"\[[\s]*?"),
-                new LexerRule(TokenType.EndArray, @"[\s]*\]"),
-                new LexerRule(TokenType.Float, @"-?[0-9]*\.[0-9]+[Ff]"),
-                new LexerRule(TokenType.Double, @"-?[0-9]*\.[0-9]+[Dd]?"),
-                new LexerRule(TokenType.Byte, "-?([0-9]+)[Bb]"),
-                new LexerRule(TokenType.Short, "-?([0-9]+)[Ss]"),
-                new LexerRule(TokenType.Long, "-?([0-9]+)[Ll]"),
-                new LexerRule(TokenType.Int, "-?([0-9]+)"),
-                new LexerRule(TokenType.WhiteSpace, @"[\s]+"),
-                new LexerRule(TokenType.String, @"[\S]+"),
-                new LexerRule(TokenType.Char, ".")
+                // Tokenize walks this list in order and stops at the first rule that matches, so order is significant.
+                new LexerRule(TokenType.CompoundBegin, "Opening Compound brace", "^{", null),
+                new LexerRule(TokenType.WhiteSpace, "Useless whitespace", @"^[\s]+", null),
+                // Names: the post-processor returns capture group 1 (the bare name); the (?=:) lookahead requires a following ':'.
+                new LexerRule(TokenType.Identifier, "Single-quoted name",  "^\\s*'(.*?)'\\s*(?=:)", m => m.Groups[1].Value),
+                new LexerRule(TokenType.Identifier, "Double-quoted name",  "^\\s*\"(.*?)\"\\s*(?=:)", m => m.Groups[1].Value),
+                new LexerRule(TokenType.Identifier, "Unquoted name",  "^\\s*([A-Za-z0-9_-]+)\\s*(?=:)", m => m.Groups[1].Value),
+                // String values: no post-processor, so the token text is the whole match, quotes included.
+                new LexerRule(TokenType.String, "Double-quoted string value", "^\"(.*?)\"", null),
+                new LexerRule(TokenType.String, "Single-quoted string value", "^'(.*?)'", null)
+                // NOTE(review): the disabled rules below use a two-argument constructor that LexerRule does not declare here - presumably awaiting a port.
+                // new LexerRule(TokenType.CompoundBegin, COMPOUND_START),
+                // new LexerRule(TokenType.CompoundEnd, COMPOUND_END),
+                // new LexerRule(TokenType.Identifier, IDENTIFIER_DOUBLE_QUOTES),
+                // new LexerRule(TokenType.Identifier, IDENTIFIER_SINGLE_QUOTES),
+                // new LexerRule(TokenType.Identifier, IDENTIFIER_NO_QUOTES),
+                // new LexerRule(TokenType.String, STRING_DOUBLE_QUOTED),
+                // new LexerRule(TokenType.String, STRING_SINGLE_QUOTED),
+                // new LexerRule(TokenType.Separator, SEPARATOR),
+                // new LexerRule(TokenType.Comma, COMMA),
+                // new LexerRule(TokenType.ByteArray, @"\[B;[\s]*?"),
+                // new LexerRule(TokenType.IntArray, @"\[I;[\s]*?"),
+                // new LexerRule(TokenType.LongArray, @"\[L;[\s]*?"),
+                // new LexerRule(TokenType.ListArray, @"\[[\s]*?"),
+                // new LexerRule(TokenType.EndArray, @"[\s]*\]"),
+                // new LexerRule(TokenType.Float, @"-?[0-9]*\.[0-9]+[Ff]"),
+                // new LexerRule(TokenType.Double, @"-?[0-9]*\.[0-9]+[Dd]?"),
+                // new LexerRule(TokenType.Byte, "-?([0-9]+)[Bb]"),
+                // new LexerRule(TokenType.Short, "-?([0-9]+)[Ss]"),
+                // new LexerRule(TokenType.Long, "-?([0-9]+)[Ll]"),
+                // new LexerRule(TokenType.Int, "-?([0-9]+)"),
+                // new LexerRule(TokenType.WhiteSpace, @"[\s]+"),
+                // new LexerRule(TokenType.String, @"[\S]+"),
+                // new LexerRule(TokenType.Char, ".")
             };
         }
-        
+
+        private static string Process(Match match) // NOTE(review): unimplemented stub; no rule visible here passes it as a post-processor
+        {
+            throw new NotImplementedException();
+        }
+
         public Lexer()
         {
             
         }
+        
+        /// <summary>
+        /// Lazily splits <paramref name="input"/> into tokens, trying the rules in order at each position.
+        /// Characters that no rule matches at the current position are skipped without producing a token.
+        /// </summary>
+        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null (raised on first enumeration).</exception>
+        public IEnumerable<Token> Tokenize(string input)
+        {
+            if (input is null) throw new ArgumentNullException(nameof(input));
+            var pos = 0;
+            while (pos < input.Length)
+            {
+                var consumed = 1; // advance one character when no rule applies here
+                foreach (var rule in rules)
+                {
+                    // The ranged overload anchors '^' at pos; Multiline could still match at a later line start, hence the Index check.
+                    var match = rule.Matcher.Match(input, pos, input.Length - pos);
+                    if (!match.Success || match.Index != pos || match.Length == 0) continue;
+                    yield return new Token(rule.Type, rule.PostProcess(match));
+                    consumed = match.Length;
+                    break;
+                }
+                pos += consumed;
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/SharpNBT/Tags/ByteArrayTag.cs b/SharpNBT/Tags/ByteArrayTag.cs
index 823c836..4638e7f 100644
--- a/SharpNBT/Tags/ByteArrayTag.cs
+++ b/SharpNBT/Tags/ByteArrayTag.cs
@@ -63,6 +63,12 @@ namespace SharpNBT
         /// </summary>
         /// <returns>This NBT tag in SNBT format.</returns>
         /// <seealso href="https://minecraft.fandom.com/wiki/NBT_format#SNBT_format"/>
-        public override string Stringify() => $"{StringifyName}[B;{string.Join(',', this)}]";
+        public override string Stringify()
+        {
+            var suffixed = new string[Count]; // one entry per element, each carrying the 'b' suffix
+            for (var index = 0; index < suffixed.Length; index++)
+                suffixed[index] = this[index] + "b";
+            return StringifyName + "[B;" + string.Join(',', suffixed) + "]";
+        }
     }
 }
\ No newline at end of file
diff --git a/SharpNBT/Tags/LongArrayTag.cs b/SharpNBT/Tags/LongArrayTag.cs
index f7c3178..a6d0f09 100644
--- a/SharpNBT/Tags/LongArrayTag.cs
+++ b/SharpNBT/Tags/LongArrayTag.cs
@@ -60,12 +60,18 @@ namespace SharpNBT
             var word = Count == 1 ? Strings.WordElement : Strings.WordElements;
             return $"TAG_Long_Array({PrettyName}): [{Count} {word}]";
         }
-        
+
         /// <summary>
         /// Gets the <i>string</i> representation of this NBT tag (SNBT).
         /// </summary>
         /// <returns>This NBT tag in SNBT format.</returns>
         /// <seealso href="https://minecraft.fandom.com/wiki/NBT_format#SNBT_format"/>
-        public override string Stringify() => $"{StringifyName}[L;{string.Join(',', this)}]";
+        public override string Stringify()
+        {
+            var values = new string[Count];
+            for (var i = 0; i < Count; i++) // invariant culture: some cultures render the minus sign as a non-ASCII character
+                values[i] = System.FormattableString.Invariant($"{this[i]}l");
+            return $"{StringifyName}[L;{string.Join(',', values)}]";
+        }
     }
 }
\ No newline at end of file
diff --git a/SharpNBT/Tags/Tag.cs b/SharpNBT/Tags/Tag.cs
index f518943..6f592f3 100644
--- a/SharpNBT/Tags/Tag.cs
+++ b/SharpNBT/Tags/Tag.cs
@@ -1,6 +1,7 @@
 ﻿using System;
 using System.Collections.Generic;
 using System.IO;
+using System.Runtime.CompilerServices;
 using System.Runtime.Serialization;
 using System.Runtime.Serialization.Json;
 using System.Text;
@@ -8,6 +9,7 @@ using System.Text.RegularExpressions;
 using JetBrains.Annotations;
 
 [assembly: CLSCompliant(true)]
+[assembly: InternalsVisibleTo("SharpNBT.Tests")]
 
 namespace SharpNBT
 {