From 31aac628b6604d00db9690e0faaccbf116fbfb09 Mon Sep 17 00:00:00 2001 From: ForeverZer0 Date: Tue, 31 Aug 2021 19:33:57 -0400 Subject: [PATCH] Fixed formatting with ByteArray and LongArray --- SharpNBT.Tests/StringifiedTest.cs | 25 +++++ SharpNBT/SNBT/Lexer.cs | 153 +++++++++++++++++++++++------- SharpNBT/Tags/ByteArrayTag.cs | 8 +- SharpNBT/Tags/LongArrayTag.cs | 10 +- SharpNBT/Tags/Tag.cs | 2 + 5 files changed, 161 insertions(+), 37 deletions(-) diff --git a/SharpNBT.Tests/StringifiedTest.cs b/SharpNBT.Tests/StringifiedTest.cs index 4ec3584..05b7adc 100644 --- a/SharpNBT.Tests/StringifiedTest.cs +++ b/SharpNBT.Tests/StringifiedTest.cs @@ -1,3 +1,6 @@ +using System.IO; +using Microsoft.VisualStudio.TestPlatform.ObjectModel; +using SharpNBT.SNBT; using Xunit; using Xunit.Abstractions; @@ -26,5 +29,27 @@ namespace SharpNBT.Tests var tag = TestHelper.GetTag("hello_world.nbt", CompressionType.None); output.WriteLine(tag.Stringify(true)); } + + [Fact] + public void ParseSmall() + { + const string testString = "{name1:123,name2:\"sometext1\",name3:{subname1:456,subname2:\"sometext2\"}}"; + var lexer = new Lexer(); + foreach (var token in lexer.Tokenize(testString)) + { + output.WriteLine($"{token.Type}: \"{token.Match.Trim()}\""); + } + } + + [Fact] + public void ParseBig() + { + var testString = File.ReadAllText("/code/ruby/craftbook-nbt/test/bigtest.snbt"); + var lexer = new Lexer(); + foreach (var token in lexer.Tokenize(testString)) + { + output.WriteLine($"{token.Type}: \"{token.Match.Trim()}\""); + } + } } } \ No newline at end of file diff --git a/SharpNBT/SNBT/Lexer.cs b/SharpNBT/SNBT/Lexer.cs index c6b40aa..7f620f9 100644 --- a/SharpNBT/SNBT/Lexer.cs +++ b/SharpNBT/SNBT/Lexer.cs @@ -1,7 +1,10 @@ using System; using System.Collections.Generic; using System.IO; +using System.Reflection.Emit; +using System.Security; using System.Text.RegularExpressions; +using JetBrains.Annotations; namespace SharpNBT.SNBT { @@ -26,70 +29,152 @@ namespace SharpNBT.SNBT Long, Int, WhiteSpace, - Char + Char, + EscapedChar } + internal sealed class LexerRule { - internal delegate string PostProcessHandler(string input); + + internal delegate string PostProcessHandler(Match match); - private readonly Regex regex; + public Regex Matcher { get; } public TokenType Type { get; } - public LexerRule(TokenType type, string pattern) : this(type, new Regex(pattern, RegexOptions.Multiline | RegexOptions.CultureInvariant)) - { - } + public string Description { get; } - public LexerRule(TokenType type, Regex regex) + public string PostProcess(Match match) => handler?.Invoke(match) ?? match.Value; + + private readonly PostProcessHandler handler; + + public LexerRule(TokenType type, string description, string pattern, [CanBeNull] PostProcessHandler process) { Type = type; - this.regex = regex ?? throw new ArgumentNullException(nameof(regex)); + Description = description; + Matcher = new Regex(pattern, RegexOptions.Multiline | RegexOptions.CultureInvariant); + handler = process; } - public LexerRule(TokenType type, Regex regex, PostProcessHandler handler) + // public LexerRule(TokenType type, string description, Regex regex) + // { + // Description = description; + // Type = type; + // + // } + } + + internal sealed class Token + { + public TokenType Type { get; } + + public string Match { get; } + + public Token(TokenType type, string match) { - + Type = type; + Type = type; + Match = match; } } internal class Lexer { + private static readonly string DoubleQuoteIdentifier = "\"(.*?)\"\\s*(?=:)"; + private static readonly List rules; + + private const string IDENTIFIER_DOUBLE_QUOTES = "\".*?\"\\s*(?>:)"; + private const string IDENTIFIER_SINGLE_QUOTES = "'.*?'\\s*(?>:)"; + private const string IDENTIFIER_NO_QUOTES = @"[A-Za-z0-9_-]+\s*(?=:)"; + + private const string STRING_DOUBLE_QUOTED = "^\\s*\".*?\""; + private const string STRING_SINGLE_QUOTED = "^\\s*'.*?'"; + + private const string COMPOUND_START = "\\s*{\\s*"; + private const string COMPOUND_END = @"\}"; + + + private const string SEPARATOR = "^\\s*:\\s*"; + private const string COMMA = "^\\s*,\\s*"; + + static Lexer() { rules = new List { - new LexerRule(TokenType.CompoundBegin, @"\{[\s]*"), - new LexerRule(TokenType.CompoundEnd, @"[\s]*\}"), - new LexerRule(TokenType.Identifier, "\".+?\"(?=:)"), - new LexerRule(TokenType.Identifier, "'.+?'(?=:) "), - new LexerRule(TokenType.Identifier, "A-Za-z0-9_-]+?(?=:) "), - new LexerRule(TokenType.String, "\".*?\""), - new LexerRule(TokenType.String, "'.*?'"), - new LexerRule(TokenType.Separator, @"[\s]*:[\s]*"), - new LexerRule(TokenType.Comma, @"[\s]*,[\s]*"), - new LexerRule(TokenType.ByteArray, @"\[B;[\s]*?"), - new LexerRule(TokenType.IntArray, @"\[I;[\s]*?"), - new LexerRule(TokenType.LongArray, @"\[L;[\s]*?"), - new LexerRule(TokenType.ListArray, @"\[[\s]*?"), - new LexerRule(TokenType.EndArray, @"[\s]*\]"), - new LexerRule(TokenType.Float, @"-?[0-9]*\.[0-9]+[Ff]"), - new LexerRule(TokenType.Double, @"-?[0-9]*\.[0-9]+[Dd]?"), - new LexerRule(TokenType.Byte, "-?([0-9]+)[Bb]"), - new LexerRule(TokenType.Short, "-?([0-9]+)[Ss]"), - new LexerRule(TokenType.Long, "-?([0-9]+)[Ll]"), - new LexerRule(TokenType.Int, "-?([0-9]+)"), - new LexerRule(TokenType.WhiteSpace, @"[\s]+"), - new LexerRule(TokenType.String, @"[\S]+"), - new LexerRule(TokenType.Char, ".") + new LexerRule(TokenType.CompoundBegin, "Opening Compound brace", "^{", null), + new LexerRule(TokenType.WhiteSpace, "Useless whitespace", @"^[\s]+", null), + + new LexerRule(TokenType.Identifier, "Single-quoted name", "^\\s*'(.*?)'\\s*(?=:)", m => m.Groups[1].Value), + new LexerRule(TokenType.Identifier, "Double-quoted name", "^\\s*\"(.*?)\"\\s*(?=:)", m => m.Groups[1].Value), + new LexerRule(TokenType.Identifier, "Unquoted name", "^\\s*([A-Za-z0-9_-]+)\\s*(?=:)", m => m.Groups[1].Value), + + + new LexerRule(TokenType.String, "Double-quoted string value", "^\"(.*?)\"", null), + new LexerRule(TokenType.String, "Single-quoted string value", "^'(.*?)'", null) + + // new LexerRule(TokenType.CompoundBegin, COMPOUND_START), + // new LexerRule(TokenType.CompoundEnd, COMPOUND_END), + // new LexerRule(TokenType.Identifier, IDENTIFIER_DOUBLE_QUOTES), + // new LexerRule(TokenType.Identifier, IDENTIFIER_SINGLE_QUOTES), + // new LexerRule(TokenType.Identifier, IDENTIFIER_NO_QUOTES), + // new LexerRule(TokenType.String, STRING_DOUBLE_QUOTED), + // new LexerRule(TokenType.String, STRING_SINGLE_QUOTED), + // new LexerRule(TokenType.Separator, SEPARATOR), + // new LexerRule(TokenType.Comma, COMMA), + // new LexerRule(TokenType.ByteArray, @"\[B;[\s]*?"), + // new LexerRule(TokenType.IntArray, @"\[I;[\s]*?"), + // new LexerRule(TokenType.LongArray, @"\[L;[\s]*?"), + // new LexerRule(TokenType.ListArray, @"\[[\s]*?"), + // new LexerRule(TokenType.EndArray, @"[\s]*\]"), + // new LexerRule(TokenType.Float, @"-?[0-9]*\.[0-9]+[Ff]"), + // new LexerRule(TokenType.Double, @"-?[0-9]*\.[0-9]+[Dd]?"), + // new LexerRule(TokenType.Byte, "-?([0-9]+)[Bb]"), + // new LexerRule(TokenType.Short, "-?([0-9]+)[Ss]"), + // new LexerRule(TokenType.Long, "-?([0-9]+)[Ll]"), + // new LexerRule(TokenType.Int, "-?([0-9]+)"), + // new LexerRule(TokenType.WhiteSpace, @"[\s]+"), + // new LexerRule(TokenType.String, @"[\S]+"), + // new LexerRule(TokenType.Char, ".") }; } - + + private static string Process(Match match) + { + throw new NotImplementedException(); + } + public Lexer() { } + + public IEnumerable Tokenize(string input) + { + string.Create(input.Length, input, (span, i) => + { + + }); + var pos = 0; + + do + { + Label: + foreach (var rule in rules) + { + var match = rule.Matcher.Match(input, pos); + if (match.Success) + { + yield return new Token(rule.Type, rule.PostProcess(match)); + pos = match.Index + match.Length - 1; + break; + } + } + } while (++pos < input.Length); + + } } } \ No newline at end of file diff --git a/SharpNBT/Tags/ByteArrayTag.cs b/SharpNBT/Tags/ByteArrayTag.cs index 823c836..4638e7f 100644 --- a/SharpNBT/Tags/ByteArrayTag.cs +++ b/SharpNBT/Tags/ByteArrayTag.cs @@ -63,6 +63,12 @@ namespace SharpNBT /// /// This NBT tag in SNBT format. /// - public override string Stringify() => $"{StringifyName}[B;{string.Join(',', this)}]"; + public override string Stringify() + { + var values = new string[Count]; + for (var i = 0; i < Count; i++) + values[i] = $"{this[i]}b"; + return $"{StringifyName}[B;{string.Join(',', values)}]"; + } } } \ No newline at end of file diff --git a/SharpNBT/Tags/LongArrayTag.cs b/SharpNBT/Tags/LongArrayTag.cs index f7c3178..a6d0f09 100644 --- a/SharpNBT/Tags/LongArrayTag.cs +++ b/SharpNBT/Tags/LongArrayTag.cs @@ -60,12 +60,18 @@ namespace SharpNBT var word = Count == 1 ? Strings.WordElement : Strings.WordElements; return $"TAG_Long_Array({PrettyName}): [{Count} {word}]"; } - + /// /// Gets the string representation of this NBT tag (SNBT). /// /// This NBT tag in SNBT format. /// - public override string Stringify() => $"{StringifyName}[L;{string.Join(',', this)}]"; + public override string Stringify() + { + var values = new string[Count]; + for (var i = 0; i < Count; i++) + values[i] = $"{this[i]}l"; + return $"{StringifyName}[L;{string.Join(',', values)}]"; + } } } \ No newline at end of file diff --git a/SharpNBT/Tags/Tag.cs b/SharpNBT/Tags/Tag.cs index f518943..6f592f3 100644 --- a/SharpNBT/Tags/Tag.cs +++ b/SharpNBT/Tags/Tag.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.IO; +using System.Runtime.CompilerServices; using System.Runtime.Serialization; using System.Runtime.Serialization.Json; using System.Text; @@ -8,6 +9,7 @@ using System.Text.RegularExpressions; using JetBrains.Annotations; [assembly: CLSCompliant(true)] +[assembly: InternalsVisibleTo("SharpNBT.Tests")] namespace SharpNBT {