Fixed formatting with ByteArray and LongArray
This commit is contained in:
parent
16369b68e3
commit
31aac628b6
|
@ -1,3 +1,6 @@
|
|||
using System.IO;
|
||||
using Microsoft.VisualStudio.TestPlatform.ObjectModel;
|
||||
using SharpNBT.SNBT;
|
||||
using Xunit;
|
||||
using Xunit.Abstractions;
|
||||
|
||||
|
@ -26,5 +29,27 @@ namespace SharpNBT.Tests
|
|||
var tag = TestHelper.GetTag("hello_world.nbt", CompressionType.None);
|
||||
output.WriteLine(tag.Stringify(true));
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the lexer over a small inline SNBT compound and writes each token's
/// type and trimmed text to the test output.
/// </summary>
[Fact]
public void ParseSmall()
{
    const string snbt = "{name1:123,name2:\"sometext1\",name3:{subname1:456,subname2:\"sometext2\"}}";

    // Tokenize is lazy; tokens are produced as the loop pulls them.
    var tokens = new Lexer().Tokenize(snbt);
    foreach (var current in tokens)
    {
        var text = current.Match.Trim();
        output.WriteLine($"{current.Type}: \"{text}\"");
    }
}
|
||||
|
||||
/// <summary>
/// Runs the lexer over a large SNBT sample file and writes each token's
/// type and trimmed text to the test output.
/// </summary>
/// <remarks>
/// The sample is referenced by a machine-specific absolute path. When the file is
/// not present the test reports that fact and returns instead of failing with a
/// <see cref="FileNotFoundException"/>.
/// TODO: ship the sample as a test resource and load it relative to the test assembly.
/// </remarks>
[Fact]
public void ParseBig()
{
    const string samplePath = "/code/ruby/craftbook-nbt/test/bigtest.snbt";

    if (!File.Exists(samplePath))
    {
        output.WriteLine($"ParseBig skipped: sample file not found at \"{samplePath}\".");
        return;
    }

    var testString = File.ReadAllText(samplePath);
    var lexer = new Lexer();
    foreach (var token in lexer.Tokenize(testString))
    {
        output.WriteLine($"{token.Type}: \"{token.Match.Trim()}\"");
    }
}
|
||||
}
|
||||
}
|
|
@ -1,7 +1,10 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Reflection.Emit;
|
||||
using System.Security;
|
||||
using System.Text.RegularExpressions;
|
||||
using JetBrains.Annotations;
|
||||
|
||||
namespace SharpNBT.SNBT
|
||||
{
|
||||
|
@ -26,70 +29,152 @@ namespace SharpNBT.SNBT
|
|||
Long,
|
||||
Int,
|
||||
WhiteSpace,
|
||||
Char
|
||||
Char,
|
||||
EscapedChar
|
||||
}
|
||||
|
||||
|
||||
/// <summary>
/// A single lexer rule: a token type, a human-readable description, the regular
/// expression that recognizes the token, and an optional post-processing step
/// that turns the regex match into the token text.
/// </summary>
internal sealed class LexerRule
{
    /// <summary>
    /// Converts a successful match into the text stored on the resulting token.
    /// </summary>
    /// <param name="match">The match produced by <see cref="Matcher"/>.</param>
    /// <returns>The token text; a <c>null</c> result falls back to the raw match value.</returns>
    internal delegate string PostProcessHandler(Match match);

    // Optional; when null the raw match text is used unchanged.
    private readonly PostProcessHandler handler;

    /// <summary>Gets the compiled pattern that recognizes this rule's token.</summary>
    public Regex Matcher { get; }

    /// <summary>Gets the token type emitted when this rule matches.</summary>
    public TokenType Type { get; }

    /// <summary>Gets a human-readable description of the rule.</summary>
    public string Description { get; }

    /// <summary>
    /// Creates a rule from a pattern string.
    /// </summary>
    /// <param name="type">The token type emitted when the rule matches.</param>
    /// <param name="description">A human-readable description of the rule.</param>
    /// <param name="pattern">
    /// The regular expression source; compiled with
    /// <see cref="RegexOptions.Multiline"/> and <see cref="RegexOptions.CultureInvariant"/>.
    /// </param>
    /// <param name="process">Optional post-processor for the match, or <c>null</c>.</param>
    public LexerRule(TokenType type, string description, string pattern, [CanBeNull] PostProcessHandler process)
    {
        Type = type;
        Description = description;
        Matcher = new Regex(pattern, RegexOptions.Multiline | RegexOptions.CultureInvariant);
        handler = process;
    }

    /// <summary>
    /// Produces the token text for a match.
    /// </summary>
    /// <param name="match">A match produced by <see cref="Matcher"/>.</param>
    /// <returns>
    /// The post-processor's result, or <see cref="Capture.Value"/> of the match when
    /// no post-processor is set or it returns <c>null</c>.
    /// </returns>
    public string PostProcess(Match match) => handler?.Invoke(match) ?? match.Value;
}
|
||||
|
||||
/// <summary>
/// A lexical token: the type assigned by the rule that produced it and the
/// text that rule extracted from the input.
/// </summary>
internal sealed class Token
{
    /// <summary>Gets the type of the token.</summary>
    public TokenType Type { get; }

    /// <summary>Gets the text associated with the token.</summary>
    public string Match { get; }

    /// <summary>
    /// Creates a token.
    /// </summary>
    /// <param name="type">The type of the token.</param>
    /// <param name="match">The text associated with the token.</param>
    public Token(TokenType type, string match)
    {
        Type = type;
        Match = match;
    }
}
|
||||
|
||||
/// <summary>
/// Splits SNBT text into <see cref="Token"/> instances using an ordered table of
/// <see cref="LexerRule"/> entries; at each position the first rule that matches wins.
/// </summary>
internal class Lexer
{
    private static readonly string DoubleQuoteIdentifier = "\"(.*?)\"\\s*(?=:)";

    private static readonly List<LexerRule> rules;

    // Pattern constants referenced only by the rules that are still disabled below.
    // NOTE(review): the two quoted-identifier constants end in the atomic group "(?>:)",
    // which consumes the colon, while IDENTIFIER_NO_QUOTES ends in the lookahead "(?=:)".
    // Confirm which is intended before enabling them.
    private const string IDENTIFIER_DOUBLE_QUOTES = "\".*?\"\\s*(?>:)";
    private const string IDENTIFIER_SINGLE_QUOTES = "'.*?'\\s*(?>:)";
    private const string IDENTIFIER_NO_QUOTES = @"[A-Za-z0-9_-]+\s*(?=:)";

    private const string STRING_DOUBLE_QUOTED = "^\\s*\".*?\"";
    private const string STRING_SINGLE_QUOTED = "^\\s*'.*?'";

    private const string COMPOUND_START = "\\s*{\\s*";
    private const string COMPOUND_END = @"\}";

    private const string SEPARATOR = "^\\s*:\\s*";
    private const string COMMA = "^\\s*,\\s*";

    /// <summary>
    /// Builds the rule table. Order matters: rules are tried from first to last.
    /// </summary>
    static Lexer()
    {
        rules = new List<LexerRule>
        {
            new LexerRule(TokenType.CompoundBegin, "Opening Compound brace", "^{", null),
            new LexerRule(TokenType.WhiteSpace, "Useless whitespace", @"^[\s]+", null),

            // Identifier rules capture the bare name (group 1) and require a following colon.
            new LexerRule(TokenType.Identifier, "Single-quoted name", "^\\s*'(.*?)'\\s*(?=:)", m => m.Groups[1].Value),
            new LexerRule(TokenType.Identifier, "Double-quoted name", "^\\s*\"(.*?)\"\\s*(?=:)", m => m.Groups[1].Value),
            new LexerRule(TokenType.Identifier, "Unquoted name", "^\\s*([A-Za-z0-9_-]+)\\s*(?=:)", m => m.Groups[1].Value),

            // String values keep their surrounding quotes (no post-processor).
            new LexerRule(TokenType.String, "Double-quoted string value", "^\"(.*?)\"", null),
            new LexerRule(TokenType.String, "Single-quoted string value", "^'(.*?)'", null)

            // Rules not yet ported to the four-argument constructor:
            // new LexerRule(TokenType.CompoundBegin, COMPOUND_START),
            // new LexerRule(TokenType.CompoundEnd, COMPOUND_END),
            // new LexerRule(TokenType.Identifier, IDENTIFIER_DOUBLE_QUOTES),
            // new LexerRule(TokenType.Identifier, IDENTIFIER_SINGLE_QUOTES),
            // new LexerRule(TokenType.Identifier, IDENTIFIER_NO_QUOTES),
            // new LexerRule(TokenType.String, STRING_DOUBLE_QUOTED),
            // new LexerRule(TokenType.String, STRING_SINGLE_QUOTED),
            // new LexerRule(TokenType.Separator, SEPARATOR),
            // new LexerRule(TokenType.Comma, COMMA),
            // new LexerRule(TokenType.ByteArray, @"\[B;[\s]*?"),
            // new LexerRule(TokenType.IntArray, @"\[I;[\s]*?"),
            // new LexerRule(TokenType.LongArray, @"\[L;[\s]*?"),
            // new LexerRule(TokenType.ListArray, @"\[[\s]*?"),
            // new LexerRule(TokenType.EndArray, @"[\s]*\]"),
            // new LexerRule(TokenType.Float, @"-?[0-9]*\.[0-9]+[Ff]"),
            // new LexerRule(TokenType.Double, @"-?[0-9]*\.[0-9]+[Dd]?"),
            // new LexerRule(TokenType.Byte, "-?([0-9]+)[Bb]"),
            // new LexerRule(TokenType.Short, "-?([0-9]+)[Ss]"),
            // new LexerRule(TokenType.Long, "-?([0-9]+)[Ll]"),
            // new LexerRule(TokenType.Int, "-?([0-9]+)"),
            // new LexerRule(TokenType.WhiteSpace, @"[\s]+"),
            // new LexerRule(TokenType.String, @"[\S]+"),
            // new LexerRule(TokenType.Char, ".")
        };
    }

    // Placeholder post-processor; nothing visible in this file references it yet.
    private static string Process(Match match)
    {
        throw new NotImplementedException();
    }

    /// <summary>
    /// Creates a lexer. The rule table is static, so instances carry no state.
    /// </summary>
    public Lexer()
    {
    }

    /// <summary>
    /// Lazily tokenizes <paramref name="input"/>.
    /// </summary>
    /// <param name="input">The SNBT text to tokenize.</param>
    /// <returns>
    /// The tokens in input order. Characters at which no rule matches are skipped.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <c>null</c>.</exception>
    public IEnumerable<Token> Tokenize(string input)
    {
        // Validate eagerly; the iterator body itself only runs on enumeration.
        if (input is null)
            throw new ArgumentNullException(nameof(input));

        return Scan(input);
    }

    // Walks the input left to right, emitting one token per rule match at the current position.
    private static IEnumerable<Token> Scan(string input)
    {
        var pos = 0;
        while (pos < input.Length)
        {
            // When nothing matches here, step over a single character.
            var advance = 1;

            foreach (var rule in rules)
            {
                // The (input, beginning, length) overload makes '^' anchor at 'pos';
                // Match(input, pos) would keep '^' tied to line starts of the whole string,
                // so anchored rules could never match at an interior position.
                var match = rule.Matcher.Match(input, pos, input.Length - pos);

                // With RegexOptions.Multiline '^' also matches after a newline further ahead;
                // accept only matches that start exactly here, and never an empty match
                // (which would stall the scan).
                if (!match.Success || match.Index != pos || match.Length == 0)
                    continue;

                yield return new Token(rule.Type, rule.PostProcess(match));
                advance = match.Length;
                break;
            }

            pos += advance;
        }
    }
}
|
||||
}
|
|
@ -63,6 +63,12 @@ namespace SharpNBT
|
|||
/// </summary>
|
||||
/// <returns>This NBT tag in SNBT format.</returns>
|
||||
/// <seealso href="https://minecraft.fandom.com/wiki/NBT_format#SNBT_format"/>
|
||||
public override string Stringify()
{
    // Each element carries the SNBT byte suffix "b".
    var values = new string[Count];
    for (var i = 0; i < Count; i++)
    {
        // Invariant formatting keeps the output independent of the current thread culture.
        values[i] = System.FormattableString.Invariant($"{this[i]}b");
    }

    return $"{StringifyName}[B;{string.Join(',', values)}]";
}
|
||||
}
|
||||
}
|
|
@ -66,6 +66,12 @@ namespace SharpNBT
|
|||
/// </summary>
|
||||
/// <returns>This NBT tag in SNBT format.</returns>
|
||||
/// <seealso href="https://minecraft.fandom.com/wiki/NBT_format#SNBT_format"/>
|
||||
public override string Stringify()
{
    // Each element carries the SNBT long suffix "l".
    var values = new string[Count];
    for (var i = 0; i < Count; i++)
    {
        // Invariant formatting: the current culture's negative sign is not always
        // the ASCII '-', which would produce text an SNBT reader cannot parse.
        values[i] = System.FormattableString.Invariant($"{this[i]}l");
    }

    return $"{StringifyName}[L;{string.Join(',', values)}]";
}
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Serialization;
|
||||
using System.Runtime.Serialization.Json;
|
||||
using System.Text;
|
||||
|
@ -8,6 +9,7 @@ using System.Text.RegularExpressions;
|
|||
using JetBrains.Annotations;
|
||||
|
||||
[assembly: CLSCompliant(true)]
|
||||
[assembly: InternalsVisibleTo("SharpNBT.Tests")]
|
||||
|
||||
namespace SharpNBT
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue