Basic lexer added
Supports splitting by whitespace. Supports escaping into string mode and returning back to whitespace-splitting mode.
This commit is contained in:
parent
c63701da6d
commit
1f103bf190
|
@ -0,0 +1,87 @@
|
|||
module compiler.lexer;
|
||||
|
||||
/**
 * A simple whitespace-splitting lexer with support for
 * double-quoted string literals (spaces inside a quoted
 * region do not split tokens).
 */
public final class Lexer
{
    /* The source to be lexed */
    private string sourceCode;

    /* The tokens */
    private string[] tokens;

    /**
     * Constructs a new Lexer for the given source text.
     *
     * Params:
     *   sourceCode = the raw source text to tokenize
     */
    this(string sourceCode)
    {
        this.sourceCode = sourceCode;
    }

    /**
     * Perform the lexing process.
     *
     * Walks the source character-by-character, splitting on
     * spaces unless inside a double-quoted string, and stores
     * the resulting tokens for retrieval via getTokens().
     */
    public void performLex()
    {
        string[] currentTokens;
        string currentToken;
        ulong position;
        char currentChar;

        /* Whether we are currently inside a "..." literal */
        bool stringMode;

        while(position != sourceCode.length)
        {
            currentChar = sourceCode[position];

            if(currentChar == ' ' && !stringMode)
            {
                /* Flush the current token if it is non-empty
                 * (runs of spaces produce no empty tokens) */
                if(currentToken.length != 0)
                {
                    currentTokens ~= currentToken;
                    currentToken = "";
                }

                position++;
            }
            else if(currentChar == '"')
            {
                /* If we are not in string mode */
                if(!stringMode)
                {
                    /* Add the opening " to the token */
                    currentToken ~= '"';

                    /* Enable string mode */
                    stringMode = true;
                }
                /* If we are in string mode */
                else
                {
                    /* Add the closing " to the token */
                    currentToken ~= '"';

                    /* Flush the token */
                    currentTokens ~= currentToken;
                    currentToken = "";

                    /* Get out of string mode */
                    stringMode = false;
                }

                position++;
            }
            else
            {
                currentToken ~= currentChar;
                position++;
            }
        }

        /* Flush the trailing token, but only if it is non-empty.
         * (Fixes a bug where a source ending in a space — or an
         * empty source — produced a spurious empty "" token.) */
        if(currentToken.length != 0)
        {
            currentTokens ~= currentToken;
        }

        tokens = currentTokens;
    }

    /**
     * Return the tokens produced by performLex().
     *
     * Returns: the token array (empty if performLex() has not
     * been called or the source contained no tokens)
     */
    public string[] getTokens()
    {
        return tokens;
    }
}
|
|
@ -1,6 +1,24 @@
|
|||
module compiler.compiler;
|
||||
|
||||
import gogga;
import std.conv : to;
import compiler.lexer;

/**
 * Begin the compilation process for the given list of
 * source files: each file is tokenized by its own Lexer
 * and the collected tokens are logged.
 *
 * Params:
 *   sourceFiles = paths of the source files to compile
 */
void beginCompilation(string[] sourceFiles)
{
    /* TODO: Begin compilation process, take in data here */
    gprintln("Compiling files "~to!(string)(sourceFiles)~" ...");

    /* One lexer per source file */
    Lexer[] lexers;
    foreach(string sourceFile; sourceFiles)
    {
        gprintln("Performing tokenization on '"~sourceFile~"' ...");

        /* TODO: Open source file (placeholder source for now) */
        string sourceCode = "hello \"world\";";
        Lexer currentLexer = new Lexer(sourceCode);
        currentLexer.performLex();

        /* Keep the lexer for later compilation stages
         * (previously declared but never populated) */
        lexers ~= currentLexer;

        gprintln("Collected "~to!(string)(currentLexer.getTokens()));
    }
}
|
Loading…
Reference in New Issue