Basic lexer added
Supports splitting by whitespace. Supports escaping into string mode and returning back to whitespace-splitting mode.
This commit is contained in:
parent
c63701da6d
commit
1f103bf190
|
@ -0,0 +1,87 @@
|
|||
module compiler.lexer;
|
||||
|
||||
/**
 * A simple whitespace-splitting lexer with support for
 * double-quoted string literals (spaces inside a quoted
 * region do not split tokens).
 */
public final class Lexer
{
    /* The source to be lexed */
    private string sourceCode;

    /* The tokens */
    private string[] tokens;

    /**
     * Constructs a new Lexer for the given source text.
     *
     * Params:
     *   sourceCode = the raw source text to tokenize
     */
    this(string sourceCode)
    {
        this.sourceCode = sourceCode;
    }

    /**
     * Perform the lexing process.
     *
     * Walks the source character-by-character, splitting on
     * spaces unless inside a double-quoted string, and stores
     * the resulting tokens for retrieval via getTokens().
     */
    public void performLex()
    {
        string[] currentTokens;
        string currentToken;
        ulong position;
        char currentChar;

        /* Whether we are currently inside a "..." literal */
        bool stringMode;

        while(position != sourceCode.length)
        {
            currentChar = sourceCode[position];

            if(currentChar == ' ' && !stringMode)
            {
                /* Flush the current token if it is non-empty
                 * (runs of spaces produce no empty tokens) */
                if(currentToken.length != 0)
                {
                    currentTokens ~= currentToken;
                    currentToken = "";
                }

                position++;
            }
            else if(currentChar == '"')
            {
                /* If we are not in string mode */
                if(!stringMode)
                {
                    /* Add the opening " to the token */
                    currentToken ~= '"';

                    /* Enable string mode */
                    stringMode = true;
                }
                /* If we are in string mode */
                else
                {
                    /* Add the closing " to the token */
                    currentToken ~= '"';

                    /* Flush the token */
                    currentTokens ~= currentToken;
                    currentToken = "";

                    /* Get out of string mode */
                    stringMode = false;
                }

                position++;
            }
            else
            {
                currentToken ~= currentChar;
                position++;
            }
        }

        /* Flush the trailing token, but only if it is non-empty.
         * (Fixes a bug where a source ending in a space — or an
         * empty source — produced a spurious empty "" token.) */
        if(currentToken.length != 0)
        {
            currentTokens ~= currentToken;
        }

        tokens = currentTokens;
    }

    /**
     * Return the tokens produced by performLex().
     *
     * Returns: the token array (empty if performLex() has not
     * been called or the source contained no tokens)
     */
    public string[] getTokens()
    {
        return tokens;
    }
}
|
|
@ -1,6 +1,24 @@
|
|||
module compiler.compiler;
|
||||
|
||||
import gogga;
import std.conv : to;
import compiler.lexer;

/**
 * Begin the compilation process for the given list of
 * source files: each file is tokenized by its own Lexer
 * and the collected tokens are logged.
 *
 * Params:
 *   sourceFiles = paths of the source files to compile
 */
void beginCompilation(string[] sourceFiles)
{
    /* TODO: Begin compilation process, take in data here */
    gprintln("Compiling files "~to!(string)(sourceFiles)~" ...");

    /* One lexer per source file */
    Lexer[] lexers;
    foreach(string sourceFile; sourceFiles)
    {
        gprintln("Performing tokenization on '"~sourceFile~"' ...");

        /* TODO: Open source file (placeholder source for now) */
        string sourceCode = "hello \"world\";";
        Lexer currentLexer = new Lexer(sourceCode);
        currentLexer.performLex();

        /* Keep the lexer for later compilation stages
         * (previously declared but never populated) */
        lexers ~= currentLexer;

        gprintln("Collected "~to!(string)(currentLexer.getTokens()));
    }
}
|
Loading…
Reference in New Issue