Assembler.Tokenizer.Scan C# (CSharp) Method

Scan() public method

public Scan ( ) : void
Returns: void
        /// <summary>
        /// Tokenizes <c>source</c> from the current position to its end, emitting
        /// each token through <c>AddToken</c>.
        /// </summary>
        /// <remarks>
        /// At each token start the scanner tries, in this order: whitespace,
        /// <c>//</c> comments, <c>/* ... */</c> comments, delimiters, double-quoted
        /// strings (with backslash escapes), words, and numbers (decimal, optionally
        /// negative, or <c>0x</c>-prefixed hexadecimal). Anything else is an error.
        /// Several inner loops deliberately index past the end of <c>source</c> when
        /// a token is unterminated; the resulting out-of-range exception is
        /// translated by the outer catch into an "Unexpected end of file" error.
        /// </remarks>
        /// <exception cref="InvalidOperationException">Scan() already completed successfully.</exception>
        /// <exception cref="AssemblerException">
        /// Invalid escape sequence, invalid number, unexpected character, or end of
        /// input in the middle of a token.
        /// </exception>
        public void Scan()
        {
            if (hasTokenized)
                throw new InvalidOperationException("Scan() has already been called");

            try
            {
                while (pos < source.Length)
                {
                    // currentLine is the line reported for the token about to be read;
                    // futureLine runs ahead as line breaks are consumed.
                    currentLine = futureLine;

                    // Skip whitespace
                    while (pos < source.Length && char.IsWhiteSpace(source[pos]))
                    {
                        if (source[pos++] == LineBreak)
                            futureLine++;
                    }

                    if (pos >= source.Length)
                        break;

                    // Single line comment: skip up to (not including) the line break,
                    // which the whitespace pass of the next iteration will count.
                    if (pos < source.Length - 1 && source[pos] == '/' && source[pos + 1] == '/')
                    {
                        while (pos < source.Length && source[pos] != LineBreak)
                        {
                            pos++;
                        }
                        continue;
                    }

                    // Multi-line comment. An unterminated comment indexes past the end
                    // of source and is reported by the outer catch.
                    if (pos < source.Length - 1 && source[pos] == '/' && source[pos + 1] == '*')
                    {
                        pos += 2;
                        while (source[pos] != '*' || source[pos + 1] != '/')
                        {
                            if (source[pos++] == LineBreak)
                                futureLine++;
                        }
                        pos += 2;
                        continue;
                    }

                    // Delimiters
                    if (Delimiters.Contains(source[pos]))
                    {
                        AddToken(BasicTokenType.Delimiter, source[pos++].ToString());
                        continue;
                    }

                    // Strings
                    if (source[pos] == '"')
                    {
                        var text = new StringBuilder();

                        // ++pos first steps over the opening quote, then over each
                        // consumed character; an unterminated string runs off the end.
                        while (source[++pos] != '"')
                        {
                            var ch = source[pos];

                            if (ch == LineBreak)
                                futureLine++;

                            if (ch != '\\')
                            {
                                text.Append(ch);
                                continue;
                            }

                            // Escape sequence: look at the character after the backslash.
                            pos++;
                            switch (source[pos])
                            {
                                case 'a':
                                    text.Append('\a');
                                    break;
                                case 'b':
                                    text.Append('\b');
                                    break;
                                case 'f':
                                    text.Append('\f');
                                    break;
                                case 'n':
                                    text.Append('\n');
                                    break;
                                case 'r':
                                    text.Append('\r');
                                    break;
                                case 't':
                                    text.Append('\t');
                                    break;
                                case 'v':
                                    text.Append('\v');
                                    break;
                                case '"':
                                    text.Append('"');
                                    break;
                                case '\\':
                                    text.Append('\\');
                                    break;
                                case '0':
                                    text.Append('\0');
                                    break;
                                case 'x':
                                    // Exactly two hex digits follow; operands evaluate left to right.
                                    var hexDigits = "" + source[++pos] + source[++pos];
                                    string decoded;
                                    try
                                    {
                                        var hexVal = Convert.ToByte(hexDigits, 16);
                                        // The byte is interpreted as a code page 437 character.
                                        decoded = Encoding.GetEncoding(437).GetString(new[] { hexVal });
                                    }
                                    catch (Exception e)
                                    {
                                        // Keep the original failure as the inner exception so the
                                        // real cause is not lost.
                                        throw new AssemblerException(string.Format("Invalid hexadecimal escape sequence on line {0}", currentLine), e);
                                    }
                                    text.Append(decoded);
                                    break;
                                default:
                                    // Unknown escape: keep the escaped character as-is. An escaped
                                    // line break is still a line break and must be counted, or every
                                    // later line number would be off by one.
                                    if (source[pos] == LineBreak)
                                        futureLine++;
                                    text.Append(source[pos]);
                                    break;
                            }
                        }

                        AddToken(BasicTokenType.String, text.ToString());
                        pos++; // closing quote
                        continue;
                    }

                    // Word
                    if (char.IsLetter(source[pos]) || source[pos] == '_')
                    {
                        var start = pos;

                        while (pos < source.Length && (char.IsLetterOrDigit(source[pos]) || source[pos] == '_'))
                        {
                            pos++;
                        }

                        AddToken(BasicTokenType.Word, source.Substring(start, pos - start));
                        continue;
                    }

                    // Number
                    if (char.IsDigit(source[pos]) || source[pos] == '-')
                    {
                        var negative = source[pos] == '-';
                        var isHex = false;
                        var start = pos;

                        if (negative)
                            pos++;

                        while (pos < source.Length && char.IsDigit(source[pos]))
                        {
                            pos++;
                        }

                        // Includes the leading '-' when present.
                        var digits = source.Substring(start, pos - start);

                        // "0x" prefix switches to hexadecimal; a negative prefix never does.
                        if (pos < source.Length && digits == "0" && source[pos] == 'x' && !negative)
                        {
                            pos++;
                            isHex = true;
                            start = pos;

                            // Non-hex letters are consumed here too and rejected by the parse below.
                            while (pos < source.Length && char.IsLetterOrDigit(source[pos]))
                            {
                                pos++;
                            }

                            digits = source.Substring(start, pos - start);

                            if (digits.Length == 0)
                                throw new AssemblerException(string.Format("Invalid hexadecimal number on line {0}", currentLine));
                        }

                        short number;
                        try
                        {
                            number = !isHex ? short.Parse(digits, CultureInfo.InvariantCulture) : Convert.ToInt16(digits, 16);
                        }
                        catch (Exception e)
                        {
                            throw new AssemblerException(string.Format("Invalid number on line {0}", currentLine), e);
                        }

                        // Tokens carry the normalized decimal form regardless of input base.
                        AddToken(BasicTokenType.Number, number.ToString("D", CultureInfo.InvariantCulture));
                        continue;
                    }

                    throw new AssemblerException(string.Format("Unexpected character '{0}' on line {1}", source[pos], currentLine));
                }

                lineCount = futureLine;
                hasTokenized = true;
            }
            catch (Exception e)
            {
                // Running off the end of source mid-token shows up as an out-of-range
                // exception from the indexer or Substring; report it as a source error.
                if (e is IndexOutOfRangeException ||
                    e is ArgumentOutOfRangeException)
                {
                    throw new AssemblerException(string.Format("Unexpected end of file from token on line {0}.", currentLine), e);
                }
                throw;
            }
        }

Usage Example

Example #1
0
        /// <summary>
        /// Runs a basic <c>Tokenizer</c> over <c>source</c> and classifies its output
        /// into the assembler-level <c>tokens</c> list (opcodes, registers, words,
        /// labels, punctuation, numbers and strings).
        /// </summary>
        /// <exception cref="InvalidOperationException">Scan() already completed successfully.</exception>
        /// <exception cref="AssemblerException">
        /// A label contains more than one period, a delimiter is not valid where it
        /// appears, or the tokenizer produced a token type this method does not handle.
        /// </exception>
        public void Scan()
        {
            if (hasTokenized)
            {
                throw new InvalidOperationException("Scan() has already been called");
            }

            var tokenizer = new Tokenizer(source);

            tokenizer.Scan();

            var t = tokenizer.Tokens;

            // NOTE(review): this loop relies on tokenizer.Tokens ending with an
            // EndOfFile token; without one the indexer runs off the end — confirm.
            for (var i = 0; t[i].Type != BasicTokenType.EndOfFile; i++)
            {
                var tok = t[i];

                switch (tok.Type)
                {
                case BasicTokenType.Word:
                {
                    Opcode   opcode;
                    Register register;

                    // Opcode and register names match case-insensitively and are stored
                    // lower-cased. ToLowerInvariant keeps the stored text independent of
                    // the current culture (e.g. Turkish 'I' would otherwise become 'ı').
                    if (Enum.TryParse(tok.Value, true, out opcode) && opcode < Opcode.None)
                    {
                        tokens.Add(new Token(TokenType.Opcode, tok.Value.ToLowerInvariant(), tok.Line));
                    }
                    else if (Enum.TryParse(tok.Value, true, out register))
                    {
                        tokens.Add(new Token(TokenType.Register, tok.Value.ToLowerInvariant(), tok.Line));
                    }
                    else
                    {
                        // Any other word keeps its original casing.
                        tokens.Add(new Token(TokenType.Word, tok.Value, tok.Line));
                    }

                    break;
                }

                case BasicTokenType.Delimiter:
                {
                    if (tok.Value == ",")
                    {
                        tokens.Add(new Token(TokenType.Comma, tok.Value, tok.Line));
                        break;
                    }

                    if (tok.Value == "[")
                    {
                        tokens.Add(new Token(TokenType.OpenBracket, tok.Value, tok.Line));
                        break;
                    }

                    if (tok.Value == "]")
                    {
                        tokens.Add(new Token(TokenType.CloseBracket, tok.Value, tok.Line));
                        break;
                    }

                    // A ':' directly after a Word turns that word into a Label; the ':'
                    // itself produces no token. In any other position it falls through
                    // to the "Unexpected delimiter" error below.
                    if (tok.Value == ":" && tokens.Count > 0)
                    {
                        var last = tokens[tokens.Count - 1];
                        if (last.Type == TokenType.Word)
                        {
                            var periodCount = last.Value.Count(c => c == '.');

                            if (periodCount > 1)
                            {
                                throw new AssemblerException(string.Format("Label with more than one period on line {0}", last.Line));
                            }

                            // Replace the trailing Word with a Label carrying the same text and line.
                            tokens.RemoveAt(tokens.Count - 1);
                            tokens.Add(new Token(TokenType.Label, last.Value, last.Line));
                            break;
                        }
                    }

                    if (tok.Value == "+")
                    {
                        tokens.Add(new Token(TokenType.Plus, tok.Value, tok.Line));
                        break;
                    }

                    throw new AssemblerException(string.Format("Unexpected delimiter '{0}' on line {1}", tok.Value, tok.Line));
                }

                case BasicTokenType.Number:
                {
                    tokens.Add(new Token(TokenType.Number, tok.Value, tok.Line));
                    break;
                }

                case BasicTokenType.String:
                {
                    tokens.Add(new Token(TokenType.String, tok.Value, tok.Line));
                    break;
                }

                default:
                    throw new AssemblerException(string.Format("Unhandled BasicToken {0} on line {1}", tok.Type, tok.Line));
                }
            }

            // Only a fully successful pass marks the scanner as done.
            hasTokenized = true;
        }
All Usage Examples Of Assembler.Tokenizer::Scan