//! beamdsp lexer — converts DSL source text into a stream of tokens.
//!
//! Path: Lightningbeam/lightningbeam-ui/beamdsp/src/lexer.rs
use crate::error::CompileError;
use crate::token::{Span, Token, TokenKind};
/// Hand-written lexer for the beamdsp DSL.
///
/// Scans the raw bytes of the source string and tracks a 1-based
/// line/column position used to build error spans.
pub struct Lexer<'a> {
    // Raw bytes of the source text (came from a `&str`, so valid UTF-8).
    source: &'a [u8],
    // Byte offset of the next unread byte.
    pos: usize,
    // 1-based line number; incremented on each '\n'.
    line: u32,
    // 1-based column; counted in bytes, reset to 1 after a newline.
    col: u32,
}
impl<'a> Lexer<'a> {
    /// Creates a lexer over `source`, positioned at line 1, column 1.
    pub fn new(source: &'a str) -> Self {
        Self {
            source: source.as_bytes(),
            pos: 0,
            line: 1,
            col: 1,
        }
    }

    /// Scans the entire input and returns the token stream, always
    /// terminated by a single `Eof` token.
    ///
    /// Returns the first lexical error encountered (unexpected character,
    /// unterminated string, or an out-of-range numeric literal).
    pub fn tokenize(&mut self) -> Result<Vec<Token>, CompileError> {
        let mut tokens = Vec::new();
        loop {
            self.skip_whitespace_and_comments();
            if self.pos >= self.source.len() {
                tokens.push(Token {
                    kind: TokenKind::Eof,
                    span: self.span(),
                });
                break;
            }
            tokens.push(self.next_token()?);
        }
        Ok(tokens)
    }

    /// Span at the current cursor position.
    fn span(&self) -> Span {
        Span::new(self.line, self.col)
    }

    /// Current byte without consuming it, or `None` at end of input.
    fn peek(&self) -> Option<u8> {
        self.source.get(self.pos).copied()
    }

    /// Byte one past the cursor, or `None` if out of range.
    fn peek_next(&self) -> Option<u8> {
        self.source.get(self.pos + 1).copied()
    }

    /// Consumes and returns the current byte, updating line/col counters.
    ///
    /// Callers must ensure the cursor is in bounds (i.e. `peek()` is
    /// `Some`); otherwise this panics on the index. Columns are counted
    /// in bytes, so a multi-byte UTF-8 character advances `col` by more
    /// than one.
    fn advance(&mut self) -> u8 {
        let ch = self.source[self.pos];
        self.pos += 1;
        if ch == b'\n' {
            self.line += 1;
            self.col = 1;
        } else {
            self.col += 1;
        }
        ch
    }

    /// Skips runs of ASCII whitespace and `//` line comments, in any
    /// order, until the next significant byte (or end of input).
    fn skip_whitespace_and_comments(&mut self) {
        loop {
            // Skip whitespace.
            while self.pos < self.source.len() && self.source[self.pos].is_ascii_whitespace() {
                self.advance();
            }
            // Skip a `//` line comment up to (not including) the newline;
            // the newline itself is consumed by the whitespace pass above.
            if self.pos + 1 < self.source.len()
                && self.source[self.pos] == b'/'
                && self.source[self.pos + 1] == b'/'
            {
                while self.pos < self.source.len() && self.source[self.pos] != b'\n' {
                    self.advance();
                }
                continue;
            }
            break;
        }
    }

    /// Lexes one token starting at the current (non-whitespace) byte.
    ///
    /// Two-byte operators (`==`, `!=`, `<=`, `>=`, `&&`, `||`, `..`) are
    /// matched before their one-byte prefixes. A lone `&`, `|`, or `.` is
    /// an error.
    fn next_token(&mut self) -> Result<Token, CompileError> {
        let span = self.span();
        let ch = self.advance();
        match ch {
            b'{' => Ok(Token { kind: TokenKind::LBrace, span }),
            b'}' => Ok(Token { kind: TokenKind::RBrace, span }),
            b'[' => Ok(Token { kind: TokenKind::LBracket, span }),
            b']' => Ok(Token { kind: TokenKind::RBracket, span }),
            b'(' => Ok(Token { kind: TokenKind::LParen, span }),
            b')' => Ok(Token { kind: TokenKind::RParen, span }),
            b':' => Ok(Token { kind: TokenKind::Colon, span }),
            b',' => Ok(Token { kind: TokenKind::Comma, span }),
            b';' => Ok(Token { kind: TokenKind::Semicolon, span }),
            b'+' => Ok(Token { kind: TokenKind::Plus, span }),
            b'-' => Ok(Token { kind: TokenKind::Minus, span }),
            b'*' => Ok(Token { kind: TokenKind::Star, span }),
            b'/' => Ok(Token { kind: TokenKind::Slash, span }),
            b'%' => Ok(Token { kind: TokenKind::Percent, span }),
            b'.' if self.peek() == Some(b'.') => {
                self.advance();
                Ok(Token { kind: TokenKind::DotDot, span })
            }
            b'=' if self.peek() == Some(b'=') => {
                self.advance();
                Ok(Token { kind: TokenKind::EqEq, span })
            }
            b'=' => Ok(Token { kind: TokenKind::Eq, span }),
            b'!' if self.peek() == Some(b'=') => {
                self.advance();
                Ok(Token { kind: TokenKind::BangEq, span })
            }
            b'!' => Ok(Token { kind: TokenKind::Bang, span }),
            b'<' if self.peek() == Some(b'=') => {
                self.advance();
                Ok(Token { kind: TokenKind::LtEq, span })
            }
            b'<' => Ok(Token { kind: TokenKind::Lt, span }),
            b'>' if self.peek() == Some(b'=') => {
                self.advance();
                Ok(Token { kind: TokenKind::GtEq, span })
            }
            b'>' => Ok(Token { kind: TokenKind::Gt, span }),
            b'&' if self.peek() == Some(b'&') => {
                self.advance();
                Ok(Token { kind: TokenKind::AmpAmp, span })
            }
            b'|' if self.peek() == Some(b'|') => {
                self.advance();
                Ok(Token { kind: TokenKind::PipePipe, span })
            }
            b'"' => self.read_string(span),
            ch if ch.is_ascii_digit() => self.read_number(ch, span),
            ch if ch.is_ascii_alphabetic() || ch == b'_' => self.read_ident(ch, span),
            _ => {
                // For a non-ASCII byte, `ch as char` would print the
                // Latin-1 interpretation of a single UTF-8 byte (mojibake);
                // report the byte value instead.
                let shown = if ch.is_ascii() {
                    format!("'{}'", ch as char)
                } else {
                    format!("byte 0x{:02X}", ch)
                };
                Err(CompileError::new(
                    format!("Unexpected character: {}", shown),
                    span,
                ))
            }
        }
    }

    /// Reads the remainder of a `"`-delimited string literal (the opening
    /// quote has already been consumed). Strings may not span lines and
    /// there are no escape sequences.
    fn read_string(&mut self, span: Span) -> Result<Token, CompileError> {
        // Collect raw bytes so multi-byte UTF-8 characters survive intact.
        // Pushing each byte individually via `as char` would reinterpret
        // UTF-8 continuation bytes as Latin-1 and mangle non-ASCII text.
        let mut bytes = Vec::new();
        loop {
            match self.peek() {
                Some(b'"') => {
                    self.advance();
                    // Safe: the source came from a `&str` and we only stop
                    // on the ASCII bytes '"' and '\n', which never occur
                    // inside a multi-byte UTF-8 sequence, so `bytes` is a
                    // complete run of valid UTF-8.
                    let s = String::from_utf8(bytes)
                        .expect("string literal bytes come from a valid UTF-8 source");
                    return Ok(Token {
                        kind: TokenKind::StringLit(s),
                        span,
                    });
                }
                Some(b'\n') | None => {
                    return Err(CompileError::new("Unterminated string literal", span));
                }
                Some(_) => {
                    bytes.push(self.advance());
                }
            }
        }
    }

    /// Reads an integer or float literal whose first digit has already
    /// been consumed.
    ///
    /// A `.` only turns the literal into a float when it is not followed
    /// by another `.`, so `0..10` lexes as `0`, `..`, `10` rather than
    /// a malformed float.
    fn read_number(&mut self, first: u8, span: Span) -> Result<Token, CompileError> {
        let mut s = String::new();
        // Only ASCII digits and '.' are ever pushed here, so the
        // byte-to-char casts are lossless.
        s.push(first as char);
        let mut is_float = false;
        while let Some(ch) = self.peek() {
            if ch.is_ascii_digit() {
                s.push(self.advance() as char);
            } else if ch == b'.' && self.peek_next() != Some(b'.') && !is_float {
                is_float = true;
                s.push(self.advance() as char);
            } else {
                break;
            }
        }
        if is_float {
            let val: f32 = s
                .parse()
                .map_err(|_| CompileError::new(format!("Invalid float literal: {}", s), span))?;
            Ok(Token {
                kind: TokenKind::FloatLit(val),
                span,
            })
        } else {
            // Digits-only input can still fail to parse on i32 overflow.
            let val: i32 = s
                .parse()
                .map_err(|_| CompileError::new(format!("Invalid integer literal: {}", s), span))?;
            // An int used in a float context (e.g. `0`) is emitted as
            // IntLit; the parser/validator handles coercion.
            Ok(Token {
                kind: TokenKind::IntLit(val),
                span,
            })
        }
    }

    /// Reads an identifier or keyword whose first byte (an ASCII letter
    /// or '_') has already been consumed; `TokenKind::from_ident` decides
    /// whether the text is a keyword.
    fn read_ident(&mut self, first: u8, span: Span) -> Result<Token, CompileError> {
        let mut s = String::new();
        // Only ASCII alphanumerics and '_' are pushed, so `as char` is
        // lossless here.
        s.push(first as char);
        while let Some(ch) = self.peek() {
            if ch.is_ascii_alphanumeric() || ch == b'_' {
                s.push(self.advance() as char);
            } else {
                break;
            }
        }
        Ok(Token {
            kind: TokenKind::from_ident(&s),
            span,
        })
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Tokenizes `src`, panicking if the lexer reports an error.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src).tokenize().expect("lexing failed")
    }

    #[test]
    fn test_simple_tokens() {
        let toks = lex("name \"Test\" category effect");
        assert_eq!(toks[0].kind, TokenKind::Name);
        assert_eq!(toks[1].kind, TokenKind::StringLit("Test".into()));
        assert_eq!(toks[2].kind, TokenKind::Category);
        assert_eq!(toks[3].kind, TokenKind::Effect);
    }

    #[test]
    fn test_numbers() {
        let toks = lex("42 3.14 0.5");
        assert_eq!(toks[0].kind, TokenKind::IntLit(42));
        assert_eq!(toks[1].kind, TokenKind::FloatLit(3.14));
        assert_eq!(toks[2].kind, TokenKind::FloatLit(0.5));
    }

    #[test]
    fn test_operators() {
        let toks = lex("== != <= >= && || ..");
        let expected = [
            TokenKind::EqEq,
            TokenKind::BangEq,
            TokenKind::LtEq,
            TokenKind::GtEq,
            TokenKind::AmpAmp,
            TokenKind::PipePipe,
            TokenKind::DotDot,
        ];
        for (i, kind) in expected.iter().enumerate() {
            assert_eq!(&toks[i].kind, kind);
        }
    }

    #[test]
    fn test_comments() {
        // The `// comment` run must be skipped entirely.
        let toks = lex("let x = 5; // comment\nlet y = 10;");
        assert_eq!(toks[0].kind, TokenKind::Let);
        assert_eq!(toks[5].kind, TokenKind::Let);
    }

    #[test]
    fn test_range_vs_float() {
        // "0..10" must lex as IntLit(0), DotDot, IntLit(10) — not a float.
        let toks = lex("0..10");
        assert_eq!(toks[0].kind, TokenKind::IntLit(0));
        assert_eq!(toks[1].kind, TokenKind::DotDot);
        assert_eq!(toks[2].kind, TokenKind::IntLit(10));
    }
}