use std::borrow::Borrow;
use std::io;
use std::str;
use std::u64;
use error::ErrorCode;
use read::{ElispStr, Reference};
use crate::{datum::SpanInfo, Cons, Number, Value};
pub use crate::{
datum::{Datum, Span},
syntax::{CharSyntax, KeywordSyntax, StringSyntax},
};
pub use read::{IoRead, Position, Read, SliceRead, StrRead};
#[doc(inline)]
pub use error::{Error, Result};
/// Parser that turns S-expression text read from `R` into values or datums.
///
/// Besides the input source it carries a reusable scratch buffer, the
/// remaining nesting budget and the syntax options in effect.
pub struct Parser<R> {
/// Input source the parser pulls bytes from.
read: R,
/// Reusable byte buffer handed to the reader's symbol/string/char routines
/// and used when assembling floating point text.
scratch: Vec<u8>,
/// Nesting budget: decremented when a list or vector is entered and
/// incremented again when it has been parsed.
remaining_depth: u8,
/// Syntax options consulted while tokenizing.
options: Options,
}
/// Set of switches controlling which surface syntax the parser accepts.
///
/// Values are built with [`Options::new`], [`Options::elisp`] or
/// `Options::default()` and refined with the `with_*` builder methods.
#[derive(Debug, Copy, Clone)]
pub struct Options {
/// Bit set of enabled keyword syntaxes, built from `KeywordSyntax::to_flag()`.
keyword_syntaxes: u8,
/// How the symbol `nil` is treated.
nil_symbol: NilSymbol,
/// How the symbol `t` is treated.
t_symbol: TSymbol,
/// Whether `[` opens a list or a vector.
brackets: Brackets,
/// Which reader routine is used for string literals.
string_syntax: StringSyntax,
/// Which character literal syntax is recognized.
char_syntax: CharSyntax,
}
/// Controls how the parser treats a symbol spelled `nil`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum NilSymbol {
/// `nil` is tokenized as the empty list (`Value::Null`).
EmptyList,
/// `nil` is an ordinary symbol.
Default,
/// `nil` is tokenized as the dedicated nil value (`Value::Nil`).
Special,
}
/// Controls how the parser treats a symbol spelled `t`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum TSymbol {
/// `t` is tokenized as boolean true.
True,
/// `t` is an ordinary symbol.
Default,
}
/// Controls what a `[` ... `]` pair denotes.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Brackets {
/// `[` opens a list that must be closed by `]`.
List,
/// `[` opens a vector that must be closed by `]`.
Vector,
}
impl Options {
    /// Builds an option set with no keyword syntax enabled, `nil` and `t`
    /// as plain symbols, brackets as lists and R6RS string/char syntax.
    pub fn new() -> Self {
        Self {
            char_syntax: CharSyntax::R6RS,
            string_syntax: StringSyntax::R6RS,
            brackets: Brackets::List,
            t_symbol: TSymbol::Default,
            nil_symbol: NilSymbol::Default,
            keyword_syntaxes: 0,
        }
    }

    /// Builds the option set used for Emacs Lisp input: colon-prefixed
    /// keywords, `nil` as the empty list, brackets as vectors and Elisp
    /// string/char syntax.
    pub fn elisp() -> Self {
        let options = Self::new();
        let options = options.with_keyword_syntax(KeywordSyntax::ColonPrefix);
        let options = options.with_nil_symbol(NilSymbol::EmptyList);
        let options = options.with_brackets(Brackets::Vector);
        let options = options.with_string_syntax(StringSyntax::Elisp);
        options.with_char_syntax(CharSyntax::Elisp)
    }

    /// Enables `syntax` in addition to the keyword syntaxes already set.
    pub fn with_keyword_syntax(self, syntax: KeywordSyntax) -> Self {
        Self {
            keyword_syntaxes: self.keyword_syntaxes | syntax.to_flag(),
            ..self
        }
    }

    /// Replaces the enabled keyword syntaxes with exactly those in `styles`.
    pub fn with_keyword_syntaxes<I, T>(mut self, styles: I) -> Self
    where
        I: IntoIterator<Item = T>,
        T: Borrow<KeywordSyntax>,
    {
        let mut flags = 0;
        for style in styles {
            flags |= style.borrow().to_flag();
        }
        self.keyword_syntaxes = flags;
        self
    }

    /// Sets how the symbol `nil` is treated.
    pub fn with_nil_symbol(self, treatment: NilSymbol) -> Self {
        Self {
            nil_symbol: treatment,
            ..self
        }
    }

    /// Sets how the symbol `t` is treated.
    pub fn with_t_symbol(self, treatment: TSymbol) -> Self {
        Self {
            t_symbol: treatment,
            ..self
        }
    }

    /// Sets what square brackets denote.
    pub fn with_brackets(self, treatment: Brackets) -> Self {
        Self {
            brackets: treatment,
            ..self
        }
    }

    /// Sets the string literal syntax.
    pub fn with_string_syntax(self, syntax: StringSyntax) -> Self {
        Self {
            string_syntax: syntax,
            ..self
        }
    }

    /// Sets the character literal syntax.
    pub fn with_char_syntax(self, syntax: CharSyntax) -> Self {
        Self {
            char_syntax: syntax,
            ..self
        }
    }

    /// Reports whether `syntax` is among the enabled keyword syntaxes.
    #[inline]
    pub fn keyword_syntax(self, syntax: KeywordSyntax) -> bool {
        let mask = syntax.to_flag();
        self.keyword_syntaxes & mask != 0
    }

    /// Returns the configured treatment of `nil`.
    pub fn nil_symbol(self) -> NilSymbol {
        let Self { nil_symbol, .. } = self;
        nil_symbol
    }

    /// Returns the configured treatment of `t`.
    pub fn t_symbol(self) -> TSymbol {
        let Self { t_symbol, .. } = self;
        t_symbol
    }

    /// Returns the configured meaning of square brackets.
    pub fn brackets(self) -> Brackets {
        let Self { brackets, .. } = self;
        brackets
    }

    /// Returns the configured string literal syntax.
    pub fn string_syntax(self) -> StringSyntax {
        let Self { string_syntax, .. } = self;
        string_syntax
    }

    /// Returns the configured character literal syntax.
    pub fn char_syntax(self) -> CharSyntax {
        let Self { char_syntax, .. } = self;
        char_syntax
    }
}
impl Default for Options {
    /// The default option set: identical to [`Options::new`] except that
    /// the octothorpe keyword syntax is enabled.
    fn default() -> Self {
        let mut options = Options {
            keyword_syntaxes: 0,
            nil_symbol: NilSymbol::Default,
            t_symbol: TSymbol::Default,
            brackets: Brackets::List,
            string_syntax: StringSyntax::R6RS,
            char_syntax: CharSyntax::R6RS,
        };
        options.keyword_syntaxes = KeywordSyntax::Octothorpe.to_flag();
        options
    }
}
/// Lexical unit produced by `parse_token`.
///
/// Atom variants carry their payload; the `*Open` variants only announce
/// that a sequence starts and carry the byte expected to close it.
#[derive(Debug, Clone)]
enum Token {
// The empty list.
Null,
// The dedicated nil value (`#nil`, or `nil` under `NilSymbol::Special`).
Nil,
// Boolean literal.
Bool(bool),
// Character literal.
Char(char),
// Numeric literal.
Number(Number),
// Symbol name.
Symbol(Box<str>),
// Keyword name, without the keyword marker.
Keyword(Box<str>),
// String literal contents.
String(Box<str>),
// Byte string contents (Elisp unibyte string).
Bytes(Box<[u8]>),
// Start of a list; payload is the closing byte.
ListOpen(u8),
// Quotation prefix; payload is the symbol name that wraps the quoted datum.
Quotation(&'static str),
// Start of a vector; payload is the closing byte.
VecOpen(u8),
// Start of a byte vector; payload is the closing byte.
ByteVecOpen(u8),
}
impl<'de, R> Parser<R>
where
    R: read::Read<'de>,
{
    /// Creates a parser over `read` using `Options::default()`.
    pub fn new(read: R) -> Self {
        Self::with_options(read, Options::default())
    }

    /// Creates a parser over `read` using the given `options`.
    ///
    /// The scratch buffer starts with room for 128 bytes and the nesting
    /// budget starts at 128.
    pub fn with_options(read: R, options: Options) -> Self {
        let scratch = Vec::with_capacity(128);
        Self {
            read,
            scratch,
            remaining_depth: 128,
            options,
        }
    }
}
impl<R> Parser<read::IoRead<R>>
where
    R: io::Read,
{
    /// Creates a parser reading from an `io::Read` source with default options.
    pub fn from_reader(reader: R) -> Self {
        let source = read::IoRead::new(reader);
        Parser::new(source)
    }

    /// Creates a parser reading from an `io::Read` source with custom options.
    pub fn from_reader_custom(reader: R, options: Options) -> Self {
        let source = read::IoRead::new(reader);
        Parser::with_options(source, options)
    }
}
impl<'a> Parser<read::SliceRead<'a>> {
    /// Creates a parser over a byte slice with default options.
    pub fn from_slice(bytes: &'a [u8]) -> Self {
        let source = read::SliceRead::new(bytes);
        Parser::new(source)
    }

    /// Creates a parser over a byte slice with custom options.
    pub fn from_slice_custom(bytes: &'a [u8], options: Options) -> Self {
        let source = read::SliceRead::new(bytes);
        Parser::with_options(source, options)
    }
}
impl<'a> Parser<read::StrRead<'a>> {
    /// Creates a parser over a string slice with default options.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(s: &'a str) -> Self {
        let source = read::StrRead::new(s);
        Parser::new(source)
    }

    /// Creates a parser over a string slice with custom options.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str_custom(s: &'a str, options: Options) -> Self {
        let source = read::StrRead::new(s);
        Parser::with_options(source, options)
    }
}
// Evaluates to `true` when `$a * $radix + $b` would exceed `$c`, without
// performing the (possibly overflowing) multiplication itself. The radix
// may be given either as a literal or as an identifier.
macro_rules! overflow {
    ($a:ident * $radix:tt + $b:ident, $c:expr) => {{
        let quotient = $c / $radix;
        let remainder = $c % $radix;
        $a > quotient || ($a == quotient && $b > remainder)
    }};
}
impl<'de, R: Read<'de>> Parser<R> {
/// Checks that only whitespace and comments remain in the input.
///
/// Returns a `TrailingCharacters` error positioned at the first
/// significant byte if there is one.
pub fn expect_end(&mut self) -> Result<()> {
    if self.parse_whitespace()?.is_some() {
        return Err(self.peek_error(ErrorCode::TrailingCharacters));
    }
    Ok(())
}
/// Deprecated alias that simply forwards to `expect_end`.
#[deprecated(since = "0.2.5", note = "Please use the `expect_end` method instead")]
pub fn end(&mut self) -> Result<()> {
self.expect_end()
}
/// Forwards to the reader's `peek`; the yielded `Option` is `None` when no
/// byte is available.
fn peek(&mut self) -> Result<Option<u8>> {
self.read.peek()
}
/// Like `peek`, but substitutes a NUL byte when no byte is available.
fn peek_or_null(&mut self) -> Result<u8> {
    self.peek().map(|next| match next {
        Some(byte) => byte,
        None => b'\x00',
    })
}
/// Forwards to the reader's `discard`; used after a byte has been peeked
/// and accepted.
fn eat_char(&mut self) {
self.read.discard();
}
/// Forwards to the reader's `next`; the yielded `Option` is `None` when no
/// byte is available.
fn next_char(&mut self) -> Result<Option<u8>> {
self.read.next()
}
/// Like `next_char`, but substitutes a NUL byte when no byte is available.
fn next_char_or_null(&mut self) -> Result<u8> {
    self.next_char().map(|next| match next {
        Some(byte) => byte,
        None => b'\x00',
    })
}
fn error(&mut self, reason: ErrorCode) -> Error {
let pos = self.read.position();
Error::syntax(reason, pos.line(), pos.column())
}
fn peek_error(&mut self, reason: ErrorCode) -> Error {
let pos = self.read.peek_position();
Error::syntax(reason, pos.line(), pos.column())
}
/// Skips whitespace and `;` line comments.
///
/// Returns the first significant byte without consuming it, or `None`
/// when the input ends first.
fn parse_whitespace(&mut self) -> Result<Option<u8>> {
    loop {
        let next = self.peek()?;
        match next {
            Some(b' ') | Some(b'\t') | Some(b'\n') | Some(b'\r') | Some(0x0C) => {
                self.eat_char();
            }
            Some(b';') => {
                // Consume the comment up to and including the newline.
                loop {
                    match self.next_char()? {
                        None => return Ok(None),
                        Some(b'\n') => break,
                        Some(_) => continue,
                    }
                }
            }
            _ => return Ok(next),
        }
    }
}
/// Returns an iterator that borrows this parser and yields the results of
/// repeated `next_value` calls.
pub fn value_iter(&mut self) -> ValueIter<'_, R> {
ValueIter(self)
}
/// Returns an iterator that borrows this parser and yields the results of
/// repeated `next_datum` calls.
pub fn datum_iter(&mut self) -> DatumIter<'_, R> {
DatumIter(self)
}
/// Deprecated alias that simply forwards to `expect_value`.
#[deprecated(since = "0.2.5", note = "Please use the `expect_value` method instead")]
pub fn parse_value(&mut self) -> Result<Value> {
self.expect_value()
}
/// Parses the next value, treating end of input as an error.
///
/// Returns `EofWhileParsingValue` when `next_value` finds no value.
pub fn expect_value(&mut self) -> Result<Value> {
    match self.next_value()? {
        Some(value) => Ok(value),
        None => Err(self.peek_error(ErrorCode::EofWhileParsingValue)),
    }
}
/// Reads one token, dispatching on `peek`, the first significant byte
/// (already peeked by the caller but not yet consumed).
///
/// Atoms are consumed completely. For sequences only the opening
/// delimiter is consumed and an `*Open` token carrying the expected
/// closing byte is returned. Quotation prefixes yield
/// `Token::Quotation` with the name of the wrapping symbol.
fn parse_token(&mut self, peek: u8) -> Result<Token> {
let token = match peek {
// `#`-prefixed syntax: booleans, `#nil`, vectors, keywords, byte
// vectors, radix-prefixed numbers and R6RS characters.
b'#' => {
self.eat_char();
match self.next_char()? {
Some(b't') => Token::Bool(true),
Some(b'f') => Token::Bool(false),
Some(b'n') => {
self.expect_ident(b"il")?;
Token::Nil
}
Some(b'(') => Token::VecOpen(b')'),
// `#:name` is only a keyword when that syntax is enabled.
Some(b':') if self.options.keyword_syntax(KeywordSyntax::Octothorpe) => {
Token::Keyword(self.parse_symbol()?.into())
}
// `#vu8` and `#u8` both introduce a byte vector.
Some(b'v') => {
self.expect_ident(b"u8")?;
Token::ByteVecOpen(b')')
}
Some(b'u') => {
self.expect_ident(b"8")?;
Token::ByteVecOpen(b')')
}
Some(b'b') => Token::Number(self.parse_radix_literal(2)?),
Some(b'o') => Token::Number(self.parse_radix_literal(8)?),
Some(b'd') => Token::Number(self.parse_radix_literal(10)?),
Some(b'x') => Token::Number(self.parse_radix_literal(16)?),
Some(b'\\') => Token::Char(self.read.parse_r6rs_char(&mut self.scratch)?),
Some(_) => return Err(self.peek_error(ErrorCode::ExpectedSomeIdent)),
None => return Err(self.peek_error(ErrorCode::EofWhileParsingValue)),
}
}
// A sign starts a symbol when followed by end of input, a delimiter
// or a sign-subsequent character; otherwise it starts a number.
b'-' => {
self.eat_char();
let next = self.peek_or_null()?;
if next == 0 || is_delimiter(next) || is_sign_subsequent(next) {
Token::Symbol(self.parse_symbol_suffix("-")?.into())
} else {
Token::Number(self.parse_num_literal(10, false)?)
}
}
b'+' => {
self.eat_char();
let next = self.peek_or_null()?;
if next == 0 || is_delimiter(next) || is_sign_subsequent(next) {
Token::Symbol(self.parse_symbol_suffix("+")?.into())
} else {
Token::Number(self.parse_num_literal(10, true)?)
}
}
b'0'..=b'9' => Token::Number(self.parse_num_literal(10, true)?),
// String literal; the Elisp reader may yield either text or raw bytes.
b'"' => {
self.eat_char();
self.scratch.clear();
match self.options.string_syntax {
StringSyntax::R6RS => match self.read.parse_r6rs_str(&mut self.scratch)? {
Reference::Borrowed(s) => Token::String(s.into()),
Reference::Copied(s) => Token::String(s.into()),
},
StringSyntax::Elisp => match self.read.parse_elisp_str(&mut self.scratch)? {
ElispStr::Multibyte(mb) => match mb {
Reference::Borrowed(s) => Token::String(s.into()),
Reference::Copied(s) => Token::String(s.into()),
},
ElispStr::Unibyte(ub) => match ub {
Reference::Borrowed(b) => Token::Bytes(b.into()),
Reference::Copied(b) => Token::Bytes(b.into()),
},
},
}
}
b'(' => {
self.eat_char();
Token::ListOpen(b')')
}
// Square brackets open a vector or a list depending on the options.
b'[' => {
self.eat_char();
match self.options.brackets {
Brackets::Vector => Token::VecOpen(b']'),
Brackets::List => Token::ListOpen(b']'),
}
}
// A leading colon is a keyword marker only when enabled; otherwise
// the colon stays in the input and is handed to the symbol reader.
b':' => {
if self.options.keyword_syntax(KeywordSyntax::ColonPrefix) {
self.eat_char();
Token::Keyword(self.parse_symbol()?.into())
} else {
Token::Symbol(self.parse_symbol()?.into())
}
}
// Alphabetic start: a symbol, unless the options turn it into a
// postfix keyword, `nil` or `t`.
b'a'..=b'z' | b'A'..=b'Z' => {
let mut name = self.parse_symbol()?;
if self.options.keyword_syntax(KeywordSyntax::ColonPostfix) && name.ends_with(':') {
// Drop the trailing colon from the keyword name.
name.pop();
Token::Keyword(name.into())
} else if self.options.nil_symbol() != NilSymbol::Default && name == "nil" {
match self.options.nil_symbol() {
NilSymbol::EmptyList => Token::Null,
NilSymbol::Special => Token::Nil,
NilSymbol::Default => unreachable!(),
}
} else if self.options.t_symbol() != TSymbol::Default && name == "t" {
match self.options.t_symbol() {
TSymbol::True => Token::Bool(true),
TSymbol::Default => unreachable!(),
}
} else {
Token::Symbol(name.into())
}
}
// Elisp character literal; without that option `?` falls through to
// the extended symbol characters below.
b'?' if self.options.char_syntax == CharSyntax::Elisp => {
self.eat_char();
Token::Char(self.read.parse_elisp_char(&mut self.scratch)?)
}
b'\'' => {
self.eat_char();
Token::Quotation("quote")
}
b'`' => {
self.eat_char();
Token::Quotation("quasiquote")
}
// `,@` is unquote-splicing, a lone `,` is unquote.
b',' => {
self.eat_char();
match self.peek_or_null()? {
b'@' => {
self.eat_char();
Token::Quotation("unquote-splicing")
}
_ => Token::Quotation("unquote"),
}
}
// Any other byte must be one of the extended symbol characters.
_ => {
if SYMBOL_EXTENDED.contains(&peek) {
Token::Symbol(self.parse_symbol()?.into())
} else {
return Err(self.peek_error(ErrorCode::ExpectedSomeValue));
}
}
};
Ok(token)
}
/// Parses the next value from the input.
///
/// Returns `Ok(None)` when only whitespace and comments remain.
///
/// # Errors
///
/// Returns a syntax error for malformed input, and
/// `RecursionLimitExceeded` when lists, vectors and quotation prefixes
/// nest deeper than the parser's depth budget allows.
pub fn next_value(&mut self) -> Result<Option<Value>> {
    let peek = match self.parse_whitespace()? {
        Some(b) => b,
        None => return Ok(None),
    };
    let value = match self.parse_token(peek)? {
        Token::Nil => Value::Nil,
        Token::Null => Value::Null,
        Token::Char(c) => Value::Char(c),
        Token::Bool(b) => Value::Bool(b),
        Token::Number(n) => Value::Number(n),
        Token::Symbol(s) => Value::Symbol(s),
        Token::Keyword(name) => Value::Keyword(name),
        Token::String(s) => Value::String(s),
        Token::Bytes(b) => Value::Bytes(b),
        Token::ByteVecOpen(close) => {
            Value::Bytes(self.parse_byte_list(close)?.into_boxed_slice())
        }
        Token::VecOpen(close) => {
            // Check before decrementing so the budget is left untouched
            // when the limit is hit and can never underflow.
            if self.remaining_depth <= 1 {
                return Err(self.peek_error(ErrorCode::RecursionLimitExceeded));
            }
            self.remaining_depth -= 1;
            let ret = self.parse_vector(close);
            self.remaining_depth += 1;
            match (ret, self.end_seq(close)) {
                (Ok(elements), Ok(())) => Value::Vector(elements.into()),
                (Err(err), _) | (_, Err(err)) => return Err(err),
            }
        }
        Token::ListOpen(close) => {
            if self.remaining_depth <= 1 {
                return Err(self.peek_error(ErrorCode::RecursionLimitExceeded));
            }
            self.remaining_depth -= 1;
            let ret = self.parse_list(close);
            self.remaining_depth += 1;
            match (ret, self.end_seq(close)) {
                (Ok(list), Ok(())) => list,
                (Err(err), _) | (_, Err(err)) => return Err(err),
            }
        }
        Token::Quotation(name) => {
            // A quotation prefix recurses just like a list does, so it
            // must consume depth budget too; otherwise a long run of
            // prefixes could exhaust the stack.
            if self.remaining_depth <= 1 {
                return Err(self.peek_error(ErrorCode::RecursionLimitExceeded));
            }
            self.remaining_depth -= 1;
            let quoted = self.next_value();
            self.remaining_depth += 1;
            let datum = match quoted? {
                Some(datum) => datum,
                None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
            };
            Value::list(vec![Value::symbol(name), datum])
        }
    };
    Ok(Some(value))
}
/// Deprecated alias that simply forwards to `next_value`.
#[deprecated(since = "0.2.5", note = "Please use the `next_value` method instead")]
pub fn parse(&mut self) -> Result<Option<Value>> {
self.next_value()
}
/// Parses the next datum, treating end of input as an error.
///
/// Returns `EofWhileParsingValue` when `next_datum` finds no datum.
pub fn expect_datum(&mut self) -> Result<Datum> {
    match self.next_datum()? {
        Some(datum) => Ok(datum),
        None => Err(self.peek_error(ErrorCode::EofWhileParsingValue)),
    }
}
pub fn next_datum(&mut self) -> Result<Option<Datum>> {
let peek = match self.parse_whitespace()? {
Some(b) => b,
None => return Ok(None),
};
let start = self.read.position();
let primitive =
|value, parser: &Self| Datum::primitive(value, start, parser.read.position());
let syntax = match self.parse_token(peek)? {
Token::Nil => primitive(Value::Nil, self),
Token::Null => primitive(Value::Null, self),
Token::Char(c) => primitive(Value::Char(c), self),
Token::Bool(b) => primitive(Value::Bool(b), self),
Token::Number(n) => primitive(Value::Number(n), self),
Token::Symbol(s) => primitive(Value::Symbol(s), self),
Token::Keyword(name) => primitive(Value::Keyword(name), self),
Token::String(s) => primitive(Value::String(s), self),
Token::Bytes(b) => primitive(Value::Bytes(b), self),
Token::ByteVecOpen(close) => primitive(
Value::Bytes(self.parse_byte_list(close)?.into_boxed_slice()),
self,
),
Token::VecOpen(close) => {
self.remaining_depth -= 1;
if self.remaining_depth == 0 {
return Err(self.peek_error(ErrorCode::RecursionLimitExceeded));
}
let ret = self.parse_vector_meta(close);
self.remaining_depth += 1;
match (ret, self.end_seq(close)) {
(Ok((elements, info)), Ok(())) => {
Datum::vec(elements, info, start, self.read.position())
}
(Err(err), _) | (_, Err(err)) => return Err(err),
}
}
Token::ListOpen(close) => {
self.remaining_depth -= 1;
if self.remaining_depth == 0 {
return Err(self.peek_error(ErrorCode::RecursionLimitExceeded));
}
let ret = self.parse_list_meta(close);
self.remaining_depth += 1;
match (ret, self.end_seq(close)) {
(Ok(Some((head, meta))), Ok(())) => {
Datum::cons(head, meta, start, self.read.position())
}
(Ok(None), Ok(())) => {
Datum::primitive(Value::Null, start, self.read.position())
}
(Err(err), _) | (_, Err(err)) => return Err(err),
}
}
Token::Quotation(name) => {
let token_end = self.read.position();
let quoted = self
.next_datum()?
.ok_or_else(|| self.peek_error(ErrorCode::EofWhileParsingList))?;
Datum::quotation(name, quoted, Span::new(start, token_end))
}
};
Ok(Some(syntax))
}
/// Reads a symbol name via the reader, starting from an empty scratch
/// buffer, and returns it as an owned string.
fn parse_symbol(&mut self) -> Result<String> {
    self.scratch.clear();
    let name: String = match self.read.parse_symbol(&mut self.scratch)? {
        Reference::Borrowed(text) => text.into(),
        Reference::Copied(text) => text.into(),
    };
    Ok(name)
}
/// Reads a symbol name via the reader after seeding the scratch buffer
/// with `prefix` (characters the caller has already consumed).
fn parse_symbol_suffix(&mut self, prefix: &str) -> Result<String> {
    self.scratch.clear();
    self.scratch.extend_from_slice(prefix.as_bytes());
    let name: String = match self.read.parse_symbol(&mut self.scratch)? {
        Reference::Borrowed(text) => text.into(),
        Reference::Copied(text) => text.into(),
    };
    Ok(name)
}
fn expect_ident(&mut self, ident: &[u8]) -> Result<()> {
for c in ident {
if Some(*c) != self.next_char()? {
return Err(self.error(ErrorCode::ExpectedSomeIdent));
}
}
Ok(())
}
/// Parses the body of a byte vector: an opening `(`, a run of numbers in
/// the range 0..=255, and the `close` byte, which is consumed.
///
/// Fails with `ExpectedVector` when the opening parenthesis is missing,
/// `ExpectedOctet` for an element that is not an octet, and
/// `EofWhileParsingList` when the input ends early.
fn parse_byte_list(&mut self, close: u8) -> Result<Vec<u8>> {
    match self.parse_whitespace()? {
        Some(b'(') => self.eat_char(),
        Some(_) => return Err(self.peek_error(ErrorCode::ExpectedVector)),
        None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
    }
    let mut octets = Vec::new();
    loop {
        let next = match self.parse_whitespace()? {
            Some(byte) => byte,
            None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
        };
        if next == close {
            self.eat_char();
            return Ok(octets);
        }
        let octet = match self.parse_number()?.as_u64() {
            Some(n) if n <= 255 => n,
            _ => return Err(self.peek_error(ErrorCode::ExpectedOctet)),
        };
        octets.push(octet as u8);
    }
}
/// Parses the elements of a list up to its closing delimiter.
///
/// `terminator` is the byte that must close this list (`)` or `]`). The
/// closing byte is only peeked, never consumed; consuming it is left to
/// the caller. Returns `Value::Null` for an empty list and a cons chain
/// otherwise. A lone `.` followed by a delimiter introduces the tail of
/// an improper list.
///
/// # Errors
///
/// `MismatchedParenthesis` when the wrong closing byte is found,
/// `ExpectedSomeValue` for a dot without a preceding element,
/// `TrailingCharacters` when something other than the closing byte
/// follows a dotted tail, and `EofWhileParsingList` at end of input.
fn parse_list(&mut self, terminator: u8) -> Result<Value> {
    // Placeholder head cell; its car is overwritten by the first element.
    let mut list = Cons::new(Value::Nil, Value::Null);
    let mut pair = &mut list;
    let mut have_value = false;
    loop {
        match self.parse_whitespace()? {
            Some(c) => match c {
                b')' | b']' => {
                    if c != terminator {
                        return Err(self.peek_error(ErrorCode::MismatchedParenthesis));
                    }
                    if have_value {
                        return Ok(Value::Cons(list));
                    } else {
                        return Ok(Value::Null);
                    }
                }
                b'.' => {
                    self.eat_char();
                    let next = self.peek_or_null()?;
                    if next == 0 || is_delimiter(next) {
                        if !have_value {
                            return Err(self.peek_error(ErrorCode::ExpectedSomeValue));
                        }
                        pair.set_cdr(self.expect_value()?);
                        // The dotted tail must be followed by this list's
                        // own terminator, which may be `]` as well as `)`.
                        match self.parse_whitespace()? {
                            Some(c) if c == terminator => return Ok(Value::Cons(list)),
                            Some(b')') | Some(b']') => {
                                return Err(self.peek_error(ErrorCode::MismatchedParenthesis))
                            }
                            Some(_) => {
                                return Err(self.peek_error(ErrorCode::TrailingCharacters))
                            }
                            None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
                        }
                    } else {
                        // The dot starts a symbol such as `...`.
                        if have_value {
                            pair.set_cdr(Value::from((Value::Nil, Value::Null)));
                            pair = pair.cdr_mut().as_cons_mut().unwrap();
                        }
                        pair.set_car(Value::symbol(self.parse_symbol_suffix(".")?));
                        have_value = true;
                    }
                }
                _ => {
                    // Append a fresh cell for every element after the first.
                    if have_value {
                        pair.set_cdr(Value::from((Value::Nil, Value::Null)));
                        pair = pair.cdr_mut().as_cons_mut().unwrap();
                    }
                    pair.set_car(self.expect_value()?);
                    have_value = true;
                }
            },
            None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
        }
    }
}
/// Parses the elements of a list up to its closing delimiter, recording
/// span information alongside the values.
///
/// `terminator` is the byte that must close this list (`)` or `]`). The
/// closing byte is only peeked, never consumed; consuming it is left to
/// the caller. Returns `None` for an empty list, otherwise the head cons
/// cell together with the span info for its car and cdr.
///
/// # Errors
///
/// `MismatchedParenthesis` when the wrong closing byte is found,
/// `ExpectedSomeValue` for a dot without a preceding element,
/// `TrailingCharacters` when something other than the closing byte
/// follows a dotted tail, and `EofWhileParsingList` at end of input.
fn parse_list_meta(&mut self, terminator: u8) -> Result<Option<(Cons, [SpanInfo; 2])>> {
    // Placeholder head cell; its car is overwritten by the first element.
    let mut list = Cons::new(Value::Nil, Value::Null);
    let null_meta = [SpanInfo::Prim(Span::empty()), SpanInfo::Prim(Span::empty())];
    let mut list_meta = null_meta.clone();
    let mut pair = &mut list;
    let mut meta = &mut list_meta;
    let mut have_value = false;
    loop {
        match self.parse_whitespace()? {
            Some(c) => match c {
                b')' | b']' => {
                    if c != terminator {
                        return Err(self.peek_error(ErrorCode::MismatchedParenthesis));
                    }
                    if have_value {
                        return Ok(Some((list, list_meta)));
                    } else {
                        return Ok(None);
                    }
                }
                b'.' => {
                    let start = self.read.position();
                    self.eat_char();
                    let next = self.peek_or_null()?;
                    if next == 0 || is_delimiter(next) {
                        if !have_value {
                            return Err(self.peek_error(ErrorCode::ExpectedSomeValue));
                        }
                        let (cdr, cdr_meta) = self.expect_datum()?.into_inner();
                        pair.set_cdr(cdr);
                        meta[1] = cdr_meta;
                        // The dotted tail must be followed by this list's
                        // own terminator, which may be `]` as well as `)`.
                        match self.parse_whitespace()? {
                            Some(c) if c == terminator => return Ok(Some((list, list_meta))),
                            Some(b')') | Some(b']') => {
                                return Err(self.peek_error(ErrorCode::MismatchedParenthesis))
                            }
                            Some(_) => {
                                return Err(self.peek_error(ErrorCode::TrailingCharacters))
                            }
                            None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
                        }
                    } else {
                        // The dot starts a symbol such as `...`.
                        if have_value {
                            pair.set_cdr(Value::from((Value::Nil, Value::Null)));
                            meta[1] =
                                SpanInfo::Cons(Span::empty(), Box::new(null_meta.clone()));
                            pair = pair.cdr_mut().as_cons_mut().unwrap();
                            meta = meta[1].cons_mut().unwrap();
                        }
                        pair.set_car(Value::symbol(self.parse_symbol_suffix(".")?));
                        meta[0] = SpanInfo::Prim(Span::new(start, self.read.position()));
                        have_value = true;
                    }
                }
                _ => {
                    // Append a fresh cell (and span slot) for every element
                    // after the first.
                    if have_value {
                        pair.set_cdr(Value::from((Value::Nil, Value::Null)));
                        meta[1] = SpanInfo::Cons(Span::empty(), Box::new(null_meta.clone()));
                        pair = pair.cdr_mut().as_cons_mut().unwrap();
                        meta = meta[1].cons_mut().unwrap();
                    }
                    let (car, car_meta) = self.expect_datum()?.into_inner();
                    pair.set_car(car);
                    meta[0] = car_meta;
                    have_value = true;
                }
            },
            None => return Err(self.peek_error(ErrorCode::EofWhileParsingList)),
        }
    }
}
/// Parses vector elements up to the closing delimiter `terminator`.
///
/// The closing byte is only peeked, not consumed. Fails with
/// `MismatchedParenthesis` on the wrong closing byte and
/// `EofWhileParsingVector` at end of input.
fn parse_vector(&mut self, terminator: u8) -> Result<Vec<Value>> {
    let mut items = Vec::new();
    loop {
        let next = match self.parse_whitespace()? {
            Some(byte) => byte,
            None => return Err(self.peek_error(ErrorCode::EofWhileParsingVector)),
        };
        if next == b')' || next == b']' {
            if next == terminator {
                return Ok(items);
            }
            return Err(self.peek_error(ErrorCode::MismatchedParenthesis));
        }
        let item = self.expect_value()?;
        items.push(item);
    }
}
/// Parses vector elements up to the closing delimiter `terminator`,
/// returning the values and their span information as parallel vectors.
///
/// The closing byte is only peeked, not consumed. Fails with
/// `MismatchedParenthesis` on the wrong closing byte and
/// `EofWhileParsingVector` at end of input.
fn parse_vector_meta(&mut self, terminator: u8) -> Result<(Vec<Value>, Vec<SpanInfo>)> {
    let mut items = Vec::new();
    let mut spans = Vec::new();
    loop {
        let next = match self.parse_whitespace()? {
            Some(byte) => byte,
            None => return Err(self.peek_error(ErrorCode::EofWhileParsingVector)),
        };
        if next == b')' || next == b']' {
            if next == terminator {
                return Ok((items, spans));
            }
            return Err(self.peek_error(ErrorCode::MismatchedParenthesis));
        }
        let (item, span) = self.expect_datum()?.into_inner();
        items.push(item);
        spans.push(span);
    }
}
/// Parses a number with an optional `#b`, `#o`, `#d` or `#x` radix
/// prefix; without a prefix the number is read as decimal.
fn parse_number(&mut self) -> Result<Number> {
    if self.peek_or_null()? != b'#' {
        return self.parse_radix_literal(10);
    }
    self.eat_char();
    let radix = match self.next_char_or_null()? {
        b'b' => 2,
        b'o' => 8,
        b'd' => 10,
        b'x' => 16,
        _ => return Err(self.peek_error(ErrorCode::InvalidNumber)),
    };
    self.parse_radix_literal(radix)
}
/// Consumes an optional sign and parses the digits that follow in the
/// given `radix`.
fn parse_radix_literal(&mut self, radix: u8) -> Result<Number> {
    let positive = match self.peek_or_null()? {
        b'-' => {
            self.eat_char();
            false
        }
        b'+' => {
            self.eat_char();
            true
        }
        _ => true,
    };
    self.parse_num_literal(radix, positive)
}
/// Parses an unsigned run of digits in `radix`; `pos` is the sign that
/// the caller has already consumed (`true` for non-negative).
///
/// Digits are accumulated in a `u64`. When the next digit would overflow
/// it, parsing continues in `parse_long_integer`, which produces a
/// float. When a non-digit is reached the rest of the literal (fraction,
/// exponent or nothing) is handled by `parse_num_tail`.
///
/// # Errors
///
/// `InvalidNumber` when the first byte is not a digit or any digit is
/// not valid in `radix`.
fn parse_num_literal(&mut self, radix: u8, pos: bool) -> Result<Number> {
    let r = u64::from(radix);
    let first_digit = match self.next_char_or_null()? {
        c @ b'0'..=b'9' => c - b'0',
        c @ b'a'..=b'f' => 10 + (c - b'a'),
        c @ b'A'..=b'F' => 10 + (c - b'A'),
        _ => return Err(self.peek_error(ErrorCode::InvalidNumber)),
    };
    if first_digit >= radix {
        return Err(self.peek_error(ErrorCode::InvalidNumber));
    }
    let mut res = u64::from(first_digit);
    loop {
        let digit = match self.peek_or_null()? {
            // In decimal, `e`/`E` introduces an exponent. It must be
            // tested before the hex-digit arms, which would otherwise
            // claim it and reject it as an out-of-range digit.
            b'e' | b'E' if radix == 10 => return self.parse_num_tail(radix, pos, res),
            c @ b'0'..=b'9' => c - b'0',
            c @ b'a'..=b'f' => 10 + (c - b'a'),
            c @ b'A'..=b'F' => 10 + (c - b'A'),
            _ => return self.parse_num_tail(radix, pos, res),
        };
        if digit >= radix {
            return Err(self.peek_error(ErrorCode::InvalidNumber));
        }
        self.eat_char();
        let digit = u64::from(digit);
        if overflow!(res * r + digit, u64::MAX) {
            // The digit just consumed does not fit; account for it with
            // an initial exponent of one.
            return Ok(Number::from(self.parse_long_integer(
                radix, pos, res, 1,
            )?));
        }
        res = res * r + digit;
    }
}
fn parse_long_integer(
&mut self,
radix: u8,
pos: bool,
significand: u64,
mut exponent: i32,
) -> Result<f64> {
loop {
let digit = match self.peek_or_null()? {
c @ b'0'..=b'9' => c - b'0',
c @ b'a'..=b'f' if radix >= 10 => 10 + (c - b'a'),
c @ b'A'..=b'F' if radix >= 10 => 10 + (c - b'A'),
b'.' => {
if radix != 10 {
return Err(self.peek_error(ErrorCode::InvalidNumber));
}
return self.parse_decimal(pos, significand, exponent);
}
b'e' | b'E' => {
if radix != 10 {
return Err(self.peek_error(ErrorCode::InvalidNumber));
}
return self.parse_exponent(pos, significand, exponent);
}
_ => {
return self.f64_from_parts(pos, significand, exponent);
}
};
if digit >= radix {
return Err(self.peek_error(ErrorCode::InvalidNumber));
}
self.eat_char();
exponent += 1;
}
}
/// Finishes a number after its integer digits have been read into
/// `significand`.
///
/// A following `.` or `e`/`E` turns the literal into a float (decimal
/// radix only, otherwise `InvalidNumber`). Without either, the result is
/// an integer; a negative value whose magnitude does not fit into an
/// `i64` is returned as a float instead.
fn parse_num_tail(&mut self, radix: u8, pos: bool, significand: u64) -> Result<Number> {
    let next = self.peek_or_null()?;
    let is_fraction = next == b'.';
    let is_exponent = next == b'e' || next == b'E';
    if is_fraction || is_exponent {
        if radix != 10 {
            return Err(self.peek_error(ErrorCode::InvalidNumber));
        }
        let float = if is_fraction {
            self.parse_decimal(pos, significand, 0)?
        } else {
            self.parse_exponent(pos, significand, 0)?
        };
        return Ok(Number::from(float));
    }
    if pos {
        return Ok(Number::from(significand));
    }
    // A positive result after negation means the magnitude wrapped
    // around, i.e. it is too large for an `i64`.
    let negated = (significand as i64).wrapping_neg();
    let number = if negated > 0 {
        Number::from(-(significand as f64))
    } else {
        Number::from(negated)
    };
    Ok(number)
}
/// Parses the fractional part of a decimal literal; the reader is
/// positioned at the `.`.
///
/// Each fraction digit is folded into `significand` while `exponent` is
/// lowered by one. Once a digit would overflow the significand, it and
/// all remaining fraction digits are consumed and ignored. At least one
/// digit is required, otherwise `InvalidNumber` is returned. A following
/// `e`/`E` is handed to `parse_exponent`.
fn parse_decimal(&mut self, pos: bool, mut significand: u64, mut exponent: i32) -> Result<f64> {
    // Consume the decimal point.
    self.eat_char();
    let mut digits_seen = false;
    loop {
        let c = self.peek_or_null()?;
        if !c.is_ascii_digit() {
            break;
        }
        self.eat_char();
        digits_seen = true;
        let digit = u64::from(c - b'0');
        if overflow!(significand * 10 + digit, u64::MAX) {
            // No precision left: skip the rest of the fraction.
            while self.peek_or_null()?.is_ascii_digit() {
                self.eat_char();
            }
            break;
        }
        significand = significand * 10 + digit;
        exponent -= 1;
    }
    if !digits_seen {
        return Err(self.peek_error(ErrorCode::InvalidNumber));
    }
    let next = self.peek_or_null()?;
    if next == b'e' || next == b'E' {
        self.parse_exponent(pos, significand, exponent)
    } else {
        self.f64_from_parts(pos, significand, exponent)
    }
}
/// Parses the exponent part of a decimal literal; the reader is
/// positioned at the `e`/`E`.
///
/// An optional sign and at least one digit are required (`InvalidNumber`
/// otherwise). The parsed exponent is combined with `starting_exp` using
/// saturating arithmetic; an exponent that overflows `i32` is handled by
/// `parse_exponent_overflow`.
fn parse_exponent(
    &mut self,
    positive: bool,
    significand: u64,
    starting_exp: i32,
) -> Result<f64> {
    // Consume the exponent marker.
    self.eat_char();
    let positive_exp = match self.peek_or_null()? {
        sign @ b'+' | sign @ b'-' => {
            self.eat_char();
            sign == b'+'
        }
        _ => true,
    };
    let first = self.next_char_or_null()?;
    if !first.is_ascii_digit() {
        return Err(self.error(ErrorCode::InvalidNumber));
    }
    let mut exp = i32::from(first - b'0');
    loop {
        let c = self.peek_or_null()?;
        if !c.is_ascii_digit() {
            break;
        }
        self.eat_char();
        let digit = i32::from(c - b'0');
        if overflow!(exp * 10 + digit, i32::max_value()) {
            return self.parse_exponent_overflow(positive, significand, positive_exp);
        }
        exp = exp * 10 + digit;
    }
    let final_exp = match positive_exp {
        true => starting_exp.saturating_add(exp),
        false => starting_exp.saturating_sub(exp),
    };
    self.f64_from_parts(positive, significand, final_exp)
}
/// Handles an exponent too large for an `i32`.
///
/// A non-zero significand with a positive exponent is out of range. In
/// every other case the remaining exponent digits are consumed and a
/// zero carrying the literal's sign is returned.
#[cold]
#[inline(never)]
fn parse_exponent_overflow(
    &mut self,
    positive: bool,
    significand: u64,
    positive_exp: bool,
) -> Result<f64> {
    let out_of_range = positive_exp && significand != 0;
    if out_of_range {
        return Err(self.error(ErrorCode::NumberOutOfRange));
    }
    loop {
        match self.peek_or_null()? {
            b'0'..=b'9' => self.eat_char(),
            _ => break,
        }
    }
    let zero = if positive { 0.0 } else { -0.0 };
    Ok(zero)
}
#[cfg(not(feature = "fast-float-parsing"))]
fn f64_from_parts(&mut self, pos: bool, significand: u64, exponent: i32) -> Result<f64> {
self.scratch.clear();
itoa::write(&mut self.scratch, significand).unwrap();
self.scratch.push(b'e');
itoa::write(&mut self.scratch, exponent).unwrap();
let f: f64 = unsafe { str::from_utf8_unchecked(&self.scratch) }
.parse()
.map_err(|_| self.error(ErrorCode::NumberOutOfRange))?;
if !pos {
return Ok(f * -1.0);
}
return Ok(f);
}
/// Converts `significand * 10^exponent` into an `f64`, negated when
/// `pos` is false, using the `POW10` lookup table.
///
/// Exponents outside the table are handled by repeatedly dividing by
/// `1e308` (negative exponents) or reported as out of range (positive
/// exponents with a non-zero significand).
///
/// # Errors
///
/// `NumberOutOfRange` when the result would be infinite.
#[cfg(feature = "fast-float-parsing")]
fn f64_from_parts(&mut self, pos: bool, significand: u64, mut exponent: i32) -> Result<f64> {
    let mut f = significand as f64;
    loop {
        // `wrapping_abs` keeps `i32::MIN` (reachable through saturating
        // exponent arithmetic) from overflowing; it stays negative and
        // converts to an index far outside the table.
        match POW10.get(exponent.wrapping_abs() as usize) {
            Some(&pow) => {
                if exponent >= 0 {
                    f *= pow;
                    if f.is_infinite() {
                        return Err(self.error(ErrorCode::NumberOutOfRange));
                    }
                } else {
                    f /= pow;
                }
                break;
            }
            None => {
                if f == 0.0 {
                    break;
                }
                if exponent >= 0 {
                    return Err(self.error(ErrorCode::NumberOutOfRange));
                }
                // Scale down in steps of the largest table entry.
                f /= 1e308;
                exponent += 308;
            }
        }
    }
    Ok(if pos { f } else { -f })
}
/// Consumes the closing delimiter `close` of a sequence, skipping any
/// whitespace and comments before it.
///
/// Fails with `TrailingCharacters` when a different byte is found and
/// `EofWhileParsingList` when the input ends first.
fn end_seq(&mut self, close: u8) -> Result<()> {
    let next = self.parse_whitespace()?;
    if next == Some(close) {
        self.eat_char();
        return Ok(());
    }
    let reason = if next.is_some() {
        ErrorCode::TrailingCharacters
    } else {
        ErrorCode::EofWhileParsingList
    };
    Err(self.peek_error(reason))
}
}
// Non-alphabetic bytes that `parse_token` accepts as the first character
// of a symbol when no earlier, more specific rule matched.
static SYMBOL_EXTENDED: [u8; 16] = [
b'!', b'$', b'%', b'&', b'*', b'.', b'/', b':', b'<', b'=', b'>', b'?', b'@', b'^', b'_', b'~',
];
// Reports whether `c` ends a token: ASCII whitespace, `|`, a round
// parenthesis or a double quote.
fn is_delimiter(c: u8) -> bool {
    match c {
        b'|' | b'(' | b')' | b'"' => true,
        _ => c.is_ascii_whitespace(),
    }
}
// Reports whether `c`, directly following a leading `+` or `-`, makes the
// token a symbol rather than a number: an ASCII letter or one of the
// listed punctuation characters.
fn is_sign_subsequent(c: u8) -> bool {
    match c {
        b'a'..=b'z' | b'A'..=b'Z' => true,
        b'!' | b'$' | b'%' | b'&' | b'*' | b'/' | b':' | b'<' | b'=' | b'>' | b'?' | b'@'
        | b'^' | b'_' | b'~' | b'-' | b'+' => true,
        _ => false,
    }
}
// Powers of ten from 1e0 to 1e308, indexed by exponent. Only compiled with
// the `fast-float-parsing` feature, where `f64_from_parts` uses it to scale
// the significand.
#[cfg(feature = "fast-float-parsing")]
#[rustfmt::skip]
static POW10: [f64; 309] =
[1e000, 1e001, 1e002, 1e003, 1e004, 1e005, 1e006, 1e007, 1e008, 1e009,
1e010, 1e011, 1e012, 1e013, 1e014, 1e015, 1e016, 1e017, 1e018, 1e019,
1e020, 1e021, 1e022, 1e023, 1e024, 1e025, 1e026, 1e027, 1e028, 1e029,
1e030, 1e031, 1e032, 1e033, 1e034, 1e035, 1e036, 1e037, 1e038, 1e039,
1e040, 1e041, 1e042, 1e043, 1e044, 1e045, 1e046, 1e047, 1e048, 1e049,
1e050, 1e051, 1e052, 1e053, 1e054, 1e055, 1e056, 1e057, 1e058, 1e059,
1e060, 1e061, 1e062, 1e063, 1e064, 1e065, 1e066, 1e067, 1e068, 1e069,
1e070, 1e071, 1e072, 1e073, 1e074, 1e075, 1e076, 1e077, 1e078, 1e079,
1e080, 1e081, 1e082, 1e083, 1e084, 1e085, 1e086, 1e087, 1e088, 1e089,
1e090, 1e091, 1e092, 1e093, 1e094, 1e095, 1e096, 1e097, 1e098, 1e099,
1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109,
1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119,
1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129,
1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139,
1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149,
1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159,
1e160, 1e161, 1e162, 1e163, 1e164, 1e165, 1e166, 1e167, 1e168, 1e169,
1e170, 1e171, 1e172, 1e173, 1e174, 1e175, 1e176, 1e177, 1e178, 1e179,
1e180, 1e181, 1e182, 1e183, 1e184, 1e185, 1e186, 1e187, 1e188, 1e189,
1e190, 1e191, 1e192, 1e193, 1e194, 1e195, 1e196, 1e197, 1e198, 1e199,
1e200, 1e201, 1e202, 1e203, 1e204, 1e205, 1e206, 1e207, 1e208, 1e209,
1e210, 1e211, 1e212, 1e213, 1e214, 1e215, 1e216, 1e217, 1e218, 1e219,
1e220, 1e221, 1e222, 1e223, 1e224, 1e225, 1e226, 1e227, 1e228, 1e229,
1e230, 1e231, 1e232, 1e233, 1e234, 1e235, 1e236, 1e237, 1e238, 1e239,
1e240, 1e241, 1e242, 1e243, 1e244, 1e245, 1e246, 1e247, 1e248, 1e249,
1e250, 1e251, 1e252, 1e253, 1e254, 1e255, 1e256, 1e257, 1e258, 1e259,
1e260, 1e261, 1e262, 1e263, 1e264, 1e265, 1e266, 1e267, 1e268, 1e269,
1e270, 1e271, 1e272, 1e273, 1e274, 1e275, 1e276, 1e277, 1e278, 1e279,
1e280, 1e281, 1e282, 1e283, 1e284, 1e285, 1e286, 1e287, 1e288, 1e289,
1e290, 1e291, 1e292, 1e293, 1e294, 1e295, 1e296, 1e297, 1e298, 1e299,
1e300, 1e301, 1e302, 1e303, 1e304, 1e305, 1e306, 1e307, 1e308];
// Shared driver behind the `from_*` functions: parses exactly one value
// from `read` and then requires the input to be exhausted.
fn from_trait<'de, R>(read: R, options: Options) -> Result<Value>
where
    R: Read<'de>,
{
    let mut parser = Parser::with_options(read, options);
    match parser.expect_value() {
        Ok(value) => parser.expect_end().map(|_| value),
        Err(err) => Err(err),
    }
}
/// Iterating over the parser directly yields the same items as
/// `value_iter`.
///
/// Deprecated since 0.2.5: please use the `value_iter` method to obtain an
/// iterator. The note lives in this comment because a `#[deprecated]`
/// attribute has no effect on a trait implementation and is rejected by
/// current compilers (`useless_deprecated` lint).
impl<'de, R: Read<'de>> Iterator for Parser<R> {
    type Item = Result<Value>;

    fn next(&mut self) -> Option<Self::Item> {
        self.value_iter().next()
    }
}
/// Parses a single value from an `io::Read` source using `options`;
/// anything but whitespace and comments after the value is an error.
pub fn from_reader_custom(rdr: impl io::Read, options: Options) -> Result<Value> {
    let source = read::IoRead::new(rdr);
    from_trait(source, options)
}
/// Parses a single value from an `io::Read` source using the default
/// options.
pub fn from_reader(rdr: impl io::Read) -> Result<Value> {
    let options = Options::default();
    from_reader_custom(rdr, options)
}
/// Parses a single value from an `io::Read` source using the Emacs Lisp
/// options.
pub fn from_reader_elisp(rdr: impl io::Read) -> Result<Value> {
    let options = Options::elisp();
    from_reader_custom(rdr, options)
}
/// Parses a single value from a byte slice using `options`; anything but
/// whitespace and comments after the value is an error.
pub fn from_slice_custom(bytes: &[u8], options: Options) -> Result<Value> {
    let source = read::SliceRead::new(bytes);
    from_trait(source, options)
}
/// Parses a single value from a byte slice using the default options.
pub fn from_slice(bytes: &[u8]) -> Result<Value> {
    let options = Options::default();
    from_slice_custom(bytes, options)
}
/// Parses a single value from a byte slice using the Emacs Lisp options.
pub fn from_slice_elisp(bytes: &[u8]) -> Result<Value> {
    let options = Options::elisp();
    from_slice_custom(bytes, options)
}
/// Parses a single value from a string slice using `options`; anything
/// but whitespace and comments after the value is an error.
pub fn from_str_custom(s: &str, options: Options) -> Result<Value> {
    let source = read::StrRead::new(s);
    from_trait(source, options)
}
/// Parses a single value from a string slice using the default options.
pub fn from_str(s: &str) -> Result<Value> {
    let options = Options::default();
    from_str_custom(s, options)
}
/// Parses a single value from a string slice using the Emacs Lisp options.
pub fn from_str_elisp(s: &str) -> Result<Value> {
    let options = Options::elisp();
    from_str_custom(s, options)
}
/// Iterator over the values produced by a borrowed [`Parser`].
///
/// Yields `Some(Ok(value))` for each parsed value, `Some(Err(_))` for a
/// parse error and `None` once `next_value` reports the end of input.
pub struct ValueIter<'a, R>(&'a mut Parser<R>);

impl<'a, 'b, R> Iterator for ValueIter<'a, R>
where
    R: read::Read<'b>,
{
    type Item = Result<Value>;

    fn next(&mut self) -> Option<Self::Item> {
        let parser = &mut *self.0;
        match parser.next_value() {
            Err(err) => Some(Err(err)),
            Ok(found) => found.map(Ok),
        }
    }
}
/// Iterator over the datums produced by a borrowed [`Parser`].
///
/// Yields `Some(Ok(datum))` for each parsed datum, `Some(Err(_))` for a
/// parse error and `None` once `next_datum` reports the end of input.
pub struct DatumIter<'a, R>(&'a mut Parser<R>);

impl<'a, 'b, R> Iterator for DatumIter<'a, R>
where
    R: read::Read<'b>,
{
    type Item = Result<Datum>;

    fn next(&mut self) -> Option<Self::Item> {
        let parser = &mut *self.0;
        match parser.next_datum() {
            Err(err) => Some(Err(err)),
            Ok(found) => found.map(Ok),
        }
    }
}
pub mod error;
mod iter;
pub(crate) mod read;
#[cfg(test)]
mod tests;