Skip to content
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "chumsky"
version = "1.0.0-alpha.6"
version = "1.0.0-alpha.7"
description = "A parser library for humans with powerful error recovery"
authors = ["Joshua Barretto <joshua.s.barretto@gmail.com>", "Elijah Hartvigsen <elijah.reed@hartvigsen.xyz>", "Jakob Wiesmore <runetynan@gmail.com>"]
repository = "https://github.com/zesterer/chumsky"
Expand Down
126 changes: 123 additions & 3 deletions benches/cbor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ mod utils;
static CBOR: &[u8] = include_bytes!("samples/sample.cbor");

fn bench_cbor(c: &mut Criterion) {
// c.bench_function("cbor_nom", {
// move |b| b.iter(|| black_box(nom::cbor(black_box(CBOR)).unwrap()))
// });
c.bench_function("cbor_nom", {
move |b| b.iter(|| black_box(nom::cbor(black_box(CBOR)).unwrap()))
});

// c.bench_function("cbor_winnow", {
// move |b| b.iter(|| black_box(winnow::cbor(black_box(JSON)).unwrap()))
Expand Down Expand Up @@ -87,6 +87,8 @@ pub enum CborZero<'a> {
Array(Vec<CborZero<'a>>),
Map(Vec<(CborZero<'a>, CborZero<'a>)>),
Tag(u64, Box<CborZero<'a>>),
// Byte(u8)
// HalfFloat(f16),
SingleFloat(f32),
DoubleFloat(f64),
}
Expand Down Expand Up @@ -222,3 +224,121 @@ mod chumsky_zero_copy {
})
}
}

mod nom {
use super::CborZero;
use nom::{
bits::{bits, bytes},
branch::alt,
bytes::complete::take as take_bytes,
combinator::{map, value as to, verify},
complete::{tag, take},
multi::count,
number::complete::{be_f32, be_f64},
sequence::{pair, preceded},
IResult,
};

/// Bit-level parser for the 5-bit field that follows a 3-bit type prefix,
/// returning the unsigned argument that field describes.
///
/// Field values below 24 are the argument itself. The values 24, 25, 26 and
/// 27 announce that the argument follows in the next 8, 16, 32 or 64 bits
/// respectively. Any other field value matches no alternative, so the parser
/// returns an error.
fn integer(i: (&[u8], usize)) -> IResult<(&[u8], usize), u64> {
    // Argument stored directly in the 5-bit field.
    let inline = verify(take(5usize), |&v| v < 24);
    // Argument stored in the bits that follow the marker value.
    let next_8 = preceded(tag(24, 5usize), take(8usize));
    let next_16 = preceded(tag(25, 5usize), take(16usize));
    let next_32 = preceded(tag(26, 5usize), take(32usize));
    let next_64 = preceded(tag(27, 5usize), take(64usize));

    alt((inline, next_8, next_16, next_32, next_64))(i)
}

/// Parses an unsigned integer item: the 3-bit prefix `0` followed by an
/// [`integer`] argument, returned as `CborZero::Int`.
///
/// # Panics
///
/// Panics if the argument does not fit in an `i64`.
fn uint<'a>(i: &[u8]) -> IResult<&[u8], CborZero<'a>> {
    let as_int = |raw: u64| CborZero::Int(i64::try_from(raw).unwrap());
    let item = preceded(tag(0, 3usize), map(integer, as_int));
    bits(item)(i)
}

/// Parses a negative integer item: the 3-bit prefix `1` followed by an
/// [`integer`] argument `n`, returned as `CborZero::Int(-1 - n)`.
///
/// # Panics
///
/// Panics if the argument does not fit in an `i64`.
fn nint<'a>(i: &[u8]) -> IResult<&[u8], CborZero<'a>> {
    let as_negative = |raw: u64| CborZero::Int(-1 - i64::try_from(raw).unwrap());
    let item = preceded(tag(1, 3usize), map(integer, as_negative));
    bits(item)(i)
}

/// Parses a byte-string item: the 3-bit prefix `2`, an [`integer`] length,
/// then that many raw bytes, borrowed from the input as `CborZero::Bytes`.
///
/// # Panics
///
/// Panics if the declared length does not fit in a `usize`.
fn bstr(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
    // TODO: Handle indefinite length
    let (rest, declared) = bits(preceded(tag(2, 3usize), integer))(i)?;
    let byte_len = usize::try_from(declared).unwrap();
    take_bytes(byte_len)(rest).map(|(rest, payload)| (rest, CborZero::Bytes(payload)))
}

/// Parses a text-string item: the 3-bit prefix `3`, an [`integer`] length,
/// then that many bytes, borrowed from the input as `CborZero::String`.
///
/// # Panics
///
/// Panics if the declared length does not fit in a `usize`, or if the bytes
/// are not valid UTF-8.
fn str(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
    // TODO: Handle indefinite length
    let (rest, declared) = bits(preceded(tag(3, 3usize), integer))(i)?;
    let byte_len = usize::try_from(declared).unwrap();
    let (rest, raw) = take_bytes(byte_len)(rest)?;
    let text = std::str::from_utf8(raw).unwrap();
    Ok((rest, CborZero::String(text)))
}

/// Parses an array item: the 3-bit prefix `4`, an [`integer`] element count,
/// then that many nested items parsed with [`value`], returned as
/// `CborZero::Array`.
///
/// # Panics
///
/// Panics if the declared element count does not fit in a `usize`.
fn array(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
    // TODO: Handle indefinite length
    let (i, length) = bits(preceded(tag(4, 3usize), integer))(i)?;
    // Checked conversion, as in `bstr`/`str`: an `as` cast would silently
    // truncate the count on targets where `usize` is narrower than `u64`.
    let length = usize::try_from(length).unwrap();
    let (i, data) = count(value, length)(i)?;
    Ok((i, CborZero::Array(data)))
}

/// Parses a map item: the 3-bit prefix `5`, an [`integer`] entry count, then
/// that many key/value pairs (each side parsed with [`value`]), returned as
/// `CborZero::Map`.
///
/// # Panics
///
/// Panics if the declared entry count does not fit in a `usize`.
fn cbor_map(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
    // TODO: Handle indefinite length
    let (i, length) = bits(preceded(tag(5, 3usize), integer))(i)?;
    // Checked conversion, as in `bstr`/`str`: an `as` cast would silently
    // truncate the count on targets where `usize` is narrower than `u64`.
    let length = usize::try_from(length).unwrap();
    let (i, data) = count(pair(value, value), length)(i)?;
    Ok((i, CborZero::Map(data)))
}

/// Parses a tagged item: the 3-bit prefix `6`, an [`integer`] tag number,
/// then one nested item parsed with [`value`], returned as `CborZero::Tag`
/// with the nested item boxed.
fn cbor_tag(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
    let (rest, tag_number) = bits(preceded(tag(6, 3usize), integer))(i)?;
    value(rest).map(|(rest, inner)| (rest, CborZero::Tag(tag_number, Box::new(inner))))
}

fn float_simple<'a>(i: &'a [u8]) -> IResult<&[u8], CborZero<'a>> {
bits(preceded(
tag(7, 3usize),
alt((
to(CborZero::Bool(false), tag(20, 5usize)),
to(CborZero::Bool(true), tag(21, 5usize)),
to(CborZero::Null, tag(22, 5usize)),
to(CborZero::Undef, tag(23, 5usize)),
// preceded(tag(24, 5usize), ...), // u8
// preceded(tag(25, 5usize), map(be_f16, |v| CborZero::HalfFloat(v))),
preceded(
tag(26, 5usize),
map(bytes(be_f32::<_, nom::error::Error<&'a [u8]>>), |v| {
CborZero::SingleFloat(v)
}),
),
preceded(
tag(27, 5usize),
map(bytes(be_f64::<_, nom::error::Error<&'a [u8]>>), |v| {
CborZero::DoubleFloat(v)
}),
),
)),
))(i)
}

/// Parses a single item of any supported kind.
///
/// The item parsers are tried in order of their 3-bit prefix (`0` through
/// `7`); the first one that succeeds provides the result.
fn value(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
    let mut any_item = alt((
        uint,         // prefix 0
        nint,         // prefix 1
        bstr,         // prefix 2
        str,          // prefix 3
        array,        // prefix 4
        cbor_map,     // prefix 5
        cbor_tag,     // prefix 6
        float_simple, // prefix 7
    ));
    any_item(i)
}

/// Entry point of the nom-based parser: parses one item from the start of `i`
/// by delegating to [`value`].
///
/// On success, returns the unconsumed remainder of the input together with
/// the parsed `CborZero` item, which borrows from `i`.
pub fn cbor(i: &[u8]) -> IResult<&[u8], CborZero<'_>> {
value(i)
}
}
3 changes: 2 additions & 1 deletion examples/foo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,10 @@ fn eval<'a>(
.zip(arg_names.iter())
.map(|(val, name)| Ok((*name, val?)))
.collect::<Result<_, String>>()?;
let old_vars = vars.len();
vars.append(&mut args);
let output = eval(body, vars, funcs);
vars.truncate(vars.len() - args.len());
vars.truncate(old_vars);
output
} else {
Err(format!(
Expand Down
12 changes: 6 additions & 6 deletions src/combinator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: Parser<'a, I, OA, E>,
F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> O,
F: Fn(OA, &mut MapExtra<'a, '_, I, E>) -> O,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O> {
Expand All @@ -373,7 +373,7 @@ where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: IterParser<'a, I, OA, E>,
F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> O,
F: Fn(OA, &mut MapExtra<'a, '_, I, E>) -> O,
{
type IterState<M: Mode> = A::IterState<M>
where
Expand Down Expand Up @@ -579,7 +579,7 @@ where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: Parser<'a, I, OA, E>,
F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>) -> Result<O, E::Error>,
F: Fn(OA, &mut MapExtra<'a, '_, I, E>) -> Result<O, E::Error>,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O> {
Expand Down Expand Up @@ -2363,7 +2363,7 @@ where
A: IterParser<'a, I, OA, E>,
B: Parser<'a, I, O, E>,
E: ParserExtra<'a, I>,
F: Fn(OA, O, &mut MapExtra<'a, '_, '_, I, E>) -> O,
F: Fn(OA, O, &mut MapExtra<'a, '_, I, E>) -> O,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O>
Expand Down Expand Up @@ -2498,7 +2498,7 @@ where
A: Parser<'a, I, O, E>,
B: IterParser<'a, I, OB, E>,
E: ParserExtra<'a, I>,
F: Fn(O, OB, &mut MapExtra<'a, '_, '_, I, E>) -> O,
F: Fn(O, OB, &mut MapExtra<'a, '_, I, E>) -> O,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, O>
Expand Down Expand Up @@ -2689,7 +2689,7 @@ where
I: Input<'a>,
E: ParserExtra<'a, I>,
A: Parser<'a, I, OA, E>,
F: Fn(OA, &mut MapExtra<'a, '_, '_, I, E>, &mut Emitter<E::Error>) -> U,
F: Fn(OA, &mut MapExtra<'a, '_, I, E>, &mut Emitter<E::Error>) -> U,
{
#[inline(always)]
fn go<M: Mode>(&self, inp: &mut InputRef<'a, '_, I, E>) -> PResult<M, U>
Expand Down
40 changes: 24 additions & 16 deletions src/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1513,28 +1513,36 @@ impl<E> Emitter<E> {
}

/// See [`Parser::map_with`].
/// Note: 'a is the lifetime of the Input, 'b is the lifetime of the embedded InputRef, 'parse is the lifetime of the parser and corresponding state.
pub struct MapExtra<'a, 'b, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> {
before: Offset<'a, 'parse, I>,
inp: &'b mut InputRef<'a, 'parse, I, E>,
pub struct MapExtra<'a, 'b, I: Input<'a>, E: ParserExtra<'a, I>> {
before: I::Offset,
after: I::Offset,
inp: &'b I,
state: &'b mut E::State,
ctx: &'b E::Context,
}

impl<'a, 'b, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, 'parse, I, E> {
/// Create new MapExtra
/// SAFETY: `before` Offset must be from a previous call to inp.offset().
pub(crate) fn new(
impl<'a, 'b, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, I, E> {
#[inline(always)]
pub(crate) fn new<'parse>(
before: Offset<'a, 'parse, I>,
inp: &'b mut InputRef<'a, 'parse, I, E>,
) -> Self {
MapExtra { before, inp }
Self {
before: before.offset,
after: inp.offset,
ctx: inp.ctx,
state: inp.state,
inp: inp.input,
}
}

/// Get the span corresponding to the output.
// SAFETY: The offsets both came from the same input
// TODO: Should this make `MapExtra::new` unsafe? Probably, but it's an internal API and we simply wouldn't
// ever abuse it in this way, even accidentally.
#[inline(always)]
pub fn span(&self) -> I::Span {
self.inp.span(self.before..self.inp.offset())
// SAFETY: The offsets both came from the same input
// TODO: Should this make `MapExtra::new` unsafe? Probably, but it's an internal API and we simply wouldn't
// ever abuse it in this way, even accidentally.
unsafe { self.inp.span(self.before..self.after) }
}

/// Get the slice corresponding to the output.
Expand All @@ -1543,18 +1551,18 @@ impl<'a, 'b, 'parse, I: Input<'a>, E: ParserExtra<'a, I>> MapExtra<'a, 'b, 'pars
where
I: SliceInput<'a>,
{
self.inp.slice(self.before..self.inp.offset())
self.inp.slice(self.before..self.after)
}

/// Get the parser state.
#[inline(always)]
pub fn state(&mut self) -> &mut E::State {
self.inp.state()
self.state
}

/// Get the current parser context.
#[inline(always)]
pub fn ctx(&self) -> &E::Context {
self.inp.ctx()
self.ctx
}
}
44 changes: 6 additions & 38 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,7 @@
#![cfg_attr(feature = "nightly", allow(internal_features))]
#![cfg_attr(
feature = "nightly",
feature(
never_type,
fn_traits,
tuple_trait,
unboxed_closures,
diagnostic_namespace
)
feature(never_type, fn_traits, tuple_trait, unboxed_closures)
)]
//
// README.md links these files via the main branch. For docs.rs we however want to link them
Expand Down Expand Up @@ -617,10 +611,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// assert_eq!(palindrome_parser().parse("hello olleh").into_result().as_deref(), Ok(" olleh"));
/// assert!(palindrome_parser().parse("abccb").into_result().is_err());
/// ```
fn map_with<U, F: Fn(O, &mut MapExtra<'a, '_, '_, I, E>) -> U>(
self,
f: F,
) -> MapWith<Self, O, F>
fn map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> U>(self, f: F) -> MapWith<Self, O, F>
where
Self: Sized,
{
Expand Down Expand Up @@ -774,7 +765,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
/// [`Parser::validate`] instead.
///
/// The output type of this parser is `U`, the [`Ok`] return value of the function.
fn try_map_with<U, F: Fn(O, &mut MapExtra<'a, '_, '_, I, E>) -> Result<U, E::Error>>(
fn try_map_with<U, F: Fn(O, &mut MapExtra<'a, '_, I, E>) -> Result<U, E::Error>>(
self,
f: F,
) -> TryMapWith<Self, O, F>
Expand Down Expand Up @@ -1633,7 +1624,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
#[cfg_attr(debug_assertions, track_caller)]
fn foldl_with<B, F, OB>(self, other: B, f: F) -> FoldlWith<F, Self, B, OB, E>
where
F: Fn(O, OB, &mut MapExtra<'a, '_, '_, I, E>) -> O,
F: Fn(O, OB, &mut MapExtra<'a, '_, I, E>) -> O,
B: IterParser<'a, I, OB, E>,
Self: Sized,
{
Expand Down Expand Up @@ -1949,7 +1940,7 @@ pub trait Parser<'a, I: Input<'a>, O, E: ParserExtra<'a, I> = extra::Default>:
fn validate<U, F>(self, f: F) -> Validate<Self, O, F>
where
Self: Sized,
F: Fn(O, &mut MapExtra<'a, '_, '_, I, E>, &mut Emitter<E::Error>) -> U,
F: Fn(O, &mut MapExtra<'a, '_, I, E>, &mut Emitter<E::Error>) -> U,
{
Validate {
parser: self,
Expand Down Expand Up @@ -2502,7 +2493,7 @@ where
#[cfg_attr(debug_assertions, track_caller)]
fn foldr_with<B, F, OA>(self, other: B, f: F) -> FoldrWith<F, Self, B, O, E>
where
F: Fn(O, OA, &mut MapExtra<'a, '_, '_, I, E>) -> OA,
F: Fn(O, OA, &mut MapExtra<'a, '_, I, E>) -> OA,
B: Parser<'a, I, OA, E>,
Self: Sized,
{
Expand Down Expand Up @@ -3245,29 +3236,6 @@ mod tests {
// TODO what about IterConfigure and TryIterConfigure?
}

// Calls `define` twice on the same declared `Recursive` parser. The test is
// marked `#[should_panic]`, so it passes only if something in the body panics.
#[test]
#[should_panic]
fn recursive_define_twice() {
let mut expr = Recursive::declare();
// First definition: either `<expr>+<expr>` (operands concatenated into one
// string) or an atom made of one or more alphabetic characters.
expr.define({
let atom = any::<&str, extra::Default>()
.filter(|c: &char| c.is_alphabetic())
.repeated()
.at_least(1)
.collect();
let sum = expr
.clone()
.then_ignore(just('+'))
.then(expr.clone())
.map(|(a, b)| format!("{}{}", a, b));

sum.or(atom)
});
// Second definition of the same parser.
// NOTE(review): presumably this is the call expected to panic — confirm against `Recursive::define`.
expr.define(expr.clone());

expr.then_ignore(end()).parse("a+b+c");
}

#[test]
#[should_panic]
fn todo_err() {
Expand Down
Loading