Skip to content

Commit 58c3968

Browse files
authored
Move Nautilus to LibAFL, remove AGPL dependencies (#2265)
* Copy choose method for unbounded iterators * Add choose method for unbounded iterators * Copy&paste in nautilus grammartec * cargo * fmt * Initial Nautilus in LibAFL * missing link * clippy * clippy * more clippy * docs * docs * more docs * remove nautilus default * fix doctest * fmt * less vec * test * less flakey * clippy * clippy
1 parent 0f9c82f commit 58c3968

File tree

33 files changed

+2971
-109
lines changed

33 files changed

+2971
-109
lines changed

.github/workflows/build_and_test.yml

-14
Original file line numberDiff line numberDiff line change
@@ -189,19 +189,6 @@ jobs:
189189
# Fix me plz
190190
# - name: Test Build libafl_libfuzzer with embed
191191
# run: cargo +nightly test --features=embed-runtime --manifest-path libafl_libfuzzer/Cargo.toml
192-
ubuntu-check-nightly:
193-
runs-on: ubuntu-22.04
194-
needs: ubuntu
195-
steps:
196-
- uses: actions/checkout@v3
197-
- uses: ./.github/workflows/ubuntu-prepare
198-
- uses: Swatinem/rust-cache@v2
199-
with: { shared-key: "ubuntu" }
200-
# ---- build and feature check ----
201-
# cargo-hack's --feature-powerset would be nice here but libafl has a too many knobs
202-
- name: Check nightly features
203-
run: cargo +nightly check --features=agpl && cargo +nightly check --features=nautilus
204-
205192

206193
ubuntu-check:
207194
runs-on: ubuntu-22.04
@@ -218,7 +205,6 @@ jobs:
218205
# cargo-hack's --feature-powerset would be nice here but libafl has too many knobs
219206
- name: Check each feature
220207
# Skipping `python` as it has to be built with the `maturin` tool
221-
# `agpl`, `nautilus` require nightly
222208
# `sancov_pcguard_edges` is tested separately
223209
run: python3 ./scripts/parallellize_cargo_check.py ${{ matrix.instance_idx }}
224210

README.md

-8
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,3 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
149149
for inclusion in this crate by you, as defined in the Apache-2.0 license, shall
150150
be dual licensed as above, without any additional terms or conditions.
151151
</sub>
152-
153-
<br>
154-
155-
<sub>
156-
Dependencies under more restrictive licenses, such as GPL or AGPL, can be enabled
157-
using the respective feature in each crate when it is present, such as the
158-
'agpl' feature of the libafl crate.
159-
</sub>

fuzzers/baby_fuzzer_nautilus/rust-toolchain

-1
This file was deleted.

libafl/Cargo.toml

+5-11
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,8 @@ llmp_debug = ["std", "libafl_bolts/llmp_debug"]
130130
## Reduces the initial map size for llmp
131131
llmp_small_maps = ["libafl_bolts/llmp_small_maps"] # reduces initial map size for llmp
132132

133-
#! ## License-Changing Dependencies(!)
134-
135-
## Enables all features hiding dependencies licensed under `AGPL`
136-
agpl = ["nautilus"]
137-
138-
## Enables the [`Nautilus`](https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf) Grammar Mutator (AGPL-licensed)
139-
nautilus = ["grammartec", "std", "serde_json/std"]
133+
## Grammar mutator. Requires nightly.
134+
nautilus = ["std", "serde_json/std", "pyo3", "rand_trait", "regex-syntax"]
140135

141136
[build-dependencies]
142137
rustversion = "1.0"
@@ -193,16 +188,15 @@ arrayvec = { version = "0.7.4", optional = true, default-features = false } # us
193188
const_format = "0.2.32" # used for providing helpful compiler output
194189
const_panic = "0.2.8" # similarly, for formatting const panic output
195190

191+
pyo3 = { version = "0.18.3", optional = true } # For nautilus
192+
regex-syntax = { version = "0.8.3", optional = true } # For nautilus
193+
196194
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
197195
serial_test = { version = "3", optional = true, default-features = false, features = ["logging"] }
198196

199197
# Document all features of this crate (for `cargo doc`)
200198
document-features = { version = "0.2", optional = true }
201199

202-
# AGPL
203-
# !!! this create requires nightly
204-
grammartec = { version = "0.3.1", optional = true }
205-
206200
[target.'cfg(unix)'.dependencies]
207201
libc = "0.2" # For (*nix) libc
208202

libafl/build.rs

+1-6
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,7 @@ fn nightly() {
66
}
77

88
#[rustversion::not(nightly)]
9-
fn nightly() {
10-
assert!(
11-
cfg!(all(not(docrs), not(feature = "nautilus"))),
12-
"The 'nautilus' feature of libafl requires a nightly compiler"
13-
);
14-
}
9+
fn nightly() {}
1510

1611
fn main() {
1712
println!("cargo:rustc-check-cfg=cfg(nightly)");

libafl/src/common/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
//! This module defines trait shared across different `LibAFL` modules
22
3+
#![allow(unused, missing_docs)]
4+
35
use alloc::boxed::Box;
46
use core::any::type_name;
57

8+
#[cfg(feature = "nautilus")]
9+
pub mod nautilus;
10+
611
use libafl_bolts::{
712
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
813
Error,

libafl/src/common/nautilus/README.md

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Nautilus 2.0 LibAFL Mutator
2+
3+
Nautilus is a coverage guided, grammar-based mutator. You can use it to improve your test coverage and find more bugs. By specifying the grammar of semi-valid inputs, Nautilus is able to perform complex mutation and to uncover more interesting test cases. Many of the ideas behind the original fuzzer are documented in a paper published at NDSS 2019.
4+
5+
<p>
6+
<a href="https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Nautilus.pdf"> <img align="right" width="200" src="https://github.com/RUB-SysSec/nautilus/raw/master/paper.png"> </a>
7+
</p>
8+
9+
Version 2.0 has added many improvements to this early prototype.
10+
Features from version 2.0 we support in LibAFL:
11+
12+
* Support for grammars specified in python
13+
* Support for non-context free grammars using python scripts to generate inputs from the structure
14+
* Support for specifying binary protocols/formats
15+
* Support for specifying regex based terminals that aren't part of the directed mutations
16+
* Better ability to avoid generating the same very short inputs over and over
17+
* Helpful error output on invalid grammars
18+
19+
## How Does Nautilus Work?
20+
21+
You specify a grammar using rules such as `EXPR -> EXPR + EXPR` or `EXPR -> NUM` and `NUM -> 1`. From these rules, the fuzzer constructs a tree. This internal representation allows the fuzzer to apply much more complex mutations than are possible on raw bytes. This tree is then turned into a real input for the target application. In normal Context Free Grammars, this process is straightforward: all leaves are concatenated. The left tree in the example below would unparse to the input `a=1+2` and the right one to `a=1+1+1+2`. To increase the expressiveness of your grammars, using Nautilus you are able to provide python functions for the unparsing process to allow much more complex specifications.
22+
23+
<p align="center">
24+
<img width="400" align="center" src="https://github.com/RUB-SysSec/nautilus/raw/master/tree.png">
25+
</p>
26+
27+
## Examples
28+
29+
Here, we use python to generate a grammar for valid XML-like inputs. Notice the use of a script rule to ensure the opening
30+
and closing tags match.
31+
32+
```python
33+
#ctx.rule(NONTERM: string, RHS: string|bytes) adds a rule NONTERM->RHS. We can use {NONTERM} in the RHS to request a recursion.
34+
ctx.rule("START","<document>{XML_CONTENT}</document>")
35+
ctx.rule("XML_CONTENT","{XML}{XML_CONTENT}")
36+
ctx.rule("XML_CONTENT","")
37+
38+
#ctx.script(NONTERM:string, RHS: [string]], func) adds a rule NONTERM->func(*RHS).
39+
# In contrast to normal `rule`, RHS is an array of nonterminals.
40+
# It's up to the function to combine the values returned for the NONTERMINALS with any fixed content used.
41+
ctx.script("XML",["TAG","ATTR","XML_CONTENT"], lambda tag,attr,body: b"<%s %s>%s</%s>"%(tag,attr,body,tag) )
42+
ctx.rule("ATTR","foo=bar")
43+
ctx.rule("TAG","some_tag")
44+
ctx.rule("TAG","other_tag")
45+
46+
#sometimes we don't want to explore the set of possible inputs in more detail. For example, if we fuzz a script
47+
#interpreter, we don't want to spend time on fuzzing all different variable names. In such cases we can use Regex
48+
#terminals. Regex terminals are only mutated during generation, but not during normal mutation stages, saving a lot of time.
49+
#The fuzzer still explores different values for the regex, but it won't be able to learn interesting values incrementally.
50+
#Use this when incremental exploration would most likely waste time.
51+
52+
ctx.regex("TAG","[a-z]+")
53+
```
54+
55+
To test your [grammars](https://github.com/nautilus-fuzz/nautilus/tree/mit-main/grammars) you can use the generator:
56+
57+
```sh
58+
$ cargo run --bin generator -- -g grammars/grammar_py_exmaple.py -t 100
59+
<document><some_tag foo=bar><other_tag foo=bar><other_tag foo=bar><some_tag foo=bar></some_tag></other_tag><some_tag foo=bar><other_tag foo=bar></other_tag></some_tag><other_tag foo=bar></other_tag><some_tag foo=bar></some_tag></other_tag><other_tag foo=bar></other_tag><some_tag foo=bar></some_tag></some_tag></document>
60+
```
61+
62+
## Trophies
63+
64+
* <https://github.com/Microsoft/ChakraCore/issues/5503>
65+
* <https://github.com/mruby/mruby/issues/3995> (**CVE-2018-10191**)
66+
* <https://github.com/mruby/mruby/issues/4001> (**CVE-2018-10199**)
67+
* <https://github.com/mruby/mruby/issues/4038> (**CVE-2018-12248**)
68+
* <https://github.com/mruby/mruby/issues/4027> (**CVE-2018-11743**)
69+
* <https://github.com/mruby/mruby/issues/4036> (**CVE-2018-12247**)
70+
* <https://github.com/mruby/mruby/issues/4037> (**CVE-2018-12249**)
71+
* <https://bugs.php.net/bug.php?id=76410>
72+
* <https://bugs.php.net/bug.php?id=76244>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
use alloc::{string::String, vec::Vec};
2+
use std::{
3+
fs::File,
4+
io::Write,
5+
sync::{atomic::AtomicBool, RwLock},
6+
};
7+
8+
use hashbrown::{HashMap, HashSet};
9+
use libafl_bolts::rands::Rand;
10+
use serde::{Deserialize, Serialize};
11+
12+
use super::{
13+
context::Context,
14+
newtypes::{NTermId, NodeId, RuleId},
15+
rule::RuleIdOrCustom,
16+
tree::{Tree, TreeLike},
17+
};
18+
19+
#[derive(Debug)]
20+
pub struct ChunkStoreWrapper {
21+
pub chunkstore: RwLock<ChunkStore>,
22+
pub is_locked: AtomicBool,
23+
}
24+
impl ChunkStoreWrapper {
25+
#[must_use]
26+
pub fn new(work_dir: String) -> Self {
27+
ChunkStoreWrapper {
28+
chunkstore: RwLock::new(ChunkStore::new(work_dir)),
29+
is_locked: AtomicBool::new(false),
30+
}
31+
}
32+
}
33+
34+
/// Store of previously seen subtrees ("chunks"), indexed by nonterminal,
/// used to splice known-good fragments into other trees.
#[derive(Debug, Serialize, Deserialize)]
pub struct ChunkStore {
    // For each nonterminal, all (index into `trees`, node id) pairs whose
    // subtree derives from that nonterminal.
    nts_to_chunks: HashMap<NTermId, Vec<(usize, NodeId)>>,
    // Unparsed outputs already recorded; used to deduplicate chunks.
    seen_outputs: HashSet<Vec<u8>>,
    // Trees that contributed at least one new chunk.
    trees: Vec<Tree>,
    // Base directory; chunk files are written to `<work_dir>/outputs/chunks/`.
    work_dir: String,
    // Running counter used to name chunk files on disk.
    number_of_chunks: usize,
}
42+
43+
impl ChunkStore {
44+
#[must_use]
45+
pub fn new(work_dir: String) -> Self {
46+
ChunkStore {
47+
nts_to_chunks: HashMap::new(),
48+
seen_outputs: HashSet::new(),
49+
trees: vec![],
50+
work_dir,
51+
number_of_chunks: 0,
52+
}
53+
}
54+
55+
pub fn add_tree(&mut self, tree: Tree, ctx: &Context) {
56+
let mut buffer = vec![];
57+
let id = self.trees.len();
58+
let mut contains_new_chunk = false;
59+
for i in 0..tree.size() {
60+
buffer.truncate(0);
61+
if tree.sizes[i] > 30 {
62+
continue;
63+
}
64+
let n = NodeId::from(i);
65+
tree.unparse(n, ctx, &mut buffer);
66+
if !self.seen_outputs.contains(&buffer) {
67+
self.seen_outputs.insert(buffer.clone());
68+
self.nts_to_chunks
69+
.entry(tree.get_rule(n, ctx).nonterm())
70+
.or_insert_with(Vec::new)
71+
.push((id, n));
72+
let mut file = File::create(format!(
73+
"{}/outputs/chunks/chunk_{:09}",
74+
self.work_dir, self.number_of_chunks
75+
))
76+
.expect("RAND_596689790");
77+
self.number_of_chunks += 1;
78+
file.write_all(&buffer).expect("RAND_606896756");
79+
contains_new_chunk = true;
80+
}
81+
}
82+
if contains_new_chunk {
83+
self.trees.push(tree);
84+
}
85+
}
86+
87+
pub fn get_alternative_to<R: Rand>(
88+
&self,
89+
rand: &mut R,
90+
r: RuleId,
91+
ctx: &Context,
92+
) -> Option<(&Tree, NodeId)> {
93+
let chunks = self
94+
.nts_to_chunks
95+
.get(&ctx.get_nt(&RuleIdOrCustom::Rule(r)));
96+
let relevant = chunks.map(|vec| {
97+
vec.iter()
98+
.filter(move |&&(tid, nid)| self.trees[tid].get_rule_id(nid) != r)
99+
});
100+
//The unwrap_or is just a quick and dirty fix to catch Errors from the sampler
101+
let selected = relevant.and_then(|iter| rand.choose(iter));
102+
selected.map(|&(tid, nid)| (&self.trees[tid], nid))
103+
}
104+
105+
#[must_use]
106+
pub fn trees(&self) -> usize {
107+
self.trees.len()
108+
}
109+
}
110+
111+
// Integration test for `ChunkStore` using a tiny three-rule grammar.
#[cfg(test)]
mod tests {
    use alloc::string::ToString;
    use std::fs;

    use libafl_bolts::rands::StdRand;

    use crate::common::nautilus::grammartec::{
        chunkstore::ChunkStore, context::Context, tree::TreeLike,
    };

    // Builds the grammar A -> "a {B}", B -> "b {C}", C -> "c", generates a
    // tree per rule, feeds them to a `ChunkStore`, and checks the recorded
    // chunks unparse to the expected strings.
    #[test]
    fn chunk_store() {
        let mut rand = StdRand::new();
        let mut ctx = Context::new();
        let r1 = ctx.add_rule("A", b"a {B:a}");
        let r2 = ctx.add_rule("B", b"b {C:a}");
        let _ = ctx.add_rule("C", b"c");
        ctx.initialize(101);
        let random_size = ctx.get_random_len_for_ruleid(&r1);
        println!("random_size: {random_size}");
        let tree = ctx.generate_tree_from_rule(&mut rand, r1, random_size);
        // `add_tree` writes chunk files here and panics if the dir is missing.
        fs::create_dir_all("/tmp/outputs/chunks").expect("40234068");
        let mut cks = ChunkStore::new("/tmp/".to_string());
        cks.add_tree(tree, &ctx);
        // NOTE(review): the assertions below look deliberately disabled —
        // presumably because randomized tree generation made them flaky.
        // Confirm before re-enabling.
        // assert!(cks.seen_outputs.contains("a b c".as_bytes()));
        // assert!(cks.seen_outputs.contains("b c".as_bytes()));
        // assert!(cks.seen_outputs.contains("c".as_bytes()));
        assert_eq!(cks.nts_to_chunks[&ctx.nt_id("A")].len(), 1);
        let (tree_id, _) = cks.nts_to_chunks[&ctx.nt_id("A")][0];
        assert_eq!(cks.trees[tree_id].unparse_to_vec(&ctx), "a b c".as_bytes());

        let random_size = ctx.get_random_len_for_ruleid(&r2);
        let tree = ctx.generate_tree_from_rule(&mut rand, r2, random_size);
        cks.add_tree(tree, &ctx);
        // NOTE(review): also disabled — see note above about flakiness.
        // assert_eq!(cks.seen_outputs.len(), 3);
        // assert_eq!(cks.nts_to_chunks[&ctx.nt_id("B")].len(), 1);
        let (tree_id, node_id) = cks.nts_to_chunks[&ctx.nt_id("B")][0];
        assert_eq!(
            cks.trees[tree_id].unparse_node_to_vec(node_id, &ctx),
            "b c".as_bytes()
        );
    }
}

0 commit comments

Comments
 (0)