Skip to content

Commit 58c3968

Browse files
authored
Move Nautilus to LibAFL, remove AGPL dependencies (#2265)
* Copy choose method for unbounded iterators * Add choose method for unbounded iterators * Copy&paste in nautilus grammartec * cargo * fmt * Initial Nautilus in LibAFL * missing link * clippy * clippy * more clippy * docs * docs * more docs * remove nautilus default * fix doctest * fmt * less vec * test * less flakey * clippy * clippy
1 parent 0f9c82f commit 58c3968

File tree

33 files changed

+2971
-109
lines changed

33 files changed

+2971
-109
lines changed

.github/workflows/build_and_test.yml

-14
Original file line numberDiff line numberDiff line change
@@ -189,19 +189,6 @@ jobs:
189189
# Fix me plz
190190
# - name: Test Build libafl_libfuzzer with embed
191191
# run: cargo +nightly test --features=embed-runtime --manifest-path libafl_libfuzzer/Cargo.toml
192-
ubuntu-check-nightly:
193-
runs-on: ubuntu-22.04
194-
needs: ubuntu
195-
steps:
196-
- uses: actions/checkout@v3
197-
- uses: ./.github/workflows/ubuntu-prepare
198-
- uses: Swatinem/rust-cache@v2
199-
with: { shared-key: "ubuntu" }
200-
# ---- build and feature check ----
201-
# cargo-hack's --feature-powerset would be nice here but libafl has a too many knobs
202-
- name: Check nightly features
203-
run: cargo +nightly check --features=agpl && cargo +nightly check --features=nautilus
204-
205192

206193
ubuntu-check:
207194
runs-on: ubuntu-22.04
@@ -218,7 +205,6 @@ jobs:
218205
# cargo-hack's --feature-powerset would be nice here but libafl has too many knobs
219206
- name: Check each feature
220207
# Skipping `python` as it has to be built with the `maturin` tool
221-
# `agpl`, `nautilus` require nightly
222208
# `sancov_pcguard_edges` is tested separately
223209
run: python3 ./scripts/parallellize_cargo_check.py ${{ matrix.instance_idx }}
224210

README.md

-8
Original file line numberDiff line numberDiff line change
@@ -149,11 +149,3 @@ Unless you explicitly state otherwise, any contribution intentionally submitted
149149
for inclusion in this crate by you, as defined in the Apache-2.0 license, shall
150150
be dual licensed as above, without any additional terms or conditions.
151151
</sub>
152-
153-
<br>
154-
155-
<sub>
156-
Dependencies under more restrictive licenses, such as GPL or AGPL, can be enabled
157-
using the respective feature in each crate when it is present, such as the
158-
'agpl' feature of the libafl crate.
159-
</sub>

fuzzers/baby_fuzzer_nautilus/rust-toolchain

-1
This file was deleted.

libafl/Cargo.toml

+5-11
Original file line numberDiff line numberDiff line change
@@ -130,13 +130,8 @@ llmp_debug = ["std", "libafl_bolts/llmp_debug"]
130130
## Reduces the initial map size for llmp
131131
llmp_small_maps = ["libafl_bolts/llmp_small_maps"] # reduces initial map size for llmp
132132

133-
#! ## License-Changing Dependencies(!)
134-
135-
## Enables all features hiding dependencies licensed under `AGPL`
136-
agpl = ["nautilus"]
137-
138-
## Enables the [`Nautilus`](https://wcventure.github.io/FuzzingPaper/Paper/NDSS19_Nautilus.pdf) Grammar Mutator (AGPL-licensed)
139-
nautilus = ["grammartec", "std", "serde_json/std"]
133+
## Grammar mutator. Requires nightly.
134+
nautilus = ["std", "serde_json/std", "pyo3", "rand_trait", "regex-syntax"]
140135

141136
[build-dependencies]
142137
rustversion = "1.0"
@@ -193,16 +188,15 @@ arrayvec = { version = "0.7.4", optional = true, default-features = false } # us
193188
const_format = "0.2.32" # used for providing helpful compiler output
194189
const_panic = "0.2.8" # similarly, for formatting const panic output
195190

191+
pyo3 = { version = "0.18.3", optional = true } # For nautilus
192+
regex-syntax = { version = "0.8.3", optional = true } # For nautilus
193+
196194
# optional-dev deps (change when target.'cfg(accessible(::std))'.test-dependencies will be stable)
197195
serial_test = { version = "3", optional = true, default-features = false, features = ["logging"] }
198196

199197
# Document all features of this crate (for `cargo doc`)
200198
document-features = { version = "0.2", optional = true }
201199

202-
# AGPL
203-
# !!! this create requires nightly
204-
grammartec = { version = "0.3.1", optional = true }
205-
206200
[target.'cfg(unix)'.dependencies]
207201
libc = "0.2" # For (*nix) libc
208202

libafl/build.rs

+1-6
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,7 @@ fn nightly() {
66
}
77

88
#[rustversion::not(nightly)]
9-
fn nightly() {
10-
assert!(
11-
cfg!(all(not(docrs), not(feature = "nautilus"))),
12-
"The 'nautilus' feature of libafl requires a nightly compiler"
13-
);
14-
}
9+
fn nightly() {}
1510

1611
fn main() {
1712
println!("cargo:rustc-check-cfg=cfg(nightly)");

libafl/src/common/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
//! This module defines trait shared across different `LibAFL` modules
22
3+
#![allow(unused, missing_docs)]
4+
35
use alloc::boxed::Box;
46
use core::any::type_name;
57

8+
#[cfg(feature = "nautilus")]
9+
pub mod nautilus;
10+
611
use libafl_bolts::{
712
serdeany::{NamedSerdeAnyMap, SerdeAny, SerdeAnyMap},
813
Error,

libafl/src/common/nautilus/README.md

+72
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# Nautilus 2.0 LibAFL Mutator
2+
3+
Nautilus is a coverage guided, grammar-based mutator. You can use it to improve your test coverage and find more bugs. By specifying the grammar of semi-valid inputs, Nautilus is able to perform complex mutation and to uncover more interesting test cases. Many of the ideas behind the original fuzzer are documented in a paper published at NDSS 2019.
4+
5+
<p>
6+
<a href="https://www.syssec.ruhr-uni-bochum.de/media/emma/veroeffentlichungen/2018/12/17/NDSS19-Nautilus.pdf"> <img align="right" width="200" src="https://github.com/RUB-SysSec/nautilus/raw/master/paper.png"> </a>
7+
</p>
8+
9+
Version 2.0 has added many improvements to this early prototype.
10+
Features from version 2.0 we support in LibAFL:
11+
12+
* Support for grammars specified in python
13+
* Support for non-context free grammars using python scripts to generate inputs from the structure
14+
* Support for specifying binary protocols/formats
15+
* Support for specifying regex based terminals that aren't part of the directed mutations
16+
* Better ability to avoid generating the same very short inputs over and over
17+
* Helpful error output on invalid grammars
18+
19+
## How Does Nautilus Work?
20+
21+
You specify a grammar using rules such as `EXPR -> EXPR + EXPR` or `EXPR -> NUM` and `NUM -> 1`. From these rules, the fuzzer constructs a tree. This internal representation allows the fuzzer to apply much more complex mutations than are possible on raw bytes. This tree is then turned into a real input for the target application. In normal Context Free Grammars, this process is straightforward: all leaves are concatenated. The left tree in the example below would unparse to the input `a=1+2` and the right one to `a=1+1+1+2`. To increase the expressiveness of your grammars, using Nautilus you are able to provide python functions for the unparsing process to allow much more complex specifications.
22+
23+
<p align="center">
24+
<img width="400" align="center" src="https://github.com/RUB-SysSec/nautilus/raw/master/tree.png">
25+
</p>
26+
27+
## Examples
28+
29+
Here, we use python to generate a grammar for valid XML-like inputs. Notice the use of a script rule to ensure the opening
30+
and closing tags match.
31+
32+
```python
33+
#ctx.rule(NONTERM: string, RHS: string|bytes) adds a rule NONTERM->RHS. We can use {NONTERM} in the RHS to request a recursion.
34+
ctx.rule("START","<document>{XML_CONTENT}</document>")
35+
ctx.rule("XML_CONTENT","{XML}{XML_CONTENT}")
36+
ctx.rule("XML_CONTENT","")
37+
38+
#ctx.script(NONTERM:string, RHS: [string]], func) adds a rule NONTERM->func(*RHS).
39+
# In contrast to normal `rule`, RHS is an array of nonterminals.
40+
# It's up to the function to combine the values returned for the NONTERMINALS with any fixed content used.
41+
ctx.script("XML",["TAG","ATTR","XML_CONTENT"], lambda tag,attr,body: b"<%s %s>%s</%s>"%(tag,attr,body,tag) )
42+
ctx.rule("ATTR","foo=bar")
43+
ctx.rule("TAG","some_tag")
44+
ctx.rule("TAG","other_tag")
45+
46+
#sometimes we don't want to explore the set of possible inputs in more detail. For example, if we fuzz a script
47+
#interpreter, we don't want to spend time on fuzzing all different variable names. In such cases we can use Regex
48+
#terminals. Regex terminals are only mutated during generation, but not during normal mutation stages, saving a lot of time.
49+
#The fuzzer still explores different values for the regex, but it won't be able to learn interesting values incrementally.
50+
#Use this when incremental exploration would most likely waste time.
51+
52+
ctx.regex("TAG","[a-z]+")
53+
```
54+
55+
To test your [grammars](https://github.com/nautilus-fuzz/nautilus/tree/mit-main/grammars) you can use the generator:
56+
57+
```sh
58+
$ cargo run --bin generator -- -g grammars/grammar_py_exmaple.py -t 100
59+
<document><some_tag foo=bar><other_tag foo=bar><other_tag foo=bar><some_tag foo=bar></some_tag></other_tag><some_tag foo=bar><other_tag foo=bar></other_tag></some_tag><other_tag foo=bar></other_tag><some_tag foo=bar></some_tag></other_tag><other_tag foo=bar></other_tag><some_tag foo=bar></some_tag></some_tag></document>
60+
```
61+
62+
## Trophies
63+
64+
* <https://github.com/Microsoft/ChakraCore/issues/5503>
65+
* <https://github.com/mruby/mruby/issues/3995> (**CVE-2018-10191**)
66+
* <https://github.com/mruby/mruby/issues/4001> (**CVE-2018-10199**)
67+
* <https://github.com/mruby/mruby/issues/4038> (**CVE-2018-12248**)
68+
* <https://github.com/mruby/mruby/issues/4027> (**CVE-2018-11743**)
69+
* <https://github.com/mruby/mruby/issues/4036> (**CVE-2018-12247**)
70+
* <https://github.com/mruby/mruby/issues/4037> (**CVE-2018-12249**)
71+
* <https://bugs.php.net/bug.php?id=76410>
72+
* <https://bugs.php.net/bug.php?id=76244>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
use alloc::{string::String, vec::Vec};
2+
use std::{
3+
fs::File,
4+
io::Write,
5+
sync::{atomic::AtomicBool, RwLock},
6+
};
7+
8+
use hashbrown::{HashMap, HashSet};
9+
use libafl_bolts::rands::Rand;
10+
use serde::{Deserialize, Serialize};
11+
12+
use super::{
13+
context::Context,
14+
newtypes::{NTermId, NodeId, RuleId},
15+
rule::RuleIdOrCustom,
16+
tree::{Tree, TreeLike},
17+
};
18+
19+
#[derive(Debug)]
20+
pub struct ChunkStoreWrapper {
21+
pub chunkstore: RwLock<ChunkStore>,
22+
pub is_locked: AtomicBool,
23+
}
24+
impl ChunkStoreWrapper {
25+
#[must_use]
26+
pub fn new(work_dir: String) -> Self {
27+
ChunkStoreWrapper {
28+
chunkstore: RwLock::new(ChunkStore::new(work_dir)),
29+
is_locked: AtomicBool::new(false),
30+
}
31+
}
32+
}
33+
34+
/// Store of previously seen subtrees ("chunks"), indexed by nonterminal,
/// used to splice known-good fragments into other trees.
#[derive(Debug, Serialize, Deserialize)]
pub struct ChunkStore {
    // For each nonterminal, all (index into `trees`, node id) pairs whose
    // subtree derives from that nonterminal.
    nts_to_chunks: HashMap<NTermId, Vec<(usize, NodeId)>>,
    // Unparsed outputs already recorded; used to deduplicate chunks.
    seen_outputs: HashSet<Vec<u8>>,
    // Trees that contributed at least one new chunk.
    trees: Vec<Tree>,
    // Base directory; chunk files are written to `<work_dir>/outputs/chunks/`.
    work_dir: String,
    // Running counter used to name chunk files on disk.
    number_of_chunks: usize,
}
42+
43+
impl ChunkStore {
44+
#[must_use]
45+
pub fn new(work_dir: String) -> Self {
46+
ChunkStore {
47+
nts_to_chunks: HashMap::new(),
48+
seen_outputs: HashSet::new(),
49+
trees: vec![],
50+
work_dir,
51+
number_of_chunks: 0,
52+
}
53+
}
54+
55+
pub fn add_tree(&mut self, tree: Tree, ctx: &Context) {
56+
let mut buffer = vec![];
57+
let id = self.trees.len();
58+
let mut contains_new_chunk = false;
59+
for i in 0..tree.size() {
60+
buffer.truncate(0);
61+
if tree.sizes[i] > 30 {
62+
continue;
63+
}
64+
let n = NodeId::from(i);
65+
tree.unparse(n, ctx, &mut buffer);
66+
if !self.seen_outputs.contains(&buffer) {
67+
self.seen_outputs.insert(buffer.clone());
68+
self.nts_to_chunks
69+
.entry(tree.get_rule(n, ctx).nonterm())
70+
.or_insert_with(Vec::new)
71+
.push((id, n));
72+
let mut file = File::create(format!(
73+
"{}/outputs/chunks/chunk_{:09}",
74+
self.work_dir, self.number_of_chunks
75+
))
76+
.expect("RAND_596689790");
77+
self.number_of_chunks += 1;
78+
file.write_all(&buffer).expect("RAND_606896756");
79+
contains_new_chunk = true;
80+
}
81+
}
82+
if contains_new_chunk {
83+
self.trees.push(tree);
84+
}
85+
}
86+
87+
pub fn get_alternative_to<R: Rand>(
88+
&self,
89+
rand: &mut R,
90+
r: RuleId,
91+
ctx: &Context,
92+
) -> Option<(&Tree, NodeId)> {
93+
let chunks = self
94+
.nts_to_chunks
95+
.get(&ctx.get_nt(&RuleIdOrCustom::Rule(r)));
96+
let relevant = chunks.map(|vec| {
97+
vec.iter()
98+
.filter(move |&&(tid, nid)| self.trees[tid].get_rule_id(nid) != r)
99+
});
100+
//The unwrap_or is just a quick and dirty fix to catch Errors from the sampler
101+
let selected = relevant.and_then(|iter| rand.choose(iter));
102+
selected.map(|&(tid, nid)| (&self.trees[tid], nid))
103+
}
104+
105+
#[must_use]
106+
pub fn trees(&self) -> usize {
107+
self.trees.len()
108+
}
109+
}
110+
111+
// Integration test for `ChunkStore` using a tiny three-rule grammar.
#[cfg(test)]
mod tests {
    use alloc::string::ToString;
    use std::fs;

    use libafl_bolts::rands::StdRand;

    use crate::common::nautilus::grammartec::{
        chunkstore::ChunkStore, context::Context, tree::TreeLike,
    };

    // Builds the grammar A -> "a {B}", B -> "b {C}", C -> "c", generates a
    // tree per rule, feeds them to a `ChunkStore`, and checks the recorded
    // chunks unparse to the expected strings.
    #[test]
    fn chunk_store() {
        let mut rand = StdRand::new();
        let mut ctx = Context::new();
        let r1 = ctx.add_rule("A", b"a {B:a}");
        let r2 = ctx.add_rule("B", b"b {C:a}");
        let _ = ctx.add_rule("C", b"c");
        ctx.initialize(101);
        let random_size = ctx.get_random_len_for_ruleid(&r1);
        println!("random_size: {random_size}");
        let tree = ctx.generate_tree_from_rule(&mut rand, r1, random_size);
        // `add_tree` writes chunk files here and panics if the dir is missing.
        fs::create_dir_all("/tmp/outputs/chunks").expect("40234068");
        let mut cks = ChunkStore::new("/tmp/".to_string());
        cks.add_tree(tree, &ctx);
        // NOTE(review): the assertions below look deliberately disabled —
        // presumably because randomized tree generation made them flaky.
        // Confirm before re-enabling.
        // assert!(cks.seen_outputs.contains("a b c".as_bytes()));
        // assert!(cks.seen_outputs.contains("b c".as_bytes()));
        // assert!(cks.seen_outputs.contains("c".as_bytes()));
        assert_eq!(cks.nts_to_chunks[&ctx.nt_id("A")].len(), 1);
        let (tree_id, _) = cks.nts_to_chunks[&ctx.nt_id("A")][0];
        assert_eq!(cks.trees[tree_id].unparse_to_vec(&ctx), "a b c".as_bytes());

        let random_size = ctx.get_random_len_for_ruleid(&r2);
        let tree = ctx.generate_tree_from_rule(&mut rand, r2, random_size);
        cks.add_tree(tree, &ctx);
        // NOTE(review): also disabled — see note above about flakiness.
        // assert_eq!(cks.seen_outputs.len(), 3);
        // assert_eq!(cks.nts_to_chunks[&ctx.nt_id("B")].len(), 1);
        let (tree_id, node_id) = cks.nts_to_chunks[&ctx.nt_id("B")][0];
        assert_eq!(
            cks.trees[tree_id].unparse_node_to_vec(node_id, &ctx),
            "b c".as_bytes()
        );
    }
}

0 commit comments

Comments
 (0)