Skip to content

Commit

Permalink
preinitialize slow regex
Browse files Browse the repository at this point in the history
  • Loading branch information
bglw committed May 21, 2022
1 parent 1fbf488 commit 24ff5a2
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 7 deletions.
1 change: 1 addition & 0 deletions pagefind/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ minifier = "0.0.43"
sha-1 = "0.10"
serde_json = "1"
serde = { version = "1", features = ["derive"] }
lazy_static = "1.4.0"

[dev-dependencies]
json_dotpath = "1.1.0"
Expand Down
17 changes: 10 additions & 7 deletions pagefind/src/fossick/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use hashbrown::HashMap;
use lazy_static::lazy_static;
use lol_html::html_content::ContentType;
use lol_html::{element, text, HtmlRewriter, Settings};
use regex::Regex;
Expand All @@ -17,6 +18,12 @@ use crate::fragments::{PageFragment, PageFragmentData};
use crate::utils::full_hash;
use crate::SearchOptions;

// Regexes used by `normalize_content`. `lazy_static!` builds each one the first
// time it is dereferenced and reuses it afterwards, so the patterns are not
// recompiled on every call. The `unwrap()`s can only panic if one of these
// constant patterns were invalid.
lazy_static! {
/// Matches a run of one or more line breaks (`\n` or `\r\n`) together with
/// any whitespace immediately before, between, and after them.
static ref EXTRANEOUS_NEWLINES: Regex = Regex::new("(^|\\s)*((\n|\r\n)\\s*)+($|\\s)*").unwrap();
/// Matches a single `\n` at the very start or the very end of the text
/// (no multi-line flag is set, so `^`/`$` anchor to the whole input).
static ref TRIM_NEWLINES: Regex = Regex::new("^\n|\n$").unwrap();
/// Matches two or more consecutive whitespace characters.
static ref EXTRANEOUS_SPACES: Regex = Regex::new("\\s{2,}").unwrap();
}

pub struct FossickedData {
pub file_path: PathBuf,
pub fragment: PageFragment,
Expand Down Expand Up @@ -205,13 +212,9 @@ fn build_url(page_url: &Path, options: &SearchOptions) -> String {
}

fn normalize_content(content: &str) -> String {
let extraneous_newlines = Regex::new("(^|\\s)*((\n|\r\n)\\s*)+($|\\s)*").unwrap();
let trim_newlines = Regex::new("^\n|\n$").unwrap();
let extraneous_spaces = Regex::new("\\s{2,}").unwrap();

let content = extraneous_newlines.replace_all(content, "\n");
let content = trim_newlines.replace_all(&content, "");
let content = extraneous_spaces.replace_all(&content, " ");
let content = EXTRANEOUS_NEWLINES.replace_all(content, "\n");
let content = TRIM_NEWLINES.replace_all(&content, "");
let content = EXTRANEOUS_SPACES.replace_all(&content, " ");

content.to_string()
}

0 comments on commit 24ff5a2

Please sign in to comment.