Commit b010eae

chore(website): fix background spawn

1 parent 08235ec · commit b010eae
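
The commit replaces every `spider::tokio::spawn` call in the bindings with a spawn on the runtime returned by `pyo3_async_runtimes::tokio::get_runtime()`. The likely motivation: `tokio::spawn` requires an ambient runtime and panics when invoked from a plain (e.g. Python-owned) thread, whereas spawning through an explicit `Runtime` handle works from any thread. A minimal sketch of that distinction, assuming tokio with the `rt-multi-thread` feature; everything here is illustrative, not spider-py code:

use tokio::runtime::Runtime;

fn main() {
    // No ambient Tokio runtime exists on this thread, so a bare
    // `tokio::spawn(async {})` here would panic at runtime.
    let rt = Runtime::new().expect("failed to build runtime");

    // Spawning through an explicit handle works from any thread, which is
    // what `pyo3_async_runtimes::tokio::get_runtime()` gives the bindings.
    let handle = rt.spawn(async { 2 + 2 });

    assert_eq!(rt.block_on(handle).expect("task panicked"), 4);
}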

2 files changed: +28 -30 lines changed


Diff for: Cargo.toml

+1 -1

@@ -1,7 +1,7 @@
 [package]
 edition = "2021"
 name = "spider_rs"
-version = "0.0.56"
+version = "0.0.57"
 repository = "https://github.com/spider-rs/spider-py"
 license = "MIT"
 description = "The fastest web crawler and indexer."
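
Several hunks in src/website.rs below pair a `subscribe(*BUFFER / 2)` receiver with a background task that drains pages while the crawl proceeds. A generic sketch of that fan-out shape using a tokio broadcast channel; the channel type and payloads are stand-ins, not spider's actual internals (assumes tokio's `macros`, `rt-multi-thread`, and `sync` features):

use tokio::sync::broadcast;

#[tokio::main]
async fn main() {
    let (tx, mut rx) = broadcast::channel::<String>(16);

    // Stand-in for the spawned subscription handler in website.rs:
    // loop until the channel closes, handling each received page.
    let handle = tokio::spawn(async move {
        while let Ok(url) = rx.recv().await {
            println!("got page: {url}");
        }
    });

    // Stand-in for the crawl publishing pages as it visits them.
    for url in ["https://example.com/", "https://example.com/about"] {
        tx.send(url.to_string()).expect("receiver still alive");
    }
    drop(tx); // close the channel so the recv loop ends

    handle.await.expect("handler panicked");
}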

Diff for: src/website.rs

+27 -29

@@ -163,8 +163,9 @@ impl Website {
     let mut rx2 = website
       .subscribe(*BUFFER / 2)
       .expect("sync feature should be enabled");
+    let rt = pyo3_async_runtimes::tokio::get_runtime();

-    let handle = spider::tokio::spawn(async move {
+    let handle = rt.spawn(async move {
       while let Ok(res) = rx2.recv().await {
         let page = new_page(&res, raw_content);
         Python::with_gil(|py| {
@@ -178,7 +179,7 @@ impl Website {
       _ => 0,
     };

-    let crawl_handle = spider::tokio::spawn(async move {
+    let crawl_handle = rt.spawn(async move {
       if headless {
         website.crawl().await;
       } else {
@@ -237,8 +238,9 @@ impl Website {
       Some(handle) => handle.0 + 1,
       _ => 0,
     };
+    let rt = pyo3_async_runtimes::tokio::get_runtime();

-    let crawl_handle = spider::tokio::spawn(async move {
+    let crawl_handle = rt.spawn(async move {
       if headless {
         website.crawl().await;
       } else {
@@ -283,8 +285,9 @@ impl Website {
     let mut rx2 = website
       .subscribe(*BUFFER / 2)
       .expect("sync feature should be enabled");
+    let rt = pyo3_async_runtimes::tokio::get_runtime();

-    let handle = spider::tokio::spawn(async move {
+    let handle = rt.spawn(async move {
       while let Ok(res) = rx2.recv().await {
         let page = new_page(&res, raw_content);
         Python::with_gil(|py| {
@@ -298,7 +301,7 @@ impl Website {
       _ => 0,
     };

-    let crawl_handle = spider::tokio::spawn(async move {
+    let crawl_handle = rt.spawn(async move {
       website.crawl_smart().await;
     });

@@ -344,13 +347,14 @@ impl Website {
     _ => {
       if background {
         let mut website = slf.inner.clone();
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

         let crawl_id = match slf.crawl_handles.last() {
           Some(handle) => handle.0 + 1,
           _ => 0,
         };

-        let crawl_handle = spider::tokio::spawn(async move {
+        let crawl_handle = rt.spawn(async move {
           website.crawl_smart().await;
         });

@@ -388,8 +392,9 @@ impl Website {
     let mut rx2 = website
       .subscribe(*BUFFER / 2)
       .expect("sync feature should be enabled");
+    let rt = pyo3_async_runtimes::tokio::get_runtime();

-    let handle = spider::tokio::spawn(async move {
+    let handle = rt.spawn(async move {
       while let Ok(res) = rx2.recv().await {
         let page = new_page(&res, raw_content);

@@ -405,7 +410,7 @@ impl Website {
       _ => 0,
     };

-    let crawl_handle = spider::tokio::spawn(async move {
+    let crawl_handle = rt.spawn(async move {
       if headless {
         website.scrape().await;
       } else {
@@ -459,13 +464,14 @@ impl Website {
     _ => {
       if background {
         let mut website = slf.inner.clone();
+        let rt = pyo3_async_runtimes::tokio::get_runtime();

         let crawl_id = match slf.crawl_handles.last() {
           Some(handle) => handle.0 + 1,
           _ => 0,
         };

-        let crawl_handle = spider::tokio::spawn(async move {
+        let crawl_handle = rt.spawn(async move {
           if headless {
             website.scrape().await;
           } else {
@@ -498,8 +504,9 @@ impl Website {
       .subscribe(*BUFFER / 2)
       .expect("sync feature should be enabled");
     let raw_content = slf.raw_content;
+    let rt = pyo3_async_runtimes::tokio::get_runtime();

-    let handler = spider::tokio::spawn(async move {
+    let handler = rt.spawn(async move {
       while let Ok(res) = rx2.recv().await {
         Python::with_gil(|py| {
           let _ = callback.call(py, (new_page(&res, raw_content),), None);
@@ -603,28 +610,19 @@ impl Website {
     let py = slf.py();
     let dict = obj.downcast_bound::<pyo3::types::PyDict>(py);

-    match dict {
-      Ok(keys) => {
-        for key in keys.into_iter() {
-          let header_key = spider::reqwest::header::HeaderName::from_str(&key.0.to_string());
-
-          match header_key {
-            Ok(hn) => {
-              let header_value = key.1.to_string();
-
-              match spider::reqwest::header::HeaderValue::from_str(&header_value) {
-                Ok(hk) => {
-                  h.append(hn, hk);
-                }
-                _ => (),
-              }
-            }
-            _ => (),
+    if let Ok(keys) = dict {
+      for key in keys.into_iter() {
+        let header_key = spider::reqwest::header::HeaderName::from_str(&key.0.to_string());
+
+        if let Ok(hn) = header_key {
+          let header_value = key.1.to_string();
+
+          if let Ok(hk) = spider::reqwest::header::HeaderValue::from_str(&header_value) {
+            h.append(hn, hk);
           }
         }
-        slf.inner.with_headers(Some(h));
       }
-      _ => (),
+      slf.inner.with_headers(Some(h));
     }
   }
   _ => {
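
The final hunk is a pure refactor: nested `match` expressions whose `_ => ()` arms did nothing become `if let` chains, the idiomatic form when only the `Ok` case does work. The same shape in isolation, assuming the reqwest crate (re-exported by spider as `spider::reqwest`); `headers_from_pairs` is a made-up helper for illustration:

use std::str::FromStr;

use reqwest::header::{HeaderMap, HeaderName, HeaderValue};

fn headers_from_pairs(pairs: &[(&str, &str)]) -> HeaderMap {
    let mut h = HeaderMap::new();

    for (key, value) in pairs {
        // Invalid names or values are skipped silently, exactly what the
        // removed `_ => ()` arms did.
        if let Ok(name) = HeaderName::from_str(key) {
            if let Ok(val) = HeaderValue::from_str(value) {
                h.append(name, val);
            }
        }
    }

    h
}

fn main() {
    // "bad header" contains a space, so HeaderName::from_str rejects it.
    let headers = headers_from_pairs(&[("user-agent", "spider-py"), ("bad header", "x")]);
    assert_eq!(headers.len(), 1);
}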
