|
10 | 10 | }, |
11 | 11 | { |
12 | 12 | "cell_type": "code", |
13 | | - "execution_count": 17, |
| 13 | + "execution_count": 1, |
14 | 14 | "id": "58b40aa6", |
15 | 15 | "metadata": {}, |
16 | | - "outputs": [], |
| 16 | + "outputs": [ |
| 17 | + { |
| 18 | + "name": "stderr", |
| 19 | + "output_type": "stream", |
| 20 | + "text": [ |
| 21 | + "/home/jwinter/TBD/proj2/polars-bio/venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", |
| 22 | + " from .autonotebook import tqdm as notebook_tqdm\n", |
| 23 | + "INFO:polars_bio:Creating BioSessionContext\n" |
| 24 | + ] |
| 25 | + } |
| 26 | + ], |
17 | 27 | "source": [ |
18 | 28 | "import pandas as pd\n", |
19 | 29 | "\n", |
|
28 | 38 | "### Usage examples" |
29 | 39 | ] |
30 | 40 | }, |
| 41 | + { |
| 42 | + "cell_type": "code", |
| 43 | + "execution_count": 14, |
| 44 | + "id": "b0d81403", |
| 45 | + "metadata": {}, |
| 46 | + "outputs": [], |
| 47 | + "source": [ |
| 48 | + "pb.set_option(\"datafusion.execution.target_partitions\", \"2\")" |
| 49 | + ] |
| 50 | + }, |
31 | 51 | { |
32 | 52 | "cell_type": "markdown", |
33 | 53 | "id": "b238193d", |
|
38 | 58 | }, |
39 | 59 | { |
40 | 60 | "cell_type": "code", |
41 | | - "execution_count": 18, |
| 61 | + "execution_count": null, |
42 | 62 | "id": "0420c240", |
43 | 63 | "metadata": {}, |
44 | 64 | "outputs": [ |
45 | 65 | { |
46 | 66 | "name": "stdout", |
47 | 67 | "output_type": "stream", |
48 | 68 | "text": [ |
49 | | - " pos avg q1 median q3 lower upper\n", |
50 | | - "87 0 30.135 31.0 33.0 34.0 26.5 38.5\n", |
51 | | - "66 1 31.210 31.0 34.0 34.0 26.5 38.5\n", |
52 | | - "69 2 32.015 31.0 34.0 34.0 26.5 38.5\n", |
53 | | - "45 3 35.690 35.0 37.0 37.0 32.0 40.0\n", |
54 | | - "14 4 35.680 35.0 37.0 37.0 32.0 40.0\n", |
55 | | - ".. ... ... ... ... ... ... ...\n", |
56 | | - "40 96 31.315 32.0 34.0 35.0 27.5 39.5\n", |
57 | | - "23 97 30.670 31.0 34.0 35.0 25.0 41.0\n", |
58 | | - "37 98 31.550 32.0 34.0 35.0 27.5 39.5\n", |
59 | | - "6 99 31.250 32.0 34.0 35.0 27.5 39.5\n", |
60 | | - "4 100 31.105 31.0 34.0 35.0 25.0 41.0\n", |
| 69 | + " pos avg q1 median q3 lower upper\n", |
| 70 | + "88 0 32.548723 31.0 34.0 34.0 26.5 38.5\n", |
| 71 | + "46 1 32.719772 31.0 34.0 34.0 26.5 38.5\n", |
| 72 | + "99 2 32.789697 31.0 34.0 34.0 26.5 38.5\n", |
| 73 | + "75 3 36.162011 37.0 37.0 37.0 37.0 37.0\n", |
| 74 | + "84 4 36.122733 37.0 37.0 37.0 37.0 37.0\n", |
| 75 | + ".. ... ... ... ... ... ... ...\n", |
| 76 | + "19 96 32.998462 34.0 35.0 35.0 32.5 36.5\n", |
| 77 | + "64 97 32.922582 33.0 35.0 35.0 30.0 38.0\n", |
| 78 | + "70 98 32.883908 33.0 35.0 35.0 30.0 38.0\n", |
| 79 | + "80 99 32.836223 33.0 35.0 35.0 30.0 38.0\n", |
| 80 | + "51 100 31.190304 30.0 34.0 35.0 22.5 42.5\n", |
61 | 81 | "\n", |
62 | 82 | "[101 rows x 7 columns]\n" |
63 | 83 | ] |
64 | 84 | } |
65 | 85 | ], |
66 | 86 | "source": [ |
67 | | - "result = pb.base_sequence_quality(\"example.fastq\", output_type=\"pandas.DataFrame\", target_partitions=2).sort_values(by=\"pos\")\n", |
| 87 | + "result = pb.base_sequence_quality(\"example.fastq\", output_type=\"pandas.DataFrame\").sort_values(by=\"pos\")\n", |
68 | 88 | "print(result)" |
69 | 89 | ] |
70 | 90 | }, |
|
107 | 127 | "| -------------- | ----------------- | ----- |\n", |
108 | 128 | "| fastqc-rs | - | 22.9s |\n", |
109 | 129 | "| polars_bio | 1 | 9.0s |\n", |
110 | | - "| polars_bio | 2 | 8.5s |\n", |
111 | | - "| polars_bio | 4 | 15.6s |\n", |
112 | | - "| polars_bio | 8 | 7.8s |\n", |
| 130 | + "| polars_bio | 2 | 7.8s |\n", |
| 131 | + "| polars_bio | 4 | 14.9s |\n", |
| 132 | + "| polars_bio | 8 | 7.4s |\n", |
113 | 133 | "\n", |
114 | 134 | "- Execution times were measured by running the algorithm on the file ftp://ftp.sra.ebi.ac.uk/vol1/fastq/ERR194/ERR194147/ERR194147.fastq.gz, which contains about 8,240,000 records.\n", |
115 | 135 | "- The `fastqc-rs` execution time applies only to the base sequence quality task (pieces of code relating to other tasks have been removed for the purpose of this comparison).\n" |
|
0 commit comments