#!/usr/bin/env python3
"""Merge the ``Book*.pdf`` files of the current directory into a single PDF.

Each input becomes one outline (bookmark) entry in the merged document; the
entry title is derived from the input's file name.  Inputs are combined in
natural sort order, so ``Book4_Ch2`` comes before ``Book4_Ch10``.
"""

import os
import datetime

# pip3 install pypdf natsort
from pypdf import PdfWriter, PdfReader, PageRange
from natsort import natsorted


# Base name of the merged output; a timestamp and ".pdf" are appended to it.
mfname = "book4_power_of_matrix"


def _bookmark_title(filename):
    """Return the outline title for one input PDF, derived from its name."""
    stem, _ext = os.path.splitext(filename)
    stem = stem.replace("Book4_", "")
    # Drop up to two trailing "__"-separated fields, keeping what precedes them.
    stem = stem.rsplit("__", 2)[0]
    return stem.replace("_", " ")


def main():
    """Merge every top-level ``Book*.pdf`` into one timestamped output file.

    Only regular files directly inside the current working directory are
    considered (sub-directories are not searched).  A file qualifies when its
    name starts with ``Book`` and its extension is ``.pdf`` in any letter case.
    The result is written to ``<mfname>.<YYYY.mm.dd_HH_MM_SS>.pdf`` in the
    current directory.  Nothing is written when no input qualifies.
    """
    pwd = os.getcwd()

    candidates = [
        name
        for name in os.listdir(pwd)
        if name.startswith("Book")
        and os.path.splitext(name)[1].lower() == ".pdf"
        and os.path.isfile(os.path.join(pwd, name))
    ]
    pdfs = natsorted(candidates)

    if not pdfs:
        print("no Book*.pdf files found in {}".format(pwd))
        return

    pdfwrtr = PdfWriter()
    try:
        for fitem in pdfs:
            # PdfReader's second positional parameter is ``strict``, not a
            # file mode: passing 'rb' there would switch on strict parsing.
            pdfrd = PdfReader(os.path.join(pwd, fitem))
            pdfwrtr.append(pdfrd, _bookmark_title(fitem))

            # ``metadata`` is None for documents without an info dictionary.
            # Metadata is applied per input, so later inputs override keys
            # set by earlier ones.
            meta = pdfrd.metadata
            if meta:
                pdfwrtr.add_metadata(meta)

        now_str = datetime.datetime.now().strftime("%Y.%m.%d_%H_%M_%S")
        pdfwrtr.write("{mfname}.{nw}.pdf".format(mfname=mfname, nw=now_str))
    finally:
        # Release the writer even when reading or writing fails part-way.
        pdfwrtr.close()


if __name__ == '__main__':
    main()