diff --git a/.gitignore b/.gitignore
index 2cc2704f..aa8e3410 100644
--- a/.gitignore
+++ b/.gitignore
@@ -52,3 +52,6 @@ MANIFEST
 .mr.developer.cfg
 .project
 .pydevproject
+*.txt
+*.csv
+.idea
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..b881eff7
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.analysis.autoImportCompletions": true
+}
\ No newline at end of file
diff --git a/samples/commonlib/use_datetime.py b/samples/commonlib/use_datetime.py
index 625fad85..7ae5f27f 100755
--- a/samples/commonlib/use_datetime.py
+++ b/samples/commonlib/use_datetime.py
@@ -38,3 +38,18 @@
 utc8_dt = utc_dt.astimezone(timezone(timedelta(hours=8)))
 print('UTC+0:00 now =', utc_dt)
 print('UTC+8:00 now =', utc8_dt)
+
+import datetime as dt  # alias avoids rebinding the name "datetime", which the code above presumably imported as a class - confirm
+
+target_date = dt.datetime(2024, 2, 2, 18, 0, 0)  # countdown target (naive datetime)
+current_datetime = dt.datetime.now()  # current local time (naive datetime)
+
+# Time remaining until the target, clamped at zero once the target has passed.
+time_left = max(target_date - current_datetime, dt.timedelta(0))
+
+# Split the whole remaining seconds into hours, minutes and seconds.
+hours, remainder = divmod(int(time_left.total_seconds()), 3600)
+minutes, seconds = divmod(remainder, 60)
+
+# Print the countdown; the timestamp is rendered from target_date so the text cannot drift from the value.
+print(f"当前时间与{target_date:%Y-%m-%d %H:%M:%S}之间相隔 {hours} 小时 {minutes} 分钟 {seconds} 秒")
diff --git a/samples/commonlib/use_datetime_copy.py b/samples/commonlib/use_datetime_copy.py
new file mode 100644
index 00000000..f5d7b563
--- /dev/null
+++ b/samples/commonlib/use_datetime_copy.py
@@ -0,0 +1,15 @@
+
+import datetime
+
+# Countdown target and the current moment, both as naive datetimes.
+target_date = datetime.datetime(2024, 2, 2, 18, 0, 0)
+current_datetime = datetime.datetime.now()
+
+# Remaining time until the target; a target in the past is clamped to zero.
+time_left = target_date - current_datetime
+if time_left < datetime.timedelta(0):
+    time_left = datetime.timedelta(0)
+# Break the remaining seconds down into hours, minutes and seconds, then report them.
+hours, remainder = divmod(time_left.total_seconds(), 3600)
+minutes, seconds = divmod(remainder, 60)
+print(f"当前时间与2024-02-02 18:00:00之间相隔 {int(hours)} 小时 {int(minutes)} 分钟 {int(seconds)} 秒")
diff --git a/samples/db/do_mysql.py b/samples/db/do_mysql.py
index d1d1db8d..7e3a2b19 100755
--- a/samples/db/do_mysql.py
+++ b/samples/db/do_mysql.py
@@ -7,25 +7,59 @@
 # pip3 install mysql-connector-python --allow-external mysql-connector-python
 
 import mysql.connector
+import pandas as pd
+import numpy as np
+import re
 
 # change root password to yours:
-conn = mysql.connector.connect(user='root', password='password', database='test')
-
-cursor = conn.cursor()
-# 创建user表:
-cursor.execute('create table user (id varchar(20) primary key, name varchar(20))')
-# 插入一行记录，注意MySQL的占位符是%s:
-cursor.execute('insert into user (id, name) values (%s, %s)', ('1', 'Michael'))
-print('rowcount =', cursor.rowcount)
-# 提交事务:
-conn.commit()
-cursor.close()
+conn = mysql.connector.connect(user='root', password='root', database='sys')
 
 # 运行查询:
 cursor = conn.cursor()
-cursor.execute('select * from user where id = %s', ('1',))
+cursor.execute('select 患者编号,中药组成 from Sheet4 ')
 values = cursor.fetchall()
-print(values)
-# 关闭Cursor和Connection:
+
+# 获取查询结果的字段名
+columns = [i[0] for i in cursor.description]
+
+# 关闭 Cursor 和 Connection
 cursor.close()
 conn.close()
+
+# Build a DataFrame from the fetched rows, labelled with the query's column names.
+df = pd.DataFrame(values, columns=columns)
+
+# Each match is a run of CJK characters (herb name) followed by digits plus
+# trailing word characters (dose with unit); extractall yields one row per match.
+pattern = re.compile(r'([\u4e00-\u9fa5]+)(\d+\w+)')
+result = df['中药组成'].str.extractall(pattern)
+
+# Level 0 of extractall's MultiIndex is the source row of each match; use it to
+# look up the patient id that every extracted (herb, dose) pair belongs to.
+patient_ids = df['患者编号'].iloc[result.index.get_level_values(0)].reset_index(drop=True)
+
+# Flatten the MultiIndex so result lines up positionally with patient_ids.
+result.reset_index(drop=True, inplace=True)
+result['患者编号'] = patient_ids
+
+# Put the patient id first and give the two capture groups readable names.
+result = result[['患者编号', 0, 1]]
+result.columns = ['患者编号', '草药', '克数']
+print(result)
+
+# Insert every (patient id, herb, dose) row into the detail table.
+insert_sql = 'INSERT INTO Sheet4_detail (患者编号, 中药, 克数) VALUES (%s, %s, %s)'
+rows = list(result.itertuples(index=False, name=None))
+
+connInsert = mysql.connector.connect(user='root', password='root', database='sys')
+try:
+    cursorInsert = connInsert.cursor()
+    try:
+        # One batched call instead of a Python-level execute() per row; commit only on success.
+        cursorInsert.executemany(insert_sql, rows)
+        connInsert.commit()
+    finally:
+        cursorInsert.close()
+finally:
+    # Always release the connection, even when the insert fails.
+    connInsert.close()
diff --git a/samples/matplotlib/matplotlibPd copy 2.py b/samples/matplotlib/matplotlibPd copy 2.py
new file mode 100644
index 00000000..58cd525e
--- /dev/null
+++ b/samples/matplotlib/matplotlibPd copy 2.py	
@@ -0,0 +1,49 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+from matplotlib.ticker import MultipleLocator
+
+# Load the CSV that sits next to this script rather than from a machine-specific absolute path.
+from pathlib import Path
+
+df = pd.read_csv(Path(__file__).resolve().parent / '含远至功效.csv')
+
+# Font used for Chinese text; unicode_minus is switched off so the minus sign still renders.
+plt.rcParams['font.sans-serif'] = ['SimHei']
+plt.rcParams['axes.unicode_minus'] = False
+
+# The percentage column is expected to hold strings such as '22.42%': strip the sign, convert to float.
+df['百分比'] = df['百分比'].str.rstrip('%').astype('float')
+
+# One figure: frequency bars on the left axis, percentage line on the right axis.
+fig, ax = plt.subplots(figsize=(12, 6))
+
+# Frequency bars in sky blue.
+ax.bar(df['功效统计分析'], df['频次'], color='skyblue')
+
+# Only the y axis gets a label; no x-axis label is set.
+ax.set_ylabel('频次')
+
+# Percentage line (red circles) on a secondary y axis that shares the x axis.
+ax2 = ax.twinx()
+ax2.plot(df['功效统计分析'], df['百分比'], 'ro-')
+ax2.set_ylabel('百分比 (%)')
+
+# Place the secondary axis' major ticks at multiples of 1.
+ax2.yaxis.set_major_locator(MultipleLocator(1))
+
+# No grid lines on the bar axis.
+ax.grid(False)
+
+# Annotate each bar with its frequency, centred on the bar and anchored at its top.
+for bar in ax.patches:
+    bar_top = bar.get_height()
+    bar_centre = bar.get_x() + bar.get_width() / 2
+    ax.text(
+        bar_centre, bar_top, f'{bar_top:.0f}', ha='center', va='bottom'
+    )
+
+# Tighten the layout so labels do not overlap.
+fig.tight_layout()
+
+# Show the chart.
+plt.show()
\ No newline at end of file
diff --git a/samples/matplotlib/matplotlibPd copy.py b/samples/matplotlib/matplotlibPd copy.py
new file mode 100644
index 00000000..5e77eb05
--- /dev/null
+++ b/samples/matplotlib/matplotlibPd copy.py	
@@ -0,0 +1,45 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Load the CSV that sits next to this script rather than from a machine-specific absolute path.
+from pathlib import Path
+
+df = pd.read_csv(Path(__file__).resolve().parent / '去远至功效.csv')
+
+# Font used for Chinese text; unicode_minus is switched off so the minus sign still renders.
+plt.rcParams['font.sans-serif'] = ['SimHei']
+plt.rcParams['axes.unicode_minus'] = False
+
+# The percentage column is expected to hold strings such as '22.42%': strip the sign, convert to float.
+df['百分比'] = df['百分比'].str.rstrip('%').astype('float')
+
+# One figure: frequency bars on the left axis, percentage line on the right axis.
+fig, ax = plt.subplots(figsize=(12, 6))
+
+# Frequency bars in sky blue.
+ax.bar(df['去远至功效'], df['频次'], color='skyblue')
+
+# Only the y axis gets a label; no x-axis label is set.
+ax.set_ylabel('频次')
+
+# Percentage line (red circles) on a secondary y axis that shares the x axis.
+ax2 = ax.twinx()
+ax2.plot(df['去远至功效'], df['百分比'], 'ro-')
+ax2.set_ylabel('百分比 (%)')
+
+# No grid lines on the bar axis.
+ax.grid(False)
+
+# Annotate each bar with its frequency, centred on the bar and anchored at its top.
+for bar in ax.patches:
+    bar_top = bar.get_height()
+    bar_centre = bar.get_x() + bar.get_width() / 2
+    ax.text(
+        bar_centre, bar_top, f'{bar_top:.0f}', ha='center', va='bottom'
+    )
+
+# Tighten the layout so labels do not overlap.
+fig.tight_layout()
+
+# Show the chart.
+plt.show()
diff --git a/samples/matplotlib/matplotlibPd.py b/samples/matplotlib/matplotlibPd.py
new file mode 100644
index 00000000..6a8dc0d4
--- /dev/null
+++ b/samples/matplotlib/matplotlibPd.py
@@ -0,0 +1,45 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+# Load the CSV that sits next to this script rather than from a machine-specific absolute path.
+from pathlib import Path
+
+df = pd.read_csv(Path(__file__).resolve().parent / '中医症候.csv')
+
+# Font used for Chinese text; unicode_minus is switched off so the minus sign still renders.
+plt.rcParams['font.sans-serif'] = ['SimHei']
+plt.rcParams['axes.unicode_minus'] = False
+
+# The percentage column is expected to hold strings such as '22.42%': strip the sign, convert to float.
+df['百分比'] = df['百分比'].str.rstrip('%').astype('float')
+
+# One figure: frequency bars on the left axis, percentage line on the right axis.
+fig, ax = plt.subplots(figsize=(12, 6))
+
+# Frequency bars in sky blue.
+ax.bar(df['中医证候'], df['频次'], color='skyblue')
+
+# Only the y axis gets a label; no x-axis label is set.
+ax.set_ylabel('频次')
+
+# Percentage line (red circles) on a secondary y axis that shares the x axis.
+ax2 = ax.twinx()
+ax2.plot(df['中医证候'], df['百分比'], 'ro-')
+ax2.set_ylabel('百分比 (%)')
+
+# No grid lines on the bar axis.
+ax.grid(False)
+
+# Annotate each bar with its frequency, centred on the bar and anchored at its top.
+for bar in ax.patches:
+    bar_top = bar.get_height()
+    bar_centre = bar.get_x() + bar.get_width() / 2
+    ax.text(
+        bar_centre, bar_top, f'{bar_top:.0f}', ha='center', va='bottom'
+    )
+
+# Tighten the layout so labels do not overlap.
+fig.tight_layout()
+
+# Show the chart.
+plt.show()
diff --git a/samples/matplotlib/output.png b/samples/matplotlib/output.png
new file mode 100644
index 00000000..5931b065
Binary files /dev/null and b/samples/matplotlib/output.png differ
diff --git a/samples/pandas/do_mysql copy.py b/samples/pandas/do_mysql copy.py
new file mode 100644
index 00000000..088dc91e
--- /dev/null
+++ b/samples/pandas/do_mysql copy.py	
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+########## prepare ##########
+
+# install mysql-connector-python:
+# pip3 install mysql-connector-python --allow-external mysql-connector-python
+
+import mysql.connector
+import pandas as pd
+import numpy as np
+import re
+from fractions import Fraction
+
+# Connection settings are hard-coded; change the root password to yours.
+conn = mysql.connector.connect(user='root', password='root', database='sys')
+
+# Run the query: every Sheet4 row left-joined with its Sheet4_detail rows on the patient id.
+cursor = conn.cursor()
+cursor.execute('SELECT a.患者编号,姓名,性别,年龄,就诊日期,既往史,并发症,临床表现,舌质,中药组成,中药,克数 FROM Sheet4 a LEFT JOIN Sheet4_detail b ON a.`患者编号`=b.`患者编号`')
+values = cursor.fetchall()
+
+# Column names of the result set, taken from the cursor description.
+columns = [i[0] for i in cursor.description]
+
+# Close the cursor and the connection; everything below works on the in-memory rows.
+cursor.close()
+conn.close()
+
+# Build a DataFrame from the rows, labelled with those column names.
+df = pd.DataFrame(values, columns=columns)
+# print(df)
+
+
+
+## 1. Dosage of 远志 / 石菖蒲 / 川芎: total amount, frequency and mean dose
+print("1.	远志、石菖蒲、川芎的用量及用量的频次、频率，平均用量")
+# Convert the dose text to a number by taking its first run of digits.
+# Raw string: '\d' inside a plain literal is an invalid escape sequence.
+df['克数'] = df['克数'].str.extract(r'(\d+)', expand=False).astype(float)
+
+# Keep only the rows for the three herbs of interest.
+selected_herbs = ['远志', '石菖蒲', '川芎']
+selected_df = df[df['中药'].isin(selected_herbs)]
+
+# Per herb: total dose and number of non-null dose entries.
+usage_info = selected_df.groupby('中药')['克数'].agg(['sum', 'count'])
+
+# Mean dose = total dose / number of entries.
+usage_info['平均用量'] = usage_info['sum'] / usage_info['count']
+
+# Rename the aggregate columns to their report labels.
+usage_info = usage_info.rename(columns={'sum': '用量总和', 'count': '用量频次'})
+# NOTE(review): usage_info is never printed in this file; the print call was disabled.
+
+# 验证 1 MySQL，验证通过
+# SELECT
+#     COUNT(1) AS total_count,
+#     SUM(CAST(REPLACE(c.克数, 'g', '') AS SIGNED)) AS sum_of_numbers
+# FROM
+#     (
+#         SELECT a.患者编号,姓名,性别,年龄,就诊日期,既往史,并发症,临床表现,舌质,中药组成,中药,克数
+#         FROM Sheet4 a
+#         LEFT JOIN Sheet4_detail b ON a.`患者编号` = b.`患者编号`
+#     ) AS c 
+# WHERE
+#     c.`中药` = '石菖蒲';
+
+
+#
+
+## 2. Dose ratios 远志:石菖蒲 and 远志:川芎, with the frequency and percentage of each ratio
+print(" 2.	远志与石菖蒲、远志与川芎用量比例及比例的频次、频率 ") 
+
+
+# Per-patient table of the summed dose of each herb (rows: patient id, columns: herb).
+# pivot_table = df.pivot_table(index='患者编号', columns='中药', values='克数', aggfunc='sum', fill_value=0)
+# fill_value is the value written into cells that unstack would otherwise leave missing (NaN).
+pivot_table = df.groupby(['患者编号', '中药'])['克数'].sum().unstack(fill_value=0)
+
+# Ratio 远志:石菖蒲 per patient; inf and NaN results are replaced by 0, then rounded to 2 decimals.
+pivot_table['远志:石菖蒲比例'] = pivot_table['远志'] / pivot_table['石菖蒲']
+pivot_table['远志:石菖蒲比例'] = pivot_table['远志:石菖蒲比例'].replace([np.inf, -np.inf, np.nan], 0).round(2)
+
+
+# Count of each distinct ratio (sorted by ratio) and its share of all patients in percent.
+pivot_table_prop1 = pivot_table['远志:石菖蒲比例']
+count_vc1 = pivot_table['远志:石菖蒲比例'].value_counts().sort_index()
+freq_vc1 = count_vc1 / count_vc1.sum() * 100
+
+# Turn the ratio Series into a two-column DataFrame (patient id, ratio).
+df_prop1 = pivot_table_prop1.reset_index()
+df_prop1.columns = ['患者编号', '远志:石菖蒲比例']
+
+# Attach the count and then the percentage to every patient row, matching on the ratio value.
+result_df = pd.merge(df_prop1.drop(columns='患者编号'), pd.DataFrame({'远志:石菖蒲比例频次': count_vc1}),
+                     left_on='远志:石菖蒲比例', right_index=True, how='left')
+
+result_df = pd.merge(result_df, pd.DataFrame({'远志:石菖蒲比例频率(%)': freq_vc1}),
+                     left_on='远志:石菖蒲比例', right_index=True, how='left')
+
+# Keep the three report columns (the patient id was already dropped before the first merge).
+result_df = result_df[['远志:石菖蒲比例', '远志:石菖蒲比例频次', '远志:石菖蒲比例频率(%)']]
+
+# Render each ratio as a fraction string; NaN/inf would map to 'Infinity' (both were replaced by 0 above, so that branch looks unreachable - confirm).
+result_df['远志:石菖蒲比例'] = result_df['远志:石菖蒲比例'].apply(
+    lambda x: f"{Fraction(x).limit_denominator()}" if not pd.isna(x) and x != float('inf') else 'Infinity')
+
+
+# Collapse to one row per distinct ratio string by taking the first row of each group.
+grouped_result_df = result_df.groupby('远志:石菖蒲比例').first()
+
+# Print the 远志:石菖蒲 table.
+print(grouped_result_df)
+
+# Ratio 远志:川芎 per patient; inf and NaN results are replaced by 0, then rounded to 2 decimals.
+pivot_table['远志:川芎比例'] = pivot_table['远志'] / pivot_table['川芎']
+pivot_table['远志:川芎比例'] = pivot_table['远志:川芎比例'].replace([np.inf, -np.inf, np.nan], 0).round(2)
+# The 远志:川芎 ratio column as a Series.
+pivot_table_prop2 = pivot_table['远志:川芎比例']
+# Count of each distinct 远志:川芎 ratio (sorted by ratio) and its share in percent.
+count_vc2 = pivot_table['远志:川芎比例'].value_counts().sort_index()
+freq_vc2 = count_vc2 / count_vc2.sum() * 100
+
+# Turn the ratio Series into a two-column DataFrame (patient id, ratio).
+df_prop2 = pivot_table_prop2.reset_index()
+df_prop2.columns = ['患者编号', '远志:川芎比例']
+
+# Merge df_prop2 with count_vc2 and then freq_vc2, matching on the ratio value.
+result_df2 = pd.merge(df_prop2.drop(columns='患者编号'), pd.DataFrame({'远志:川芎比例频次': count_vc2}),
+                     left_on='远志:川芎比例', right_index=True, how='left')
+
+result_df2 = pd.merge(result_df2, pd.DataFrame({'远志:川芎比例频率(%)': freq_vc2}),
+                     left_on='远志:川芎比例', right_index=True, how='left')
+
+# Render each ratio as a fraction string; NaN/inf would map to 'Infinity'.
+result_df2['远志:川芎比例'] = result_df2['远志:川芎比例'].apply(lambda x: f"{Fraction(x).limit_denominator()}" if not pd.isna(x) and x != float('inf') else 'Infinity')
+
+# Collapse to one row per distinct '远志:川芎比例' string by taking the first row of each group.
+grouped_result_df2 = result_df2.groupby('远志:川芎比例').first()
+# Print the 远志:川芎 table.
+print(grouped_result_df2)
+
+
+## 3. Summary table of the 远志:石菖蒲 and 远志:川芎 dose ratios
+print(" 3.	远志与石菖蒲、远志与川芎用量比例归纳表 ")
+
+# Select the rows for 远志, 石菖蒲 and 川芎.
+selected_herbs = ['远志', '石菖蒲', '川芎']
+selected_df = df[df['中药'].isin(selected_herbs)]
+
+# NOTE(review): this is each patient's combined 远志+石菖蒲 dose divided by the total over all patients, not a 远志:石菖蒲 ratio - confirm intent.
+yunzhi_shichangpu = selected_df[selected_df['中药'].isin(['远志', '石菖蒲'])].groupby('患者编号')['克数'].agg('sum')
+yunzhi_shichangpu_ratio = yunzhi_shichangpu.div(yunzhi_shichangpu.sum())
+
+# Same computation for 远志+川芎; the result is only referenced by the disabled code below.
+yunzhi_chuanxiong = selected_df[selected_df['中药'].isin(['远志', '川芎'])].groupby('患者编号')['克数'].agg('sum')
+yunzhi_chuanxiong_ratio = yunzhi_chuanxiong.div(yunzhi_chuanxiong.sum())
+
+# Empty summary table with one row per ratio class.
+summary_table = pd.DataFrame(index=['<1', '=1', '>1'])
+
+# NOTE(review): the bins (0, 0.1], (0.1, 0.2], (0.2, 1] are labelled '<1', '=1', '>1'; the labels do not describe the bin edges - confirm.
+summary_table['远志:石菖蒲 频次'] = pd.cut(yunzhi_shichangpu_ratio, bins=[0, 0.1, 0.2, 1], labels=['<1', '=1', '>1']).value_counts().sort_index()
+summary_table['远志:石菖蒲 频率(%)'] = (summary_table['远志:石菖蒲 频次'] / summary_table['远志:石菖蒲 频次'].sum() * 100).round(2)
+
+# # Disabled: the same two columns for 远志:川芎.
+# summary_table['远志:川芎 频次'] = pd.cut(yunzhi_chuanxiong_ratio, bins=[0, 0.1, 0.2, 1], labels=['<1', '=1', '>1']).value_counts().sort_index()
+# summary_table['远志:川芎 频率(%)'] = (summary_table['远志:川芎 频次'] / summary_table['远志:川芎 频次'].sum() * 100).round(2)
+
+# print(summary_table)
diff --git a/samples/pdf/megerPdf.py b/samples/pdf/megerPdf.py
new file mode 100644
index 00000000..2b73c0f2
--- /dev/null
+++ b/samples/pdf/megerPdf.py
@@ -0,0 +1,56 @@
+import os
+import fitz  # PyMuPDF
+
+def merge_pdfs(folder_path, output_path):
+    """Merge every PDF found under folder_path (recursively) into output_path.
+
+    Files are visited in sorted order so the result is reproducible. The output
+    file itself is skipped, so re-running with an output inside folder_path does
+    not fold the previous result into the new one. Unreadable files are reported
+    and skipped; nothing is written when no page was collected.
+    """
+    target = os.path.normcase(os.path.abspath(output_path))
+    merger = fitz.open()  # empty in-memory document that receives all pages
+    try:
+        for root, dirs, files in os.walk(folder_path):
+            dirs.sort()  # in-place sort steers os.walk's descent order
+            for pdf_file in sorted(files):
+                pdf_path = os.path.join(root, pdf_file)
+                if not pdf_file.lower().endswith('.pdf'):
+                    continue
+                if os.path.normcase(os.path.abspath(pdf_path)) == target:
+                    continue  # never merge a previous output into itself
+
+                print(f'Merging: {pdf_file}')
+                try:
+                    pdf_document = fitz.open(pdf_path)
+                    try:
+                        print(f'Input PDF pages: {pdf_document.page_count}')
+                        if pdf_document.page_count > 0:
+                            merger.insert_pdf(pdf_document)
+                    finally:
+                        pdf_document.close()
+                except Exception as e:  # best effort: report and continue with the next file
+                    print(f'Error merging {pdf_file}: {e}')
+                    import traceback
+                    traceback.print_exc()
+
+        print(f'Merged PDF pages: {merger.page_count}')
+        try:
+            if merger.page_count > 0:
+                merger.save(output_path)
+            else:
+                print('No pages to save. Merged PDF will not be created.')
+        except Exception as e:
+            print(f'Error saving merged PDF: {e}')
+    finally:
+        merger.close()
+
+# Folder to scan and path of the merged output (hard-coded, machine-specific paths).
+folder_path = r'D:\personSpace\Math'
+output_path = os.path.join(folder_path, 'merged.pdf')
+
+# Run the merge at import/run time.
+merge_pdfs(folder_path, output_path)
+
+print(f'合并完成，输出文件保存在: {output_path}')
diff --git a/samples/pdf/read_from_pdf.py b/samples/pdf/read_from_pdf.py
new file mode 100644
index 00000000..6079c26d
--- /dev/null
+++ b/samples/pdf/read_from_pdf.py
@@ -0,0 +1,100 @@
+from pdfminer.pdfparser import PDFParser, PDFDocument
+from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
+from pdfminer.converter import PDFPageAggregator
+from pdfminer.layout import LAParams, LTTextBox
+from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
+from collections import defaultdict, deque
+from itertools import count
+
+def aho_corasick():
+    """Create the four empty tables (G, W, F, O) of an Aho-Corasick automaton."""
+    transitions = defaultdict(count(1).__next__)  # missing key -> next unused state id, starting at 1
+    alphabet = defaultdict(set)                   # state -> characters with an outgoing edge
+    fallbacks = defaultdict(int)                  # state -> fallback state, 0 when unset
+    outputs = defaultdict(set)                    # state -> words reported at that state
+    # Callers unpack the tuple as G, W, F, O.
+    return transitions, alphabet, fallbacks, outputs
+
+def add_word(word, G, W, F, O):
+    """Insert word into the trie held in G/W and register it in O at its final state."""
+    node = 0  # start at the root state
+    for ch in word:
+        # Record the edge label, then follow the edge; with the defaultdict built by
+        # aho_corasick(), looking up a missing (state, char) key allocates a new state.
+        W[node].add(ch)
+        node = G[node, ch]
+    O[node].add(word)
+
+def build_fsa(G, W, F, O):
+    """Fill the fallback table F and propagate outputs in O, breadth-first from the root."""
+    # Seed the queue with the root's direct children.
+    pending = deque(G[0, w] for w in W[0])
+    while pending:
+        state = pending.popleft()
+        for w in W[state]:
+            # Walk the fallback chain of state until a state with a w-edge is
+            # found or the root (0) is reached.
+            t = F[state]
+            while t and (t, w) not in G:
+                t = F[t]
+            # The child's fallback is that w-edge's target, or the root if none.
+            s = G[state, w]
+            if (t, w) in G:
+                F[s] = G[t, w]
+            else:
+                F[s] = 0
+            O[s] |= O[F[s]]  # the child also reports whatever its fallback reports
+            pending.append(s)
+
+def search_in_pdf(text, G, W, F, O):
+    """Scan text with the automaton and print '@', the index and the word set for each hit."""
+    current = 0  # the scan starts at the root state
+    for pos, ch in enumerate(text):
+        # Follow fallback links until ch can be consumed or the root is reached.
+        while current and (current, ch) not in G:
+            current = F[current]
+        # Take the edge if there is one, otherwise restart from the root.
+        if (current, ch) in G:
+            current = G[current, ch]
+        else:
+            current = 0
+        if O[current]:
+            print('@', pos, O[current])
+
+# Parse the PDF document (hard-coded, machine-specific path).
+path = r"D:\Documents\WeChat Files\wxid_lsootbrkhf4x22\FileStorage\File\2024-01\录入文献\录入文献\袁梦石以天地转气汤为主治疗中重度阿尔茨海默病经验总结_罗慧.pdf"
+
+# Hold the file open only while pdfminer reads from it, so the handle is always released.
+with open(path, 'rb') as pdf_file:
+    parser = PDFParser(pdf_file)
+    doc = PDFDocument()
+    parser.set_document(doc)
+    doc.set_parser(parser)
+    doc.initialize()
+
+    if not doc.is_extractable:
+        raise PDFTextExtractionNotAllowed
+
+    rsrcmgr = PDFResourceManager()
+    laparams = LAParams()
+    device = PDFPageAggregator(rsrcmgr, laparams=laparams)
+    interpreter = PDFPageInterpreter(rsrcmgr, device)
+
+    # Collect the text of every text box, page by page, and join once at the
+    # end instead of growing a string with += inside the loop.
+    text_parts = []
+    for page in doc.get_pages():
+        interpreter.process_page(page)
+        layout = device.get_result()
+        text_parts.extend(x.get_text() for x in layout if isinstance(x, LTTextBox))
+    pdf_text = "".join(text_parts)
+
+# Build the automaton for the herb names to look for.
+G, W, F, O = aho_corasick()
+for herb in ("远志", "石菖蒲", "川芎"):
+    add_word(herb, G, W, F, O)
+build_fsa(G, W, F, O)
+
+# Report every hit in the extracted text, then dump the text itself.
+search_in_pdf(pdf_text, G, W, F, O)
+print(pdf_text)
diff --git a/samples/pyWxDumpUtils/cloud1.png b/samples/pyWxDumpUtils/cloud1.png
new file mode 100644
index 00000000..ee40e47e
Binary files /dev/null and b/samples/pyWxDumpUtils/cloud1.png differ
diff --git a/samples/pyWxDumpUtils/cloud2.png b/samples/pyWxDumpUtils/cloud2.png
new file mode 100644
index 00000000..3573ffcd
Binary files /dev/null and b/samples/pyWxDumpUtils/cloud2.png differ
diff --git a/samples/pyWxDumpUtils/cloud3.png b/samples/pyWxDumpUtils/cloud3.png
new file mode 100644
index 00000000..ab145d1a
Binary files /dev/null and b/samples/pyWxDumpUtils/cloud3.png differ
diff --git a/samples/pyWxDumpUtils/cloud4.png b/samples/pyWxDumpUtils/cloud4.png
new file mode 100644
index 00000000..67ba60bb
Binary files /dev/null and b/samples/pyWxDumpUtils/cloud4.png differ
diff --git a/samples/pyWxDumpUtils/cloud5.png b/samples/pyWxDumpUtils/cloud5.png
new file mode 100644
index 00000000..1485cceb
Binary files /dev/null and b/samples/pyWxDumpUtils/cloud5.png differ
diff --git a/samples/pyWxDumpUtils/me/cloud01.png b/samples/pyWxDumpUtils/me/cloud01.png
new file mode 100644
index 00000000..81f0c60b
Binary files /dev/null and b/samples/pyWxDumpUtils/me/cloud01.png differ
diff --git a/samples/pyWxDumpUtils/me/cloud02.png b/samples/pyWxDumpUtils/me/cloud02.png
new file mode 100644
index 00000000..4b560ea4
Binary files /dev/null and b/samples/pyWxDumpUtils/me/cloud02.png differ
diff --git a/samples/pyWxDumpUtils/me/cloud03.png b/samples/pyWxDumpUtils/me/cloud03.png
new file mode 100644
index 00000000..9c591715
Binary files /dev/null and b/samples/pyWxDumpUtils/me/cloud03.png differ
diff --git a/samples/pyWxDumpUtils/me/cloud04.png b/samples/pyWxDumpUtils/me/cloud04.png
new file mode 100644
index 00000000..517f2aa8
Binary files /dev/null and b/samples/pyWxDumpUtils/me/cloud04.png differ
diff --git a/samples/pyWxDumpUtils/me/cloud05.png b/samples/pyWxDumpUtils/me/cloud05.png
new file mode 100644
index 00000000..688066b2
Binary files /dev/null and b/samples/pyWxDumpUtils/me/cloud05.png differ
diff --git a/samples/pyWxDumpUtils/muban3.png b/samples/pyWxDumpUtils/muban3.png
new file mode 100644
index 00000000..33099c58
Binary files /dev/null and b/samples/pyWxDumpUtils/muban3.png differ
diff --git a/samples/pyWxDumpUtils/stopwords.dat b/samples/pyWxDumpUtils/stopwords.dat
new file mode 100644
index 00000000..4b650c1c
--- /dev/null
+++ b/samples/pyWxDumpUtils/stopwords.dat
@@ -0,0 +1,1893 @@
+!
+"
+#
+$
+%
+&
+'
+(
+)
+*
++
+,
+-
+--
+.
+..
+...
+......
+...................
+./
+.一
+.数
+.日
+/
+//
+0
+1
+2
+3
+4
+5
+6
+7
+8
+9
+:
+://
+::
+;
+<
+=
+>
+>>
+?
+@
+A
+Lex
+[
+\
+]
+^
+_
+`
+exp
+sub
+sup
+|
+}
+~
+~~~~
+·
+×
+×××
+Δ
+Ψ
+γ
+μ
+φ
+φ．
+В
+—
+——
+———
+‘
+’
+’‘
+“
+”
+”，
+…
+……
+…………………………………………………③
+′∈
+′｜
+℃
+Ⅲ
+↑
+→
+∈［
+∪φ∈
+≈
+①
+②
+②ｃ
+③
+③］
+④
+⑤
+⑥
+⑦
+⑧
+⑨
+⑩
+──
+■
+▲
+　
+、
+。
+〈
+〉
+《
+》
+》），
+」
+『
+』
+【
+】
+〔
+〕
+〕〔
+㈧
+一
+一.
+一一
+一下
+一个
+一些
+一何
+一切
+一则
+一则通过
+一天
+一定
+一方面
+一旦
+一时
+一来
+一样
+一次
+一片
+一番
+一直
+一致
+一般
+一起
+一转眼
+一边
+一面
+七
+万一
+三
+三天两头
+三番两次
+三番五次
+上
+上下
+上升
+上去
+上来
+上述
+上面
+下
+下列
+下去
+下来
+下面
+不
+不一
+不下
+不久
+不了
+不亦乐乎
+不仅
+不仅...而且
+不仅仅
+不仅仅是
+不会
+不但
+不但...而且
+不光
+不免
+不再
+不力
+不单
+不变
+不只
+不可
+不可开交
+不可抗拒
+不同
+不外
+不外乎
+不够
+不大
+不如
+不妨
+不定
+不对
+不少
+不尽
+不尽然
+不巧
+不已
+不常
+不得
+不得不
+不得了
+不得已
+不必
+不怎么
+不怕
+不惟
+不成
+不拘
+不择手段
+不敢
+不料
+不断
+不日
+不时
+不是
+不曾
+不止
+不止一次
+不比
+不消
+不满
+不然
+不然的话
+不特
+不独
+不由得
+不知不觉
+不管
+不管怎样
+不经意
+不胜
+不能
+不能不
+不至于
+不若
+不要
+不论
+不起
+不足
+不过
+不迭
+不问
+不限
+与
+与其
+与其说
+与否
+与此同时
+专门
+且
+且不说
+且说
+两者
+严格
+严重
+个
+个人
+个别
+中小
+中间
+丰富
+串行
+临
+临到
+为
+为主
+为了
+为什么
+为什麽
+为何
+为止
+为此
+为着
+主张
+主要
+举凡
+举行
+乃
+乃至
+乃至于
+么
+之
+之一
+之前
+之后
+之後
+之所以
+之类
+乌乎
+乎
+乒
+乘
+乘势
+乘机
+乘胜
+乘虚
+乘隙
+九
+也
+也好
+也就是说
+也是
+也罢
+了
+了解
+争取
+二
+二来
+二话不说
+二话没说
+于
+于是
+于是乎
+云云
+云尔
+互
+互相
+五
+些
+交口
+亦
+产生
+亲口
+亲手
+亲眼
+亲自
+亲身
+人
+人人
+人们
+人家
+人民
+什么
+什么样
+什麽
+仅
+仅仅
+今
+今后
+今天
+今年
+今後
+介于
+仍
+仍旧
+仍然
+从
+从不
+从严
+从中
+从事
+从今以后
+从优
+从古到今
+从古至今
+从头
+从宽
+从小
+从新
+从无到有
+从早到晚
+从未
+从来
+从此
+从此以后
+从而
+从轻
+从速
+从重
+他
+他人
+他们
+他是
+他的
+代替
+以
+以上
+以下
+以为
+以便
+以免
+以前
+以及
+以后
+以外
+以後
+以故
+以期
+以来
+以至
+以至于
+以致
+们
+任
+任何
+任凭
+任务
+企图
+伙同
+会
+伟大
+传
+传说
+传闻
+似乎
+似的
+但
+但凡
+但愿
+但是
+何
+何乐而不为
+何以
+何况
+何处
+何妨
+何尝
+何必
+何时
+何止
+何苦
+何须
+余外
+作为
+你
+你们
+你是
+你的
+使
+使得
+使用
+例如
+依
+依据
+依照
+依靠
+便
+便于
+促进
+保持
+保管
+保险
+俺
+俺们
+倍加
+倍感
+倒不如
+倒不如说
+倒是
+倘
+倘使
+倘或
+倘然
+倘若
+借
+借以
+借此
+假使
+假如
+假若
+偏偏
+做到
+偶尔
+偶而
+傥然
+像
+儿
+允许
+元／吨
+充其极
+充其量
+充分
+先不先
+先后
+先後
+先生
+光
+光是
+全体
+全力
+全年
+全然
+全身心
+全部
+全都
+全面
+八
+八成
+公然
+六
+兮
+共
+共同
+共总
+关于
+其
+其一
+其中
+其二
+其他
+其余
+其后
+其它
+其实
+其次
+具体
+具体地说
+具体来说
+具体说来
+具有
+兼之
+内
+再
+再其次
+再则
+再有
+再次
+再者
+再者说
+再说
+冒
+冲
+决不
+决定
+决非
+况且
+准备
+凑巧
+凝神
+几
+几乎
+几度
+几时
+几番
+几经
+凡
+凡是
+凭
+凭借
+出
+出于
+出去
+出来
+出现
+分别
+分头
+分期
+分期分批
+切
+切不可
+切切
+切勿
+切莫
+则
+则甚
+刚
+刚好
+刚巧
+刚才
+初
+别
+别人
+别处
+别是
+别的
+别管
+别说
+到
+到了儿
+到处
+到头
+到头来
+到底
+到目前为止
+前后
+前此
+前者
+前进
+前面
+加上
+加之
+加以
+加入
+加强
+动不动
+动辄
+勃然
+匆匆
+十分
+千
+千万
+千万千万
+半
+单
+单单
+单纯
+即
+即令
+即使
+即便
+即刻
+即如
+即将
+即或
+即是说
+即若
+却
+却不
+历
+原来
+去
+又
+又及
+及
+及其
+及时
+及至
+双方
+反之
+反之亦然
+反之则
+反倒
+反倒是
+反应
+反手
+反映
+反而
+反过来
+反过来说
+取得
+取道
+受到
+变成
+古来
+另
+另一个
+另一方面
+另外
+另悉
+另方面
+另行
+只
+只当
+只怕
+只是
+只有
+只消
+只要
+只限
+叫
+叫做
+召开
+叮咚
+叮当
+可
+可以
+可好
+可是
+可能
+可见
+各
+各个
+各人
+各位
+各地
+各式
+各种
+各级
+各自
+合理
+同
+同一
+同时
+同样
+后
+后来
+后者
+后面
+向
+向使
+向着
+吓
+吗
+否则
+吧
+吧哒
+吱
+呀
+呃
+呆呆地
+呐
+呕
+呗
+呜
+呜呼
+呢
+周围
+呵
+呵呵
+呸
+呼哧
+呼啦
+咋
+和
+咚
+咦
+咧
+咱
+咱们
+咳
+哇
+哈
+哈哈
+哉
+哎
+哎呀
+哎哟
+哗
+哗啦
+哟
+哦
+哩
+哪
+哪个
+哪些
+哪儿
+哪天
+哪年
+哪怕
+哪样
+哪边
+哪里
+哼
+哼唷
+唉
+唯有
+啊
+啊呀
+啊哈
+啊哟
+啐
+啥
+啦
+啪达
+啷当
+喀
+喂
+喏
+喔唷
+喽
+嗡
+嗡嗡
+嗬
+嗯
+嗳
+嘎
+嘎嘎
+嘎登
+嘘
+嘛
+嘻
+嘿
+嘿嘿
+四
+因
+因为
+因了
+因此
+因着
+因而
+固
+固然
+在
+在下
+在于
+地
+均
+坚决
+坚持
+基于
+基本
+基本上
+处在
+处处
+处理
+复杂
+多
+多么
+多亏
+多多
+多多少少
+多多益善
+多少
+多年前
+多年来
+多数
+多次
+够瞧的
+大
+大不了
+大举
+大事
+大体
+大体上
+大凡
+大力
+大多
+大多数
+大大
+大家
+大张旗鼓
+大批
+大抵
+大概
+大略
+大约
+大致
+大都
+大量
+大面儿上
+失去
+奇
+奈
+奋勇
+她
+她们
+她是
+她的
+好
+好在
+好的
+好象
+如
+如上
+如上所述
+如下
+如今
+如何
+如其
+如前所述
+如同
+如常
+如是
+如期
+如果
+如次
+如此
+如此等等
+如若
+始而
+姑且
+存在
+存心
+孰料
+孰知
+宁
+宁可
+宁愿
+宁肯
+它
+它们
+它们的
+它是
+它的
+安全
+完全
+完成
+定
+实现
+实际
+宣布
+容易
+密切
+对
+对于
+对应
+对待
+对方
+对比
+将
+将才
+将要
+将近
+小
+少数
+尔
+尔后
+尔尔
+尔等
+尚且
+尤其
+就
+就地
+就是
+就是了
+就是说
+就此
+就算
+就要
+尽
+尽可能
+尽如人意
+尽心尽力
+尽心竭力
+尽快
+尽早
+尽然
+尽管
+尽管如此
+尽量
+局外
+居然
+届时
+属于
+屡
+屡屡
+屡次
+屡次三番
+岂
+岂但
+岂止
+岂非
+川流不息
+左右
+巨大
+巩固
+差一点
+差不多
+己
+已
+已矣
+已经
+巴
+巴巴
+带
+帮助
+常
+常常
+常言说
+常言说得好
+常言道
+平素
+年复一年
+并
+并不
+并不是
+并且
+并排
+并无
+并没
+并没有
+并肩
+并非
+广大
+广泛
+应当
+应用
+应该
+庶乎
+庶几
+开外
+开始
+开展
+引起
+弗
+弹指之间
+强烈
+强调
+归
+归根到底
+归根结底
+归齐
+当
+当下
+当中
+当儿
+当前
+当即
+当口儿
+当地
+当场
+当头
+当庭
+当时
+当然
+当真
+当着
+形成
+彻夜
+彻底
+彼
+彼时
+彼此
+往
+往往
+待
+待到
+很
+很多
+很少
+後来
+後面
+得
+得了
+得出
+得到
+得天独厚
+得起
+心里
+必
+必定
+必将
+必然
+必要
+必须
+快
+快要
+忽地
+忽然
+怎
+怎么
+怎么办
+怎么样
+怎奈
+怎样
+怎麽
+怕
+急匆匆
+怪
+怪不得
+总之
+总是
+总的来看
+总的来说
+总的说来
+总结
+总而言之
+恍然
+恐怕
+恰似
+恰好
+恰如
+恰巧
+恰恰
+恰恰相反
+恰逢
+您
+您们
+您是
+惟其
+惯常
+意思
+愤然
+愿意
+慢说
+成为
+成年
+成年累月
+成心
+我
+我们
+我是
+我的
+或
+或则
+或多或少
+或是
+或曰
+或者
+或许
+战斗
+截然
+截至
+所
+所以
+所在
+所幸
+所有
+所谓
+才
+才能
+扑通
+打
+打从
+打开天窗说亮话
+扩大
+把
+抑或
+抽冷子
+拦腰
+拿
+按
+按时
+按期
+按照
+按理
+按说
+挨个
+挨家挨户
+挨次
+挨着
+挨门挨户
+挨门逐户
+换句话说
+换言之
+据
+据实
+据悉
+据我所知
+据此
+据称
+据说
+掌握
+接下来
+接着
+接著
+接连不断
+放量
+故
+故意
+故此
+故而
+敞开儿
+敢
+敢于
+敢情
+数/
+整个
+断然
+方
+方便
+方才
+方能
+方面
+旁人
+无
+无宁
+无法
+无论
+既
+既...又
+既往
+既是
+既然
+日复一日
+日渐
+日益
+日臻
+日见
+时候
+昂然
+明显
+明确
+是
+是不是
+是以
+是否
+是的
+显然
+显著
+普通
+普遍
+暗中
+暗地里
+暗自
+更
+更为
+更加
+更进一步
+曾
+曾经
+替
+替代
+最
+最后
+最大
+最好
+最後
+最近
+最高
+有
+有些
+有关
+有利
+有力
+有及
+有所
+有效
+有时
+有点
+有的
+有的是
+有着
+有著
+望
+朝
+朝着
+末##末
+本
+本人
+本地
+本着
+本身
+权时
+来
+来不及
+来得及
+来看
+来着
+来自
+来讲
+来说
+极
+极为
+极了
+极其
+极力
+极大
+极度
+极端
+构成
+果然
+果真
+某
+某个
+某些
+某某
+根据
+根本
+格外
+梆
+概
+次第
+欢迎
+欤
+正值
+正在
+正如
+正巧
+正常
+正是
+此
+此中
+此后
+此地
+此处
+此外
+此时
+此次
+此间
+殆
+毋宁
+每
+每个
+每天
+每年
+每当
+每时每刻
+每每
+每逢
+比
+比及
+比如
+比如说
+比方
+比照
+比起
+比较
+毕竟
+毫不
+毫无
+毫无例外
+毫无保留地
+汝
+沙沙
+没
+没奈何
+没有
+沿
+沿着
+注意
+活
+深入
+清楚
+满
+满足
+漫说
+焉
+然
+然则
+然后
+然後
+然而
+照
+照着
+牢牢
+特别是
+特殊
+特点
+犹且
+犹自
+独
+独自
+猛然
+猛然间
+率尔
+率然
+现代
+现在
+理应
+理当
+理该
+瑟瑟
+甚且
+甚么
+甚或
+甚而
+甚至
+甚至于
+用
+用来
+甫
+甭
+由
+由于
+由是
+由此
+由此可见
+略
+略为
+略加
+略微
+白
+白白
+的
+的确
+的话
+皆可
+目前
+直到
+直接
+相似
+相信
+相反
+相同
+相对
+相对而言
+相应
+相当
+相等
+省得
+看
+看上去
+看出
+看到
+看来
+看样子
+看看
+看见
+看起来
+真是
+真正
+眨眼
+着
+着呢
+矣
+矣乎
+矣哉
+知道
+砰
+确定
+碰巧
+社会主义
+离
+种
+积极
+移动
+究竟
+穷年累月
+突出
+突然
+窃
+立
+立刻
+立即
+立地
+立时
+立马
+竟
+竟然
+竟而
+第
+第二
+等
+等到
+等等
+策略地
+简直
+简而言之
+简言之
+管
+类如
+粗
+精光
+紧接着
+累年
+累次
+纯
+纯粹
+纵
+纵令
+纵使
+纵然
+练习
+组成
+经
+经常
+经过
+结合
+结果
+给
+绝
+绝不
+绝对
+绝非
+绝顶
+继之
+继后
+继续
+继而
+维持
+综上所述
+缕缕
+罢了
+老
+老大
+老是
+老老实实
+考虑
+者
+而
+而且
+而况
+而又
+而后
+而外
+而已
+而是
+而言
+而论
+联系
+联袂
+背地里
+背靠背
+能
+能否
+能够
+腾
+自
+自个儿
+自从
+自各儿
+自后
+自家
+自己
+自打
+自身
+臭
+至
+至于
+至今
+至若
+致
+般的
+良好
+若
+若夫
+若是
+若果
+若非
+范围
+莫
+莫不
+莫不然
+莫如
+莫若
+莫非
+获得
+藉以
+虽
+虽则
+虽然
+虽说
+蛮
+行为
+行动
+表明
+表示
+被
+要
+要不
+要不是
+要不然
+要么
+要是
+要求
+见
+规定
+觉得
+譬喻
+譬如
+认为
+认真
+认识
+让
+许多
+论
+论说
+设使
+设或
+设若
+诚如
+诚然
+话说
+该
+该当
+说明
+说来
+说说
+请勿
+诸
+诸位
+诸如
+谁
+谁人
+谁料
+谁知
+谨
+豁然
+贼死
+赖以
+赶
+赶快
+赶早不赶晚
+起
+起先
+起初
+起头
+起来
+起见
+起首
+趁
+趁便
+趁势
+趁早
+趁机
+趁热
+趁着
+越是
+距
+跟
+路经
+转动
+转变
+转贴
+轰然
+较
+较为
+较之
+较比
+边
+达到
+达旦
+迄
+迅速
+过
+过于
+过去
+过来
+运用
+近
+近几年来
+近年来
+近来
+还
+还是
+还有
+还要
+这
+这一来
+这个
+这么
+这么些
+这么样
+这么点儿
+这些
+这会儿
+这儿
+这就是说
+这时
+这样
+这次
+这点
+这种
+这般
+这边
+这里
+这麽
+进入
+进去
+进来
+进步
+进而
+进行
+连
+连同
+连声
+连日
+连日来
+连袂
+连连
+迟早
+迫于
+适应
+适当
+适用
+逐步
+逐渐
+通常
+通过
+造成
+逢
+遇到
+遭到
+遵循
+遵照
+避免
+那
+那个
+那么
+那么些
+那么样
+那些
+那会儿
+那儿
+那时
+那末
+那样
+那般
+那边
+那里
+那麽
+部分
+都
+鄙人
+采取
+里面
+重大
+重新
+重要
+鉴于
+针对
+长期以来
+长此下去
+长线
+长话短说
+问题
+间或
+防止
+阿
+附近
+陈年
+限制
+陡然
+除
+除了
+除却
+除去
+除外
+除开
+除此
+除此之外
+除此以外
+除此而外
+除非
+随
+随后
+随时
+随着
+随著
+隔夜
+隔日
+难得
+难怪
+难说
+难道
+难道说
+集中
+零
+需要
+非但
+非常
+非徒
+非得
+非特
+非独
+靠
+顶多
+顷
+顷刻
+顷刻之间
+顷刻间
+顺
+顺着
+顿时
+颇
+风雨无阻
+饱
+首先
+马上
+高低
+高兴
+默然
+默默地
+齐
+︿
+！
+＃
+＄
+％
+＆
+＇
+（
+）
+）÷（１－
+）、
+＊
+＋
+＋ξ
+＋＋
+，
+，也
+－
+－β
+－－
+－［＊］－
+．
+／
+０
+０：２
+１
+１．
+１２％
+２
+２．３％
+３
+４
+５
+５：０
+６
+７
+８
+９
+：
+；
+＜
+＜±
+＜Δ
+＜λ
+＜φ
+＜＜
+＝
+＝″
+＝☆
+＝（
+＝－
+＝［
+＝｛
+＞
+＞λ
+？
+＠
+Ａ
+ＬＩ
+Ｒ．Ｌ．
+ＺＸＦＩＴＬ
+［
+［①①］
+［①②］
+［①③］
+［①④］
+［①⑤］
+［①⑥］
+［①⑦］
+［①⑧］
+［①⑨］
+［①Ａ］
+［①Ｂ］
+［①Ｃ］
+［①Ｄ］
+［①Ｅ］
+［①］
+［①ａ］
+［①ｃ］
+［①ｄ］
+［①ｅ］
+［①ｆ］
+［①ｇ］
+［①ｈ］
+［①ｉ］
+［①ｏ］
+［②
+［②①］
+［②②］
+［②③］
+［②④
+［②⑤］
+［②⑥］
+［②⑦］
+［②⑧］
+［②⑩］
+［②Ｂ］
+［②Ｇ］
+［②］
+［②ａ］
+［②ｂ］
+［②ｃ］
+［②ｄ］
+［②ｅ］
+［②ｆ］
+［②ｇ］
+［②ｈ］
+［②ｉ］
+［②ｊ］
+［③①］
+［③⑩］
+［③Ｆ］
+［③］
+［③ａ］
+［③ｂ］
+［③ｃ］
+［③ｄ］
+［③ｅ］
+［③ｇ］
+［③ｈ］
+［④］
+［④ａ］
+［④ｂ］
+［④ｃ］
+［④ｄ］
+［④ｅ］
+［⑤］
+［⑤］］
+［⑤ａ］
+［⑤ｂ］
+［⑤ｄ］
+［⑤ｅ］
+［⑤ｆ］
+［⑥］
+［⑦］
+［⑧］
+［⑨］
+［⑩］
+［＊］
+［－
+［］
+］
+］∧′＝［
+］［
+＿
+ａ］
+ｂ］
+ｃ］
+ｅ］
+ｆ］
+ｎｇ昉
+｛
+｛－
+｜
+｝
+｝＞
+～
+～±
+～＋
+￥
diff --git a/samples/pyWxDumpUtils/test.py b/samples/pyWxDumpUtils/test.py
new file mode 100644
index 00000000..b8a7ef3f
--- /dev/null
+++ b/samples/pyWxDumpUtils/test.py
@@ -0,0 +1,96 @@
+# 单独使用各模块，返回值一般为字典，参数参考命令行
+from pywxdump import *
+
+# ************************************************************************************************ #
+# Get the WeChat base-address offsets. NOTE(review): a real-looking phone number, account and key are hard-coded below - confirm they should be committed.
+args = {
+    "mode": "bias",
+    "mobile": "13207194214",  # phone number
+    "name": "isNotBlank",  # WeChat nickname
+    "account": "juniorshy",  # WeChat account
+    "key": "dc04f757625448c08b71f96dc03270c6ffa5e8dcc3ad49e58e3e8e91edc18242",  # key (optional)
+    # "db_path": "<WeChat folder of the logged-in account>",  # WeChat folder path (optional)
+    "db_path": "D:\\Documents\\WeChat Files",  # WeChat folder path (optional)
+    # "version_list_path": "<path of the WeChat version-offset file>"  # version-offset file path (optional)
+}
+bias_addr = BiasAddr(args["account"], args["mobile"], args["name"], args["key"], args["db_path"])  # "mode" is not passed on
+result = bias_addr.run(True)
+# ************************************************************************************************ #
+# Read the WeChat info
+wx_info = read_info(VERSION_LIST, True)
+
+# Locate the WeChat database folders
+args = {
+    "mode": "db_path",
+    "require_list": "all",  # names of the required databases (optional)
+    "wx_files": "WeChat Files",  # path of 'WeChat Files' (optional)
+    "wxid": "wxid_",  # wxid_ prefix used to identify the user folder (optional)
+}
+user_dirs = get_wechat_db(args["require_list"], args["wx_files"], args["wxid"], True)
+# ************************************************************************************************ #
+# Decrypt the WeChat database. NOTE(review): the key below is hard-coded - confirm it should be committed.
+args = {
+    "mode": "decrypt",
+    "key": "dc04f757625448c08b71f96dc03270c6ffa5e8dcc3ad49e58e3e8e91edc18242",  # key
+    "db_path": "D:\\Documents\\MuMu\\EnMicroMsg.db",  # database path
+    "out_path": "/path/to/decrypted"  # output path (must be a directory) [upstream default: "decrypted" under the current directory - confirm]
+}
+result = batch_decrypt(args["key"], args["db_path"], args["out_path"], True)
+
+from pywxdump import VERSION_LIST_PATH, VERSION_LIST
+from pywxdump import batch_decrypt
+key = "dc04f75"  # decryption key (shorter than the key used above)
+db_path = "D:\\Documents\\MuMu\\EnMicroMsg.db"  # database path (a file, or a list of files)
+out_path = "D:\\Documents\\MuMu"  # output path (directory)
+
+result = batch_decrypt(key, db_path, out_path, True)  # second decrypt call; overwrites the previous result
+# ************************************************************************************************ #
+# Browse the chat history in a local Flask app
+args = {
+    "mode": "dbshow",
+    "msg_path": "解密后的 MSG.db 的路径",  # placeholder: path of the decrypted MSG.db
+    "micro_path": "解密后的 MicroMsg.db 的路径",  # placeholder: path of the decrypted MicroMsg.db
+    "media_path": "解密后的 MediaMSG.db 的路径",  # placeholder: path of the decrypted MediaMSG.db
+    "filestorage_path": "文件夹FileStorage的路径"  # placeholder: path of the FileStorage folder (used to display images)
+}
+from flask import Flask, request, jsonify, render_template, g
+import logging
+
+app = Flask(__name__, template_folder='./show_chat/templates')
+app.logger.setLevel(logging.ERROR)
+
+# Before every request, copy the configured paths and the user list onto flask.g.
+
+@app.before_request
+def before_request():
+    g.MSG_ALL_db_path = args["msg_path"]
+    g.MicroMsg_db_path = args["micro_path"]
+    g.MediaMSG_all_db_path = args["media_path"]
+    g.FileStorage_path = args["filestorage_path"]
+    g.USER_LIST = get_user_list(args["msg_path"], args["micro_path"])  # args is looked up at request time
+
+
+app.register_blueprint(app_show_chat)
+print("[+] 请使用浏览器访问 http://127.0.0.1:5000/ 查看聊天记录")
+app.run(debug=False)  # NOTE(review): presumably blocks until the server stops, so the export section below runs only afterwards - confirm
+# ************************************************************************************************ #
+# Export the chat history as HTML
+args = {
+    "mode": "export",
+    "username": "luomi1998",  # WeChat account of the chat partner
+    "outpath": "/path/to/export",  # export path
+    "msg_path": "D:/Documents/WeChat Files/MSG",  # path of the decrypted MSG.db
+    "micro_path": "D:/Documents/WeChat Files/MicroMsg",  # path of the decrypted MicroMsg.db
+    "media_path": "D:/Documents/WeChat Files/MediaMSG"  # path of the decrypted MediaMSG.db
+    ,"filestorage_path": "D:/Documents/WeChat Files/FileStorage"  # path of the FileStorage folder (used to display images)
+}
+{  # NOTE(review): bare dict literal - evaluated and discarded, it has no effect; it also embeds a key and a wxid
+    "test": "",
+    "msg_path": "C:\\Users\\junio\\wxdump_tmp\\decrypted\\wxid_lsootbrkhf4x22\\merge_all.db",
+    "micro_path": "C:\\Users\\junio\\wxdump_tmp\\decrypted\\wxid_lsootbrkhf4x22\\merge_all.db",
+    "media_path": "C:\\Users\\junio\\wxdump_tmp\\decrypted\\wxid_lsootbrkhf4x22\\merge_all.db",
+    "wx_path": "D:\\Documents\\WeChat Files\\wxid_lsootbrkhf4x22",
+    "key": "dc04f757625448c08b71f96dc03270c6ffa5e8dcc3ad49e58e3e8e91edc18242",
+    "my_wxid": "wxid_lsootbrkhf4x22"
+}
+export(args["username"], args["outpath"], args["msg_path"], args["micro_path"], args["media_path"]
+       ,args["filestorage_path"])
\ No newline at end of file
diff --git a/samples/pyWxDumpUtils/testwordcloud copy.py b/samples/pyWxDumpUtils/testwordcloud copy.py
new file mode 100644
index 00000000..18927031
--- /dev/null
+++ b/samples/pyWxDumpUtils/testwordcloud copy.py	
@@ -0,0 +1,95 @@
+from collections import Counter  
+import pandas as pd
+import re
+import pymysql
+import numpy as np
+import jieba
+from wordcloud import WordCloud
+from PIL import Image
+
+
+def read_file(file_name):
+    """Return every line of the UTF-8 text file ``file_name`` without its trailing newline."""
+    with open(file_name, "r", encoding="utf-8") as source:
+        stripped = [raw.rstrip("\n") for raw in source]
+    return stripped
+
+
+# Dump the raw chat messages from the MySQL table into a text file.
+def extract():
+    """Save every message whose talker is "我" to 原始聊天记录me.txt, one message per line."""
+    # NOTE(review): host, port and credentials are hard-coded for a local demo database.
+    conn = pymysql.connect(host='localhost', port=3308, user='root', password='root', database='demo')
+    try:  # release the connection even when the query or the file write fails
+        with conn.cursor() as cursor:
+            cursor.execute('SELECT JSON_EXTRACT(content, "$.msg") AS msg_value  FROM wxid_p34mlvra9brc22_0_20230207_20240201 WHERE talker = "我";')
+            contents = cursor.fetchall()
+        with open('原始聊天记录me.txt', 'w+', encoding='utf-8') as file:
+            # JSON_EXTRACT is NULL (None) for rows without "$.msg"; skip them, because
+            # concatenating None with a newline would raise TypeError.
+            file.writelines(row[0] + '\n' for row in contents if row[0] is not None)
+    finally:
+        conn.close()
+
+
+# Clean the raw chat lines before word segmentation.
+def process(file_name='原始聊天记录me.txt'):
+    """Return the lines of ``file_name`` with noise removed, dropping lines that end up empty."""
+    # Order matters: the wxid pattern must run before ASCII letters are stripped.
+    patterns = (
+        re.compile(r"\[[^\]]+\]"),  # bracketed tags such as emoji placeholders
+        re.compile(r"wxid.*"),  # "wxid" and everything after it on the line
+        re.compile(r"[a-zA-Z]+"),  # ASCII letters
+        re.compile(r"表情|图片|未知|时长|逼|秒|撤回|没意思|翻译结果|用户上传的GIF表情|卡片式链接|带有引用的文本消息"),  # placeholder and unwanted Chinese words
+        re.compile(r"\s+"),  # whitespace
+        re.compile(r"\d+"),  # digits
+    )
+    cleaned = []
+    for line in read_file(file_name):
+        for pattern in patterns:
+            line = pattern.sub("", line)
+        if line != '':
+            cleaned.append(line)
+    return cleaned
+
+
+def cut(content_lines):
+    """Segment ``content_lines`` with jieba and return ``{word: count}`` for words seen more than 10 times."""
+    jieba.load_userdict('./原始聊天记录me.txt')
+    # A set gives O(1) stop-word checks; a list would be scanned once per token.
+    stopwords = set(read_file('./stopwords.dat'))
+    # Count every token that is not a stop word.
+    all_word_freq = Counter(
+        word for line in content_lines for word in jieba.cut(line) if word not in stopwords
+    )
+    # Keep only the words that occur more than 10 times.
+    return {word: count for word, count in all_word_freq.items() if count > 10}
+
+def get_cloud(sorted_words, num):
+    """Split ``sorted_words`` ((word, count) pairs) into ``num`` consecutive chunks and save one PNG per non-empty chunk."""
+    mask_image = np.array(Image.open('muban3.png'))
+    chunk_size = len(sorted_words) // num  # words per image; the last image also takes the remainder
+    for i in range(num):
+        start = i * chunk_size
+        end = start + chunk_size if i < num - 1 else len(sorted_words)
+        chunk = dict(sorted_words[start:end])
+        if not chunk:  # fewer words than images: WordCloud raises ValueError on empty input
+            continue
+        wordcloud = WordCloud(background_color='white', mask=mask_image, font_path='simhei.ttf')
+        wordcloud.generate_from_frequencies(chunk)
+        wordcloud.to_file('./me/cloud0{}.png'.format(i + 1))  # file names are numbered from 1
+
+if __name__ == '__main__':
+    # 1. Dump the raw chat messages to a text file.
+    extract()
+    # 2. Clean the dumped lines.
+    content_lines = process()
+    # 3. Segment into words; cut() drops stop words and words seen 10 times or fewer.
+    dict_words = cut(content_lines)
+    # 4. Order the (word, count) pairs from most to least frequent.
+    sorted_words = Counter(dict_words).most_common()
+    print(sorted_words)
+
+    # 5. Render the frequent words as five word-cloud images;
+    # get_cloud() assigns consecutive slices of the ranking to each image.
+    get_cloud(sorted_words, 5)
\ No newline at end of file
diff --git a/samples/pyWxDumpUtils/testwordcloud.py b/samples/pyWxDumpUtils/testwordcloud.py
new file mode 100644
index 00000000..c5c3f780
--- /dev/null
+++ b/samples/pyWxDumpUtils/testwordcloud.py
@@ -0,0 +1,95 @@
+from collections import Counter  
+import pandas as pd
+import re
+import pymysql
+import numpy as np
+import jieba
+from wordcloud import WordCloud
+from PIL import Image
+
+
+def read_file(file_name):
+    """Return every line of the UTF-8 text file ``file_name`` without its trailing newline."""
+    with open(file_name, "r", encoding="utf-8") as source:
+        stripped = [raw.rstrip("\n") for raw in source]
+    return stripped
+
+
+# Dump the raw chat messages from the MySQL table into a text file.
+def extract():
+    """Save every message whose talker is wxid_p34mlvra9brc22 to 原始聊天记录ta.txt, one per line."""
+    # NOTE(review): host, port and credentials are hard-coded for a local demo database.
+    conn = pymysql.connect(host='localhost', port=3308, user='root', password='root', database='demo')
+    try:  # release the connection even when the query or the file write fails
+        with conn.cursor() as cursor:
+            cursor.execute('SELECT JSON_EXTRACT(content, "$.msg") AS msg_value  FROM wxid_p34mlvra9brc22_0_20230207_20240201 WHERE talker = "wxid_p34mlvra9brc22";')
+            contents = cursor.fetchall()
+        with open('原始聊天记录ta.txt', 'w+', encoding='utf-8') as file:
+            # JSON_EXTRACT is NULL (None) for rows without "$.msg"; skip them, because
+            # concatenating None with a newline would raise TypeError.
+            file.writelines(row[0] + '\n' for row in contents if row[0] is not None)
+    finally:
+        conn.close()
+
+
+# Clean the raw chat lines before word segmentation.
+def process(file_name='原始聊天记录ta.txt'):
+    """Return the lines of ``file_name`` with noise removed, dropping lines that end up empty."""
+    # Order matters: the wxid pattern must run before ASCII letters are stripped.
+    patterns = (
+        re.compile(r"\[[^\]]+\]"),  # bracketed tags such as emoji placeholders
+        re.compile(r"wxid.*"),  # "wxid" and everything after it on the line
+        re.compile(r"[a-zA-Z]+"),  # ASCII letters
+        re.compile(r"表情|图片|未知|时长|逼|秒|没意思|翻译结果|卡片式链接|带有引用的文本消息"),  # placeholder and unwanted Chinese words
+        re.compile(r"\s+"),  # whitespace
+        re.compile(r"\d+"),  # digits
+    )
+    cleaned = []
+    for line in read_file(file_name):
+        for pattern in patterns:
+            line = pattern.sub("", line)
+        if line != '':
+            cleaned.append(line)
+    return cleaned
+
+
+def cut(content_lines):
+    """Segment ``content_lines`` with jieba and return ``{word: count}`` for words seen more than 10 times."""
+    jieba.load_userdict('./原始聊天记录ta.txt')
+    # A set gives O(1) stop-word checks; a list would be scanned once per token.
+    stopwords = set(read_file('stopwords.dat'))
+    # Count every token that is not a stop word.
+    all_word_freq = Counter(
+        word for line in content_lines for word in jieba.cut(line) if word not in stopwords
+    )
+    # Keep only the words that occur more than 10 times.
+    return {word: count for word, count in all_word_freq.items() if count > 10}
+
+def get_cloud(sorted_words, num):
+    """Split ``sorted_words`` ((word, count) pairs) into ``num`` consecutive chunks and save one PNG per non-empty chunk."""
+    mask_image = np.array(Image.open('muban3.png'))
+    chunk_size = len(sorted_words) // num  # words per image; the last image also takes the remainder
+    for i in range(num):
+        start = i * chunk_size
+        end = start + chunk_size if i < num - 1 else len(sorted_words)
+        chunk = dict(sorted_words[start:end])
+        if not chunk:  # fewer words than images: WordCloud raises ValueError on empty input
+            continue
+        wordcloud = WordCloud(background_color='white', mask=mask_image, font_path='simhei.ttf')
+        wordcloud.generate_from_frequencies(chunk)
+        wordcloud.to_file('cloud{}.png'.format(i + 1))  # file names are numbered from 1
+
+if __name__ == '__main__':
+    # 1. Extraction is currently disabled; process() reads the existing 原始聊天记录ta.txt.
+    # extract()
+    # 2. Clean the dumped lines.
+    content_lines = process()
+    # 3. Segment into words; cut() drops stop words and words seen 10 times or fewer.
+    dict_words = cut(content_lines)
+    # 4. Order the (word, count) pairs from most to least frequent.
+    sorted_words = Counter(dict_words).most_common()
+    print(sorted_words)
+
+    # 5. Render the frequent words as five word-cloud images;
+    # get_cloud() assigns consecutive slices of the ranking to each image.
+    get_cloud(sorted_words, 5)
\ No newline at end of file
diff --git a/samples/pyWxDumpUtils/testwordcloudgeneral.py b/samples/pyWxDumpUtils/testwordcloudgeneral.py
new file mode 100644
index 00000000..7f2acc31
--- /dev/null
+++ b/samples/pyWxDumpUtils/testwordcloudgeneral.py
@@ -0,0 +1,96 @@
+from collections import Counter  
+import pandas as pd
+import re
+import pymysql
+import numpy as np
+import jieba
+from wordcloud import WordCloud
+from PIL import Image
+import re
+import numpy as np
+from PIL import Image
+from wordcloud import WordCloud, STOPWORDS
+import matplotlib.pyplot as plt
+from collections import Counter
+
+def read_file(file_name):
+    """Return every line of the UTF-8 text file ``file_name`` without its trailing newline."""
+    with open(file_name, "r", encoding="utf-8") as source:
+        stripped = [raw.rstrip("\n") for raw in source]
+    return stripped
+
+
+def extract():
+    """Copy the exported chat file, line by line, into 原始聊天记录chatroom_0_5539.txt."""
+    # Load the exported .json file as plain text lines (it is not parsed as JSON).
+    content_lines = read_file('D:/Documents/WeChat Files/wxid_lsootbrkhf4x22/FileStorage/File/2024-02/38988759337@chatroom_0_5539.json')
+
+    # Write the lines back out, re-adding the newline that read_file() stripped.
+    with open('原始聊天记录chatroom_0_5539.txt', 'w+', encoding='utf-8') as target:
+        target.writelines(line + '\n' for line in content_lines)
+
+
+# Clean the chat records.
+
+
+def process(file_name='原始聊天记录chatroom_0_5539.txt'):
+    """Return the lines of ``file_name`` reduced to Chinese text, dropping lines that end up empty."""
+    # The default is the file extract() writes. It carries no ".\" prefix: that spelling
+    # only works on Windows and "\原" is an invalid string escape sequence.
+    # Order matters: bracketed tags and wxid tails must go before the non-Chinese sweep.
+    patterns = (
+        re.compile(r"\[[^\]]+\]"),  # bracketed tags such as emoji placeholders
+        re.compile(r"wxid.*"),  # "wxid" and everything after it on the line
+        re.compile(r"[a-zA-Z]+"),  # ASCII letters
+        re.compile(r"[^\u4e00-\u9fa5]+"),  # every character outside U+4E00..U+9FA5
+        re.compile(r"表情|图片|未知|文本|时长|逼|秒|撤回|没意思|翻译结果|用户上传的GIF表情|卡片式链接|带有引用的文本消息"),  # placeholder and unwanted Chinese words
+        re.compile(r"\s+"),  # whitespace
+        re.compile(r"\d+"),  # digits
+    )
+    cleaned = []
+    for line in read_file(file_name):
+        for pattern in patterns:
+            line = pattern.sub("", line)
+        if line != '':
+            cleaned.append(line)
+    return cleaned
+
+
+
+def cut(content_lines):
+    """Segment ``content_lines`` with jieba and return ``{word: count}`` for words seen more than 10 times."""
+    # Portable "./" path: a ".\" prefix only works on Windows and is an invalid string escape.
+    jieba.load_userdict('./原始聊天记录chatroom_0_5539.txt')
+    stopwords = set(read_file('stopwords.dat'))  # set: O(1) membership test per token
+    # Count every token that is not a stop word.
+    all_word_freq = Counter(
+        word for line in content_lines for word in jieba.cut(line) if word not in stopwords
+    )
+    # Keep only the words that occur more than 10 times.
+    return {word: count for word, count in all_word_freq.items() if count > 10}
+
+
+def get_cloud(sorted_words):
+    """Render one 809x500 word cloud from ``sorted_words`` ((word, count) pairs) and show it on screen."""
+    frequencies = Counter(dict(sorted_words))
+    cloud = WordCloud(background_color='white', mask=None, font_path='simhei.ttf', stopwords=STOPWORDS, width=809, height=500)
+    cloud.generate_from_frequencies(frequencies)
+    plt.figure(figsize=(8.09, 5))
+    plt.imshow(cloud, interpolation="bilinear")
+    plt.axis("off")
+    plt.show()
+ 
+
+
+if __name__ == '__main__':
+    extract()  # 1. copy the exported chat file into a local text file
+    # 2. Clean the copied lines.
+    content_lines = process()
+    # 3. Segment into words; cut() drops stop words and words seen 10 times or fewer.
+    dict_words = cut(content_lines)
+    # 4. Order the (word, count) pairs from most to least frequent.
+    sorted_words = Counter(dict_words).most_common()
+    print(sorted_words)
+
+    # 5. Render and display the word cloud.
+    get_cloud(sorted_words)
\ No newline at end of file
diff --git a/samples/pyWxDumpUtils/wodclodmedicine.py b/samples/pyWxDumpUtils/wodclodmedicine.py
new file mode 100644
index 00000000..9c856304
--- /dev/null
+++ b/samples/pyWxDumpUtils/wodclodmedicine.py
@@ -0,0 +1,56 @@
+import mysql.connector
+import pandas as pd
+import numpy as np
+import re
+from fractions import Fraction
+
+import pandas as pd
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+
+
+# change root password to yours:
+conn = mysql.connector.connect(user='root', password='root', database='sys')
+
+# Run the query. The finally clause closes the connection even when the query
+# or the fetch raises, so a failure does not leak it.
+try:
+    cursor = conn.cursor()
+    cursor.execute('SELECT * FROM `Sheet4_detail`')
+    values = cursor.fetchall()
+    # Column names of the result set, taken from the cursor metadata.
+    columns = [col[0] for col in cursor.description]
+    cursor.close()
+finally:
+    conn.close()
+
+# Build the DataFrame with the column names reported by the cursor.
+df = pd.DataFrame(values, columns=columns)
+
+# print(df)
+
+
+# Count how often each herb appears in the '中药' column.
+# '远志' is deliberately left out of the tally.
+freq_dict = {}
+for medicine in df['中药']:
+    if medicine == '远志':
+        continue
+    freq_dict[medicine] = freq_dict.get(medicine, 0) + 1
+
+# Rank the herbs by frequency, highest first, and keep the top 100.
+ranked = sorted(freq_dict.items(), key=lambda pair: pair[1], reverse=True)
+sorted_freq_dict = ranked[:100]
+
+print(sorted_freq_dict)
+
+# Build the word cloud from the ranked frequencies, rendering with the simhei.ttf font.
+wordcloud = WordCloud(width=800, height=400, background_color='white', font_path='simhei.ttf')
+wordcloud.generate_from_frequencies(dict(sorted_freq_dict))
+
+# Display the word cloud
+# in a 10x8 figure with the axes hidden.
+plt.figure(figsize=(10, 8))
+plt.imshow(wordcloud, interpolation='bilinear')
+plt.axis('off')
+plt.show()
\ No newline at end of file
diff --git a/samples/pyWxDumpUtils/wodclodmedicineByCsv.py b/samples/pyWxDumpUtils/wodclodmedicineByCsv.py
new file mode 100644
index 00000000..dbf4be73
--- /dev/null
+++ b/samples/pyWxDumpUtils/wodclodmedicineByCsv.py
@@ -0,0 +1,23 @@
+import pandas as pd
+from wordcloud import WordCloud
+import matplotlib.pyplot as plt
+
+df = pd.read_csv(r'D:\pyspace\learn-python3\samples\pyWxDumpUtils\中药频次.csv')
+
+# Drop the rows whose '中药统计分析' value is '远志'.
+df = df[df['中药统计分析'] != '远志']
+
+# Build the word cloud from the remaining column values rendered as one text.
+herb_text = df['中药统计分析'].to_string(index=False)
+wordcloud = WordCloud(
+    background_color='white', width=1000, height=700,
+    font_path='simhei.ttf',  # use a Chinese font
+    colormap='YlGnBu',  # colour scheme
+)
+wordcloud.generate(herb_text)
+
+# Display the word cloud with the axes hidden.
+plt.figure(figsize=(16, 9))
+plt.imshow(wordcloud, interpolation='bilinear')
+plt.axis('off')
+plt.show()
diff --git a/samples/regex/regexTestDemo.py b/samples/regex/regexTestDemo.py
new file mode 100644
index 00000000..19357b02
--- /dev/null
+++ b/samples/regex/regexTestDemo.py
@@ -0,0 +1,15 @@
+import re
+
+# Read the input file (the commented-out lines are an alternative, GBK-encoded input).
+# with open(r"D:\Documents\WeChat Files\wxid_lsootbrkhf4x22\FileStorage\File\2024-02\报错-药店-2.txt", "r", encoding="gbk") as f:
+#     data = f.read()
+with open(r"D:\NeuSoftResources\海南\l_hosp_appr_info_d_药店.sql", "r", encoding="utf") as sql_file:  # "utf" is a codec alias of UTF-8
+    data = sql_file.read()
+
+# Collect every quoted code of the form 'P' + 11 digits (the quotes are part of the match).
+pattern = re.compile(r"('P[0-9]{11}')")
+matches = pattern.findall(data)
+
+# Print the extracted codes, one per line.
+for code in matches:
+    print(code)