Commit 12016bf: add automatically add internal link feature
1 parent b05dffa

8 files changed: +256 -15

.gitignore (-2)

@@ -127,8 +127,6 @@ venv/
 ENV/
 env.bak/
 venv.bak/
-.pixi
-pixi.lock

 # Spyder project settings
 .spyderproject

README.md (+27 -1)

@@ -549,7 +549,8 @@ res = ea.sort_note_content('y6hROhWjNmHQ', 'zh_CN.UTF-8')

 ## (Advanced Usage) 🧹 Delete empty `new note`

-Sometimes I inadvertently create numerous "new notes" which remain undeleted within my note tree. These "new notes" clutter my
+Sometimes I inadvertently create numerous "new notes" which remain undeleted within my note tree. These "new notes"
+clutter my
 workspace, scattered across various locations. I made this bulk deletion of these empty "new notes." Additionally, it
 generates warning messages for "new notes" that contain content, maybe we should change the title for those notes.

@@ -581,6 +582,31 @@ ea.optimize_image_attachments_to_webp('H2q3901uFDCH')

 This action can save significant space if you have many clipped pages. Whoever invented `WebP` is a genius.

+## (Advanced Usage) Automatically add internal link
+
+Experimental feature. Back up your database before doing anything with this feature. It may completely destroy your notes.
+
+If you find something wrong after using this feature, please provide me with a minimal note sample to test and fix
+potential bugs.
+
+Add internal links in a note:
+
+```
+ea.auto_create_internal_link('HfAnsf8XiarY')
+```
+
+For multiple notes:
+
+```
+ea.auto_create_internal_link(target_notes=['gLmmsIM8yPqx', 'T4Ui3wNByO03'])
+```
+
+(Dangerous action, back up first) Add internal links to all text notes:
+
+```
+ea.auto_create_internal_link(process_all_notes=True)
+```
+
 ## 🛠️ Develop

 Install with pip egg link to make package change without reinstall.
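
The README examples above assume the `ea` client that is set up at the top of the README. A minimal end-to-end sketch (the server URL and token below are placeholders; a database backup is assumed first):

```
from trilium_py.client import ETAPI

# Placeholder connection details: substitute your own server and ETAPI token
server_url = 'http://localhost:8080'
token = 'YOUR_TOKEN'
ea = ETAPI(server_url, token)

# Rewrites matching note content in place, so back up the database first
ea.auto_create_internal_link('HfAnsf8XiarY')
```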

README_CN.md (+26)

@@ -508,6 +508,32 @@ ea.optimize_image_attachments_to_webp('H2q3901uFDCH')

 This operation can save a lot of space if you have many clipped pages. Whoever invented `WebP` is a genius.

+
+## (Advanced Usage) Automatically add internal link
+
+Experimental feature. Back up your database before using this feature. It may completely destroy your notes.
+
+If you find any problem after using this feature, please provide a minimal note sample so that potential bugs can be tested and fixed.
+
+Add internal links in a note:
+
+```
+ea.auto_create_internal_link('HfAnsf8XiarY')
+```
+
+For multiple notes:
+
+```
+ea.auto_create_internal_link(target_notes=['gLmmsIM8yPqx', 'T4Ui3wNByO03'])
+```
+
+(Dangerous action, back up first) Add internal links to all text notes:
+
+```
+ea.auto_create_internal_link(process_all_notes=True)
+```
+
+
 ## 🛠️ Develop

 Install with pip egg link so that package changes take effect without reinstalling.

requirements.txt (+1)

@@ -9,3 +9,4 @@ loguru
 minify-html
 pillow
 python-dateutil
+tqdm

setup.py (+1)

@@ -148,6 +148,7 @@
         'minify-html',
         'pillow',
         'python-dateutil',
+        'tqdm',
     ],
     # Optional
     # List additional groups of dependencies here (e.g. development

src/trilium_py/client.py (+110 -11)

@@ -16,12 +16,20 @@
 from bs4 import BeautifulSoup
 from loguru import logger
 from natsort import natsort
+from tqdm import tqdm

 from .utils.file_util import replace_extension
+from .utils.html_util import add_internal_links
 from .utils.markdown_math import reconstructMath, sanitizeInput
-from .utils.note_util import beautify_content, sort_note_by_headings
+from .utils.note_util import beautify_content, sort_note_by_headings, preprocess_note_title_list
 from .utils.param_util import clean_param, format_query_string
-from .utils.time_util import get_today, get_yesterday, synchronize_dates, format_date_to_etapi, get_local_timezone
+from .utils.time_util import (
+    get_today,
+    get_yesterday,
+    synchronize_dates,
+    format_date_to_etapi,
+    get_local_timezone,
+)
 from .utils.image_util import compress_image_bytes, get_extension_from_image_mime


@@ -373,9 +381,10 @@ def patch_note(
         }
         res = requests.patch(url, json=clean_param(params), headers=self.get_header())
         return res.json()
-    def handle_dates(self,
-                     dateCreated: Optional[datetime] = None,
-                     utcDateCreated: Optional[datetime] = None):
+
+    def handle_dates(
+        self, dateCreated: Optional[datetime] = None, utcDateCreated: Optional[datetime] = None
+    ):
         '''Ensure that both local and UTC times are defined, and have same time
         (adjusted for timezone)'''
         if not dateCreated and not utcDateCreated:
@@ -384,21 +393,21 @@ def handle_dates(self,
             print(f"dateCreated is not datetime object, is {type(dateCreated)}")
             raise TypeError("dateCreated must be a datetime object")
         if utcDateCreated and not isinstance(utcDateCreated, datetime):
-            print(f"utcdateCreated is not datetime object, is {type(utcDateCreated)}")
+            print(f"utcdateCreated is not datetime object, is {type(utcDateCreated)}")
             raise TypeError("utcDateCreated must be a datetime object")

         if dateCreated and dateCreated.tzinfo is None:
             tzinfo = get_local_timezone()
             dateCreated = dateCreated.replace(tzinfo=tzinfo)
             print(f"dateCreated.tzinfo was None. Changed to: {dateCreated.tzinfo}.")
-
+
         if utcDateCreated and utcDateCreated.tzinfo is None:
             utcDateCreated = utcDateCreated.replace(tzinfo=dateutil.tz.tzutc())
             print(f'utc date tzinfo was None, forced to UTC ({utcDateCreated})')

         # After ensuring TZ is set for one of the date types, synchronize them
         synchronized_dates = synchronize_dates(local_date=dateCreated, utc_date=utcDateCreated)
-
+
         return synchronized_dates

     # def synchronize_dates(self, local_date: Optional[datetime], utc_date: Optional[datetime]) -> tuple[Optional[datetime], Optional[datetime]]:
@@ -423,10 +432,10 @@ def handle_dates(self,
     #     print(f"\tlocal_date: {local_date}")
     #     print(f"\tutc_date : {utc_date}")
     #     return local_date, utc_date
-
+
     # def get_local_timezone(self=None):
     #     print("Getting local_timezone...")
-
+
     #     # this is short and sweet
     #     local_timezone = datetime.now().astimezone().tzinfo

@@ -446,7 +455,7 @@ def handle_dates(self,
     #     UTC : '2023-08-22 01:38:51.110Z'
     #     and exactly 3 decimal places for seconds.'''
     #     if kind == 'local':
-    #         date = date.strftime('%Y-%m-%d %H:%M:%S.%d3%z')
+    #         date = date.strftime('%Y-%m-%d %H:%M:%S.%d3%z')
     #     if kind == 'utc':
     #         date = date.astimezone(dateutil.tz.tzstr('Z'))  # use Zulu time
     #         date = date.strftime('%Y-%m-%d %H:%M:%S.%d3%Z')
@@ -1499,6 +1508,96 @@ def delete_empty_note(self, note_title=None, verbose=False):
         if verbose:
             logger.info(content)

+    def auto_create_internal_link(
+        self,
+        target_note_id=None,
+        target_notes=None,
+        process_all_notes=False,
+        skip_clipped_notes=True,
+        skip_day_notes=True,
+        verbose=True,
+    ):
+        """
+        Create internal link for notes
+        """
+
+        # Prepare note title and note id list
+        # Get all note titles and note ids
+        all_notes = self.search_note(search="note.title %= '.*'")
+        all_note_title_list = []
+        for x in all_notes['results']:
+            if x['isProtected']:
+                # Remove protected notes, they are not editable via ETAPI
+                continue
+            title = x['title']
+            note_id = x['noteId']
+            all_note_title_list.append([title, note_id])
+
+        # Process the note titles, handling duplicates and sorting
+        processed_note_title_list = preprocess_note_title_list(all_note_title_list)
+
+        # prepare target note id
+        if target_note_id:
+            target_notes = [
+                target_note_id,
+            ]
+        elif target_notes:
+            pass
+        elif process_all_notes:
+            # process all notes if not provided a note id list
+            target_notes = [x[1] for x in all_note_title_list]
+
+        # Add internal link
+
+        def get_child_note_title_note_id_list(note_id):
+            res = self.get_note(note_id)
+            result = []
+            for child_note_id in res['childNoteIds']:
+                x = self.get_note(child_note_id)
+                result.append([x['title'], x['noteId']])
+            return preprocess_note_title_list(result)
+
+        for note_id in tqdm(target_notes):
+
+            # only process text note here
+            current_note = self.get_note(note_id)
+
+            if verbose:
+                logger.info(f'current note id: {note_id} title: {current_note["title"]}')
+
+            if not current_note['type'] == 'text':
+                if verbose:
+                    logger.info('skip: not text note')
+                continue
+
+            if skip_clipped_notes and any(
+                [x['name'] == 'pageUrl' for x in current_note['attributes']]
+            ):
+                if verbose:
+                    logger.info('skip: clipped note')
+                continue
+
+            if skip_day_notes and any(
+                [x['name'] == 'dateNote' for x in current_note['attributes']]
+            ):
+                if verbose:
+                    logger.info('skip: day note')
+                continue
+
+            # add child note, we can handle sub notes with same name from different parent notes
+            processed_child_note_title_list = get_child_note_title_note_id_list(note_id)
+            tmp_list_for_current_note = processed_child_note_title_list + processed_note_title_list
+
+            content = self.get_note_content(note_id)
+            updated_content, replaced = add_internal_links(
+                content, tmp_list_for_current_note, current_note_id=note_id
+            )
+            # If content has changed, update the note
+            if replaced:
+                self.update_note_content(note_id, updated_content)
+                if verbose:
+                    logger.info(f"Added internal link to note {note_id}.")
+

 class ListTemplate(string.Template):
     """Encapsulate To Do List HTML details

src/trilium_py/utils/html_util.py (+63 -1)

@@ -1,7 +1,11 @@
 import locale
 import re
+import warnings

-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
+
+# Disable MarkupResemblesLocatorWarning globally
+warnings.filterwarnings('ignore', category=MarkupResemblesLocatorWarning)

 TAG_LEVELS = {'h1': 1, 'h2': 2, 'h3': 3, 'h4': 4, 'h5': 5, 'h6': 6}

@@ -73,3 +77,61 @@ def convert_to_tree(data):
     # Concatenate data in a depth-first search manner
     html_string = dfs_concat_data(tree)
     return html_string
+
+
+def add_internal_links(
+    html_content, keyword_note_id_list, current_note_id=None, exclude_headings=True
+):
+    """
+    Adds internal links to the HTML content by replacing keywords with anchor tags.
+
+    Args:
+        html_content (str): The HTML content to process.
+        keyword_note_id_list (list of tuples): List of (keyword, note_id).
+        exclude_headings (bool): Whether to exclude heading tags from processing.
+        current_note_id (str): The ID of the current note to prevent self-referencing.
+
+    Returns:
+        tuple: A tuple containing the updated HTML content and a boolean indicating if replacements were made.
+    """
+    # Use BeautifulSoup to parse the HTML content
+    soup = BeautifulSoup(html_content, "html.parser")
+    replaced = False  # Flag to check if any replacement happens
+
+    # Precompile the keywords and links into a dictionary, excluding self-referencing notes
+    keyword_to_link = {
+        keyword: f'<a class="reference-link" href="#root/{note_id}">{keyword}</a>'
+        for keyword, note_id in keyword_note_id_list
+        if note_id != current_note_id  # Exclude the current note's ID
+    }
+
+    # Create a regex pattern to match any keyword
+    if not keyword_to_link:
+        return str(soup), replaced  # No keywords to process
+
+    keyword_pattern = re.compile(
+        r'\b(' + '|'.join(re.escape(k) for k in keyword_to_link.keys()) + r')\b'
+    )
+
+    # Tags to exclude from replacement
+    exclude_tags = ['a']
+    if exclude_headings:
+        exclude_tags.extend(['h2', 'h3', 'h4', 'h5', 'h6'])
+
+    # Traverse all text nodes once
+    for text_node in soup.find_all(string=True):
+        # Skip nodes inside tags that shouldn't contain links
+        if text_node.parent.name in exclude_tags:
+            continue
+
+        # Replace keywords in the text
+        def replace_keyword(match):
+            replaced_keyword = match.group(0)
+            return keyword_to_link[replaced_keyword]
+
+        new_text = keyword_pattern.sub(replace_keyword, text_node)
+        if new_text != text_node:  # If the text has actually changed
+            text_node.replace_with(BeautifulSoup(new_text, "html.parser"))
+            replaced = True  # Mark that replacement has occurred
+
+    return str(soup), replaced
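
To illustrate the replacement behavior, a quick sketch with made-up note IDs: keywords are linked only inside plain text nodes, existing anchors (and optionally headings) are left untouched, and the current note never links to itself.

```
from trilium_py.utils.html_util import add_internal_links

html = '<p>See Python and <a href="#">Python</a> docs.</p>'
updated, replaced = add_internal_links(html, [('Python', 'abc123')], current_note_id='xyz789')
print(replaced)
# True
print(updated)
# <p>See <a class="reference-link" href="#root/abc123">Python</a> and <a href="#">Python</a> docs.</p>
```

One caveat of this design: the pattern requires a `\b` word boundary on both sides of a title, so a title embedded in unbroken CJK text will only be matched when it is set off by whitespace or punctuation.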

src/trilium_py/utils/note_util.py (+28)

@@ -110,3 +110,31 @@ def sort_note_by_headings(html_content, locale_str='zh_CN.UTF-8'):
     sorted_html_string = content_before_first_h + sorted_html

     return sorted_html_string
+
+
+def preprocess_note_title_list(data):
+    """
+    Optimized version of the function to preprocess the list of [title, note_id].
+    Cleans titles, removes duplicates and previous matching entries, and sorts by title length.
+    """
+
+    def clean_title(title):
+        return title.strip()
+
+    # Use an ordered dictionary to maintain insertion order while ensuring uniqueness
+    from collections import OrderedDict
+
+    cleaned_data = OrderedDict()
+
+    # Traverse the data and process each title
+    for title, note_id in data:
+        cleaned_title = clean_title(title)
+        if cleaned_title in cleaned_data:
+            # If the title already exists, remove it
+            del cleaned_data[cleaned_title]
+        else:
+            # Otherwise, add it to the dictionary
+            cleaned_data[cleaned_title] = note_id
+
+    # Convert the dictionary back to a list and sort by title length (descending)
+    return sorted(cleaned_data.items(), key=lambda x: len(x[0]), reverse=True)
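
A quick sketch of the helper's behavior with made-up note IDs: titles that collide after stripping are dropped entirely, so ambiguous names are never auto-linked, and the longest-first ordering ensures the longest title wins in the alternation regex that `add_internal_links` builds.

```
from trilium_py.utils.note_util import preprocess_note_title_list

data = [
    ['Python', 'id1'],
    ['  Python  ', 'id2'],  # same title after strip(): both entries are dropped
    ['Trilium Notes', 'id3'],
    ['Trilium', 'id4'],
]
print(preprocess_note_title_list(data))
# [('Trilium Notes', 'id3'), ('Trilium', 'id4')]
```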
