@@ -25,17 +25,15 @@ fn md_extensions() -> Options {
2525
2626/// Extract unparsed URL strings from a Markdown string.
2727// TODO: Refactor the extractor to reduce the complexity and number of lines.
28- #[ allow ( clippy:: too_many_lines) ]
28+ #[ expect ( clippy:: too_many_lines) ]
2929pub ( crate ) fn extract_markdown (
3030 input : & str ,
3131 include_verbatim : bool ,
3232 include_wikilinks : bool ,
3333) -> Vec < RawUri > {
34- // In some cases it is undesirable to extract links from within code blocks,
35- // which is why we keep track of entries and exits while traversing the input.
3634 let mut inside_code_block = false ;
37- let mut inside_link_block = false ;
38- let mut inside_wikilink_block = false ;
35+ let mut inside_link_label = false ; // encountering `X` in `[X]()`
36+ let mut inside_extracted_link = false ; // prevent double extraction when encountering `Text(X)` in `<X>` or `[[X]]`
3937
4038 // HTML blocks come in chunks from pulldown_cmark, so we need to accumulate them
4139 let mut inside_html_block = false ;
@@ -53,14 +51,11 @@ pub(crate) fn extract_markdown(
5351 dest_url,
5452 ..
5553 } ) => {
56- // Note: Explicitly listing all link types below to make it easier to
57- // change the behavior for a specific link type in the future.
58- #[ allow( clippy:: match_same_arms) ]
5954 match link_type {
6055 // Inline link like `[foo](bar)`
6156 // This is the most common link type
6257 LinkType :: Inline => {
63- inside_link_block = true ;
58+ inside_link_label = true ;
6459 Some ( raw_uri ( & dest_url, span_provider. span ( span. start ) ) )
6560 }
6661 // Reference without destination in the document, but resolved by the `broken_link_callback`
@@ -75,7 +70,7 @@ pub(crate) fn extract_markdown(
7570 LinkType :: Shortcut |
7671 // Shortcut without destination in the document, but resolved by the `broken_link_callback`
7772 LinkType :: ShortcutUnknown => {
78- inside_link_block = true ;
73+ inside_link_label = true ;
7974 // For reference links, create RawUri directly to handle relative file paths
8075 // that linkify doesn't recognize as URLs
8176 Some ( raw_uri ( & dest_url, span_provider. span ( span. start ) ) )
@@ -84,6 +79,7 @@ pub(crate) fn extract_markdown(
8479 LinkType :: Autolink |
8580 // Email address in autolink like `<john@example.org>`
8681 LinkType :: Email => {
82+ inside_extracted_link = true ;
8783 let span_provider = get_email_span_provider ( & span_provider, & span, link_type) ;
8884 Some ( extract_raw_uri_from_plaintext ( & dest_url, & span_provider) )
8985 }
@@ -93,7 +89,7 @@ pub(crate) fn extract_markdown(
9389 if !include_wikilinks {
9490 return None ;
9591 }
96- inside_wikilink_block = true ;
92+ inside_extracted_link = true ;
9793 // Ignore gitlab toc notation: https://docs.gitlab.com/user/markdown/#table-of-contents
9894 if [ "_TOC_" . to_string ( ) , "TOC" . to_string ( ) ] . contains ( & dest_url. to_string ( ) ) {
9995 return None ;
@@ -129,8 +125,8 @@ pub(crate) fn extract_markdown(
129125
130126 // A text node.
131127 Event :: Text ( txt) => {
132- if inside_wikilink_block
133- || ( inside_link_block && !include_verbatim)
128+ if inside_extracted_link
129+ || ( inside_link_label && !include_verbatim)
134130 || ( inside_code_block && !include_verbatim) {
135131 None
136132 } else {
@@ -205,13 +201,12 @@ pub(crate) fn extract_markdown(
205201 }
206202
207203 Event :: End ( TagEnd :: Link ) => {
208- inside_link_block = false ;
209- inside_wikilink_block = false ;
204+ inside_link_label = false ;
205+ inside_extracted_link = false ;
210206 None
211207 }
212208
213- // Skip footnote references and definitions explicitly - they're not links to check
214- #[ allow( clippy:: match_same_arms) ]
209+ #[ expect( clippy:: match_same_arms, reason = "Skip footnote references and definitions explicitly - they're not links to check" ) ]
215210 Event :: FootnoteReference ( _) | Event :: Start ( Tag :: FootnoteDefinition ( _) ) | Event :: End ( TagEnd :: FootnoteDefinition ) => None ,
216211
217212 // Silently skip over other events
575570 assert ! ( uris. is_empty( ) ) ;
576571 }
577572
573+ /// An autolink such as `<http://example>` must yield exactly one URI, with or without `include_verbatim`: its text node repeats the link destination and must not be extracted a second time.
574+ /// Prevents a regression of <https://github.com/lycheeverse/lychee/issues/2150>
575+ #[ test]
576+ fn test_autolink ( ) {
577+ let markdown = "<http://example>" ;
578+ assert_eq ! ( extract_markdown( markdown, false , false ) . len( ) , 1 ) ; // include_verbatim = false
579+ assert_eq ! ( extract_markdown( markdown, true , false ) . len( ) , 1 ) ; // include_verbatim = true
580+ }
581+
578582 #[ test]
579583 fn test_link_text_not_checked ( ) {
580584 // Test that link text is not extracted as a separate link by default
0 commit comments