diff --git a/.github/ISSUE_TEMPLATE/02-name-correction.yml b/.github/ISSUE_TEMPLATE/02-name-correction.yml index 3aeca7882c..c01f8f1cf5 100644 --- a/.github/ISSUE_TEMPLATE/02-name-correction.yml +++ b/.github/ISSUE_TEMPLATE/02-name-correction.yml @@ -37,8 +37,8 @@ body: attributes: label: Author ORCID description: | - Provide a valid [ORCID](https://orcid.org) link here. This will be used to help match future papers to your Anthology ID. - placeholder: ex. https://orcid.org/my-orcid?orcid=0009-0003-8868-7504 + Provide a valid [ORCID](https://orcid.org) link here, and [add your name variants to your ORCID profile](https://aclanthology.org/info/orcid_id). This will be used to match papers to your Anthology ID. + placeholder: ex. https://orcid.org/0009-0003-8868-7504 validations: required: true - type: textarea diff --git a/.github/ISSUE_TEMPLATE/99-bulk-metadata-correction.yml b/.github/ISSUE_TEMPLATE/99-bulk-metadata-correction.yml index 6e7b56e043..e7b95c8bbf 100644 --- a/.github/ISSUE_TEMPLATE/99-bulk-metadata-correction.yml +++ b/.github/ISSUE_TEMPLATE/99-bulk-metadata-correction.yml @@ -7,11 +7,11 @@ body: - type: markdown attributes: value: > - **This form is not meant to be used manually.** Instead, it is activated by clicking the yellow "Fix data" button found on each paper page in the Anthology (e.g., https://aclanthology.org/K17-1003/). Clicking this button displays a UI tool for modifying the title, abstract, and author list. Submission of that form will automatically populate the title above and data block below. + **Please do not edit the JSON below.** This form is not meant to be used manually; instead, it is activated by clicking the yellow "Fix data" button found on each paper page in the Anthology (e.g., https://aclanthology.org/K17-1003/). Doing so displays a UI tool for modifying the title, abstract, and author list. Submission of that form will automatically populate the title above and data block below. 
- type: markdown attributes: value: > - Corrections will be processed in bulk on a weekly basis after verification by Anthology staff. + Corrections will be manually reviewed by Anthology staff and processed in bulk on a roughly weekly basis. - type: textarea id: data attributes: diff --git a/bin/create_hugo_data.py b/bin/create_hugo_data.py index 1ed8bc656e..3e653f35f0 100755 --- a/bin/create_hugo_data.py +++ b/bin/create_hugo_data.py @@ -53,6 +53,7 @@ from acl_anthology.collections.paper import PaperDeletionType from acl_anthology.collections.volume import VolumeType from acl_anthology.utils.logging import setup_rich_logging +from acl_anthology.utils.ids import is_verified_person_id from acl_anthology.utils.text import ( interpret_pages, month_str2num, @@ -394,15 +395,19 @@ def export_people(anthology, builddir, dryrun): ) if n.script is not None: diff_script_variants.append(n.as_full()) - if diff_script_variants: + if diff_script_variants and is_verified_person_id(person_id): data["full"] = f"{data['full']} ({', '.join(diff_script_variants)})" if person.comment is not None: data["comment"] = person.comment if person.orcid is not None: data["orcid"] = person.orcid similar = anthology.people.similar.subset(person_id) - if len(similar) > 1: - data["similar"] = list(similar - {person_id}) + similar.remove(person_id) + if similar_verified := [id_ for id_ in similar if is_verified_person_id(id_)]: + data["similar_verified"] = sorted(list(similar_verified)) + similar.difference_update(similar_verified) + if similar: # any remaining IDs are unverified + data["similar_unverified"] = sorted(list(similar)) people[person_id] = data progress.update(task, advance=1) diff --git a/hugo/assets/css/main.scss b/hugo/assets/css/main.scss index fc710130bb..0eba9e914d 100644 --- a/hugo/assets/css/main.scss +++ b/hugo/assets/css/main.scss @@ -19,7 +19,8 @@ $attachment_color: darken( $green, 10% ); $theme-colors: map-merge( $theme-colors, ( - "attachment": $attachment_color + 
"attachment": $attachment_color, + "verified": $attachment_color, ) ); diff --git a/hugo/content/info/corrections.md b/hugo/content/info/corrections.md index 043fc5b850..bffcd54911 100644 --- a/hugo/content/info/corrections.md +++ b/hugo/content/info/corrections.md @@ -50,7 +50,7 @@ Please pay careful attention to the following steps. 1. **Ensure that each name is correct**. We treat the information on the PDF as authoritative; this means that the metadata should reflect exactly what is printed on the PDF. A common situation is that the name recorded in Anthology metadata (e.g., John P. Hancock) will not match what is displayed on the PDF (John Hancock). This needs to be corrected first. Please review your papers and [follow the steps here](#metadata-corrections) to correct any discrepancies. Sometimes, this will resolve the split pages. 2. Obtain [an ORCID](https://orcid.org). This is required to help with matching of future papers. 3. Fill out [an author page correction](https://github.com/acl-org/acl-anthology/issues/new?template=02-name-correction.yml). A Github issue is our preferred mechanism, but you can also email [the Anthology director](mailto:anthology@aclweb.org). -4. Finally, to avoid issues in the future, ensure that the name you use on papers is properly recorded in your profile in publication management systems such as [Open Review](https://openreview.net), [Softconf](https://softconf.com), [EasyChair](https://easychair.org), and so on. +4. Finally, to avoid issues in the future, ensure that the name you use on papers is properly recorded in your profile in publication management systems such as [OpenReview](https://openreview.net), [Softconf](https://softconf.com), [EasyChair](https://easychair.org), and so on. Anthology staff will address your issue as quickly as possible. An example merged author profile is [Aravand Joshi](https://aclanthology.org/people/aravind-joshi). 
diff --git a/hugo/content/info/names.md b/hugo/content/info/names.md new file mode 100644 index 0000000000..238030eed2 --- /dev/null +++ b/hugo/content/info/names.md @@ -0,0 +1,14 @@ +--- +Title: Names in the ACL Anthology +linktitle: Names +subtitle: How the Anthology deals with names +date: "2025-12-29" +--- +_This page is a stub. It should describe how the Anthology deals with names, including the following._ + +- PDF metadata and the formation of name slugs +- Creating author pages +- Disambiguating names and merging name variants +- How papers are matched to authors + +It can and should refer to other documentation spread around, but this could be a piece of central information. diff --git a/hugo/content/info/orcid.md b/hugo/content/info/orcid.md new file mode 100644 index 0000000000..e498cd2cdb --- /dev/null +++ b/hugo/content/info/orcid.md @@ -0,0 +1,24 @@ +--- +Title: ORCID iDs in the ACL Anthology +linktitle: ORCID iDs +subtitle: Best practices for ensuring your papers are correctly linked +date: "2025-12-29" +--- + +An [ORCID iD](https://orcid.org) is a persistent digital identifier that distinguishes you from other researchers and links your research outputs and activities to your identity. +When these iDs are present on ingested papers, it removes the ambiguity that sometimes arises with popular names and name variants. + +We urge every author who is actively publishing papers to create an ORCID iD, and to supply this ID to publication systems such as OpenReview.[^1] In creating or editing your ORCID iD profile, please consider the following tips to help match your papers to your Anthology page: + +[^1]: Visit the [edit page link at OpenReview](https://openreview.net/profile/edit) and add your ORCID iD under the "Personal Links" section. + +- **Add name variants**: This is most important. 
In your ORCID iD profile, you can set your given and family names, your published name, and any name variants you have published under (e.g., with or without middle initials, maiden names, etc.). https://orcid.org/0000-0002-1831-3457 is a good example. Make sure you have at least one Latin variant of your name. This helps the Anthology match your publications even if they are listed under different names. +- **Make sure your name is visible**: Ensure that your name is set to be visible to everyone in your ORCID iD profile settings. If your name is private, the Anthology may not be able to access it for matching. +- **Register at least one personal email address**: This can be kept private but ensures permanent access so that you can update your ORCID record in the future. + +In addition, the following information can help us improve matching, should we need to manually disambiguate authors. + +- **Add a few representative publications**: You can add your publications to your ORCID iD profile. This is less important for matching in the Anthology, but it is a good practice to keep your ORCID iD profile complete. Many Anthology papers have DOIs, which simplifies the process. +- **Add educational history and affiliations**: Adding your educational background and current affiliations can help further distinguish you from other researchers with similar names. It is especially helpful for us to know the institution from which you receive (or expect to receive) your highest degree, since we use this information when disambiguating authors. + +In addition to this, make sure that the name you enter into submission management systems (such as Softconf or OpenReview) matches one of the ORCID variants, ideally the published name. 
diff --git a/hugo/content/info/verification.md b/hugo/content/info/verification.md new file mode 100644 index 0000000000..af8ca14678 --- /dev/null +++ b/hugo/content/info/verification.md @@ -0,0 +1,26 @@ +--- +Title: Verified authors +linktitle: Verification +subtitle: How the ACL Anthology verifies authors +date: "2025-12-29" +--- +Every author appearing on a paper in the Anthology is given an author page. The ACL Anthology distinguishes between verified and unverified authors. + +**A _verified_ author is one for whom we have an explicit entry in our names database.** +This can happen either automatically or manually. Entries are created automatically when a paper is ingested with ORCID iD information attached to the paper's author(s). We also create entries manually when we intervene to disambiguate authors with similar names or who publish under multiple names. The top of the author's page will have an ORCID icon linked to the ORCID profile (except for some legacy entries for which we do not yet have an ORCID iD). + +The ORCID icon does not guarantee that all papers on the page belong to that author, however. This is because, once an author is verified, papers lacking an explicit ORCID iD but matching the name string will be included on the page unless the name is known to be ambiguous. The "Fix author" button should be used to alert the Anthology team of any errors. + +**An _unverified_ author is one for whom we do not have an explicit entry in our names database.** Unverified pages have `/unverified/` appended to the URL. These pages are created automatically when a paper is ingested without ORCID iD information attached to the authors. Unverified author pages do not include a link to an ORCID profile, but instead include a question mark icon next to the author's name. 
+ +Where an author name in our database is known to be ambiguous, there will be an `/unverified/` page listing any papers with that name which have not been explicitly identified with a specific verified author. These can be moved to a verified author page via a "Fix author" request. + +### Verifying an author + +1. Create an ORCID iD and populate it. + + We urge every author to create an [ORCID iD](https://orcid.org), and to supply this ID to publication systems such as OpenReview. The reason is that we use this information at ingestion time to match papers to an author. This is increasingly important as the size of the global scientific community increases, and ambiguous names proliferate. Please see our simple [ORCID iD guide]({{< ref "/info/orcid">}}) for information that will help match your papers to your Anthology page. + +2. File an issue on GitHub. + + Navigate to the author page in the Anthology. If there is no ORCID icon, click the "Fix author" link at the bottom of the links on the righthand side of the page to create an issue from our template. Provide an ORCID iD along with other relevant information. Leave the issue open to be reviewed by Anthology staff. diff --git a/hugo/layouts/_default/baseof.html b/hugo/layouts/_default/baseof.html index 61777eea19..bf0a9eb7c6 100644 --- a/hugo/layouts/_default/baseof.html +++ b/hugo/layouts/_default/baseof.html @@ -22,7 +22,7 @@ {{ $sass_options := (dict "includePaths" (slice "assets/css" "assets/css/vendor/bootstrap/scss")) }} {{ $style := resources.Get "css/main.scss" | toCSS $sass_options | minify | fingerprint }} - + {{ block "meta" . }}{{ end }} diff --git a/hugo/layouts/partials/header_navbar.html b/hugo/layouts/partials/header_navbar.html index ed6baaeff8..350c0f88bf 100644 --- a/hugo/layouts/partials/header_navbar.html +++ b/hugo/layouts/partials/header_navbar.html @@ -23,7 +23,7 @@ Submissions(current)
diff --git a/hugo/layouts/people/single.html b/hugo/layouts/people/single.html index a4348e8996..c489cd2938 100644 --- a/hugo/layouts/people/single.html +++ b/hugo/layouts/people/single.html @@ -1,9 +1,23 @@ {{ define "main" }} {{ $person := index .Site.Data.people .Params.name }} +{{ $is_verified := not (hasSuffix $person.slug "/unverified") }}

{{ $person.first }} {{ $person.last }} + {{ if $person.orcid }} + + + + {{ else if $is_verified }} + + + + {{ else }} + + + + {{ end }}

{{ with $person.comment }}

{{.}}

@@ -17,13 +31,21 @@

{{ end }}

{{ end }} - {{ with $person.similar }} + {{ with $person.similar_verified }} +

+ {{ $len := (len .) }} + {{ if $is_verified }}Other people with similar names:{{ else }}Papers on this page may belong to the following people:{{ end }} + {{ range $index, $sim_id := . }} + {{ trim (partial "author_link.html" (dict "ctx" $ "person" (dict "id" $sim_id))) " \n" | safeHTML }}{{ $sim_person := index $.Site.Data.people $sim_id }}{{ with $sim_person.comment }} ({{.}}){{ end }}{{ if ne (add $index 1) $len }}, {{ end }} + {{ end }} +

+ {{ end }} + {{ with $person.similar_unverified }}

- Other people with similar names: {{ $len := (len .) }} + Unverified author pages with similar names: {{ range $index, $sim_id := . }} - {{ trim (partial "author_link.html" (dict "ctx" $ "person" (dict "id" $sim_id))) " \n" | safeHTML }} - {{ $sim_person := index $.Site.Data.people $sim_id }}{{ with $sim_person.comment }}({{.}}){{ end }}{{ if ne (add $index 1) $len }}, {{ end }} + {{ trim (partial "author_link.html" (dict "ctx" $ "person" (dict "id" $sim_id))) " \n" | safeHTML }}{{ $sim_person := index $.Site.Data.people $sim_id }}{{ if ne (add $index 1) $len }}, {{ end }} {{ end }}

{{ end }}