Skip to content

Commit e584b04

Browse files
authored
Merge pull request #4199 from jcassette/duplicate
Allow to configure which fields are used to find duplicates
2 parents 7467bc3 + 2ebc28d commit e584b04

File tree

7 files changed

+122
-28
lines changed

7 files changed

+122
-28
lines changed

beets/autotag/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
TrackMatch,
2828
Distance,
2929
)
30-
from .match import tag_item, tag_album, Proposal # noqa
30+
from .match import tag_item, tag_album, current_metadata, Proposal # noqa
3131
from .match import Recommendation # noqa
3232

3333
# Global logger.

beets/config_default.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ import:
2727
group_albums: no
2828
pretend: false
2929
search_ids: []
30+
duplicate_keys:
31+
album: albumartist album
32+
item: artist title
3033
duplicate_action: ask
3134
bell: no
3235
set_fields: {}

beets/dbcore/db.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from beets.util import functemplate
2828
from beets.util import py3_path
2929
from beets.dbcore import types
30-
from .query import MatchQuery, NullSort, TrueQuery
30+
from .query import MatchQuery, NullSort, TrueQuery, AndQuery
3131
from collections.abc import Mapping
3232

3333

@@ -641,6 +641,25 @@ def set_parse(self, key, string):
641641
"""
642642
self[key] = self._parse(key, string)
643643

644+
# Convenient queries.
645+
646+
@classmethod
def field_query(cls, field, pattern, query_cls=MatchQuery):
    """Build a single-field query for this model.

    `field` is the name of the field to match and `pattern` is the
    value to match it against. `query_cls` is the query class (or any
    callable with the same signature) used to construct the result; it
    is called with the field, the pattern, and a flag telling whether
    `field` is one of this model's `_fields`.
    """
    is_model_field = field in cls._fields
    return query_cls(field, pattern, is_model_field)
650+
651+
@classmethod
def all_fields_query(cls, pats, query_cls=MatchQuery):
    """Build a query requiring several fields to match at once.

    `pats` is a mapping from field names to patterns. One query is
    created per field/pattern pair via `field_query` (using
    `query_cls`), and the pairs are combined into a single `AndQuery`,
    i.e. a conjunction of all the per-field queries.
    """
    per_field = []
    for name, pattern in pats.items():
        per_field.append(cls.field_query(name, pattern, query_cls))
    return AndQuery(per_field)
662+
644663

645664
# Database controller and supporting interfaces.
646665

beets/importer.py

+45-26
Original file line numberDiff line numberDiff line change
@@ -521,17 +521,18 @@ def skip(self):
521521

522522
# Convenient data.
523523

524-
def chosen_ident(self):
525-
"""Returns identifying metadata about the current choice. For
526-
albums, this is an (artist, album) pair. For items, this is
527-
(artist, title). May only be called when the choice flag is ASIS
528-
or RETAG (in which case the data comes from the files' current
529-
metadata) or APPLY (data comes from the choice).
524+
def chosen_info(self):
    """Return a dictionary of metadata about the current choice.

    May only be called when the choice flag is ASIS or RETAG (in which
    case the data comes from the files' current metadata) or APPLY (in
    which case the data comes from the chosen match).

    Raises AssertionError for any other choice flag.
    """
    if self.choice_flag in (action.ASIS, action.RETAG):
        # `current_metadata` returns a pair; only the first element
        # is needed here.
        likelies, _ = autotag.current_metadata(self.items)
        return likelies
    if self.choice_flag is action.APPLY:
        # Hand back a copy so that callers can modify the result
        # without touching the match itself.
        return self.match.info.copy()
    # Raise explicitly instead of `assert False`: assert statements are
    # removed under `python -O`, which would make this method silently
    # return None for an unsupported flag.
    raise AssertionError(
        'chosen_info() called with unsupported choice flag: {!r}'.format(
            self.choice_flag
        )
    )
535536

536537
def imported_items(self):
537538
"""Return a list of Items that should be added to the library.
@@ -667,26 +668,34 @@ def find_duplicates(self, lib):
667668
"""Return a list of albums from `lib` that match the task on
668669
every field listed in the `import.duplicate_keys.album` option.
669670
"""
670-
artist, album = self.chosen_ident()
671+
info = self.chosen_info()
672+
info['albumartist'] = info['artist']
671673

672-
if artist is None:
674+
if info['artist'] is None:
673675
# As-is import with no artist. Skip check.
674676
return []
675677

676-
duplicates = []
678+
# Construct a query to find duplicates with this metadata. We
679+
# use a temporary Album object to generate any computed fields.
680+
tmp_album = library.Album(lib, **info)
681+
keys = config['import']['duplicate_keys']['album'].as_str_seq()
682+
dup_query = library.Album.all_fields_query({
683+
key: tmp_album.get(key)
684+
for key in keys
685+
})
686+
687+
# Don't count albums with the same files as duplicates.
677688
task_paths = {i.path for i in self.items if i}
678-
duplicate_query = dbcore.AndQuery((
679-
dbcore.MatchQuery('albumartist', artist),
680-
dbcore.MatchQuery('album', album),
681-
))
682689

683-
for album in lib.albums(duplicate_query):
690+
duplicates = []
691+
for album in lib.albums(dup_query):
684692
# Check whether the album paths are all present in the task
685693
# i.e. album is being completely re-imported by the task,
686694
# in which case it is not a duplicate (will be replaced).
687695
album_paths = {i.path for i in album.items()}
688696
if not (album_paths <= task_paths):
689697
duplicates.append(album)
698+
690699
return duplicates
691700

692701
def align_album_level_fields(self):
@@ -892,12 +901,17 @@ def __init__(self, toppath, item):
892901
self.is_album = False
893902
self.paths = [item.path]
894903

895-
def chosen_ident(self):
896-
assert self.choice_flag in (action.ASIS, action.APPLY, action.RETAG)
904+
def chosen_info(self):
    """Return a dictionary of metadata about the current choice.

    May only be called when the choice flag is ASIS or RETAG (in which
    case the data comes from the file's current metadata) or APPLY (in
    which case the data comes from the chosen match).

    Raises AssertionError for any other choice flag.
    """
    if self.choice_flag in (action.ASIS, action.RETAG):
        # Snapshot the item's current fields as a plain dictionary.
        return dict(self.item)
    if self.choice_flag is action.APPLY:
        # Hand back a copy so that callers can modify the result
        # without touching the match itself.
        return self.match.info.copy()
    # Raise explicitly instead of relying on a leading `assert`: assert
    # statements are removed under `python -O`, which would make this
    # method silently return None for an unsupported flag.
    raise AssertionError(
        'chosen_info() called with unsupported choice flag: {!r}'.format(
            self.choice_flag
        )
    )
901915

902916
def imported_items(self):
903917
return [self.item]
@@ -918,14 +932,19 @@ def find_duplicates(self, lib):
918932
"""Return a list of items from `lib` that match the task on
919933
every field listed in the `import.duplicate_keys.item` option.
920934
"""
921-
artist, title = self.chosen_ident()
935+
info = self.chosen_info()
936+
937+
# Query for existing items using the same metadata. We use a
938+
# temporary `Item` object to generate any computed fields.
939+
tmp_item = library.Item(lib, **info)
940+
keys = config['import']['duplicate_keys']['item'].as_str_seq()
941+
dup_query = library.Album.all_fields_query({
942+
key: tmp_item.get(key)
943+
for key in keys
944+
})
922945

923946
found_items = []
924-
query = dbcore.AndQuery((
925-
dbcore.MatchQuery('artist', artist),
926-
dbcore.MatchQuery('title', title),
927-
))
928-
for other_item in lib.items(query):
947+
for other_item in lib.items(dup_query):
929948
# Existing items not considered duplicates.
930949
if other_item.path != self.item.path:
931950
found_items.append(other_item)

docs/changelog.rst

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ New features:
2525
* :doc:`/plugins/kodiupdate`: Now supports multiple kodi instances
2626
:bug:`4101`
2727
* Add the item fields ``bitrate_mode``, ``encoder_info`` and ``encoder_settings``.
28+
* Add query prefixes ``=`` and ``~``.
29+
* A new configuration option, :ref:`duplicate_keys`, lets you change which
30+
fields the beets importer uses to identify duplicates.
31+
:bug:`1133` :bug:`4199`
2832
* Add :ref:`exact match <exact-match>` queries, using the prefixes ``=`` and
2933
``=~``.
3034
:bug:`4251`

docs/reference/config.rst

+16
Original file line numberDiff line numberDiff line change
@@ -689,6 +689,22 @@ with the ``-a`` flag to the :ref:`import-cmd` command.)
689689

690690
Default: ``yes``.
691691

692+
.. _duplicate_keys:
693+
694+
duplicate_keys
695+
~~~~~~~~~~~~~~
696+
697+
The fields used to find duplicates when importing.
698+
There are two sub-values here: ``album`` and ``item``.
699+
Each one is a list of field names; if an existing object (album or item) in
700+
the library matches the new object on all of these fields, the importer will
701+
consider it a duplicate.
702+
703+
Default::
704+
705+
album: albumartist album
706+
item: artist title
707+
692708
.. _duplicate_action:
693709

694710
duplicate_action

test/test_importer.py

+33
Original file line numberDiff line numberDiff line change
@@ -1234,6 +1234,7 @@ def test_album_info(*args, **kwargs):
12341234
tracks=[track_info],
12351235
album_id='albumid',
12361236
artist_id='artistid',
1237+
flex='flex',
12371238
)
12381239
return iter([album_info])
12391240

@@ -1251,6 +1252,7 @@ def setUp(self):
12511252
# Create import session
12521253
self.importer = self.create_importer()
12531254
config['import']['autotag'] = True
1255+
config['import']['duplicate_keys']['album'] = 'albumartist album'
12541256

12551257
def tearDown(self):
12561258
self.teardown_beets()
@@ -1320,6 +1322,24 @@ def test_merge_duplicate_album(self):
13201322
def test_twice_in_import_dir(self):
13211323
self.skipTest('write me')
13221324

1325+
def test_keep_when_extra_key_is_different(self):
    """An album that differs on an extra duplicate key is kept.

    With `flex` added to the album duplicate keys, the incoming album
    is expected to be imported alongside the existing one even though
    the resolution for duplicates is SKIP.
    """
    config['import']['duplicate_keys']['album'] = 'albumartist album flex'

    existing = self.lib.items().get()
    track_path = os.path.join(
        self.importer.paths[0], b'album 0', b'track 0.mp3')
    import_file = MediaFile(track_path)
    # Give the incoming file the same identifying metadata as the
    # library item, but a different value for the extra key.
    # NOTE(review): the MediaFile is never saved in this test, so these
    # assignments may not reach the file on disk -- confirm intended.
    import_file.artist = existing['artist']
    import_file.albumartist = existing['artist']
    import_file.album = existing['album']
    import_file.title = existing['title']
    import_file.flex = 'different'

    # Any detected duplicate would be skipped, not imported.
    self.importer.default_resolution = self.importer.Resolution.SKIP
    self.importer.run()

    # Both the pre-existing album and the imported one are present.
    album_count = len(self.lib.albums())
    self.assertEqual(album_count, 2)
    item_count = len(self.lib.items())
    self.assertEqual(item_count, 2)
1342+
13231343
def add_album_fixture(self, **kwargs):
13241344
# TODO move this into upstream
13251345
album = super().add_album_fixture()
@@ -1349,6 +1369,7 @@ def setUp(self):
13491369
self.importer = self.create_importer()
13501370
config['import']['autotag'] = True
13511371
config['import']['singletons'] = True
1372+
config['import']['duplicate_keys']['item'] = 'artist title'
13521373

13531374
def tearDown(self):
13541375
self.teardown_beets()
@@ -1385,6 +1406,18 @@ def test_skip_duplicate(self):
13851406
item = self.lib.items().get()
13861407
self.assertEqual(item.mb_trackid, 'old trackid')
13871408

1409+
def test_keep_when_extra_key_is_different(self):
    """A singleton that differs on an extra duplicate key is kept.

    With `flex` added to the item duplicate keys, the incoming item is
    expected to be imported alongside the existing one even though the
    resolution for duplicates is SKIP.
    """
    config['import']['duplicate_keys']['item'] = 'artist title flex'

    # Store a `flex` value on the library item.
    # presumably this differs from the incoming item's value -- TODO
    # confirm against the fixtures.
    existing = self.lib.items().get()
    existing.flex = 'different'
    existing.store()
    count_before = len(self.lib.items())
    self.assertEqual(count_before, 1)

    # Any detected duplicate would be skipped, not imported.
    self.importer.default_resolution = self.importer.Resolution.SKIP
    self.importer.run()

    # The imported item was added next to the existing one.
    count_after = len(self.lib.items())
    self.assertEqual(count_after, 2)
1420+
13881421
def test_twice_in_import_dir(self):
13891422
self.skipTest('write me')
13901423

0 commit comments

Comments
 (0)