@@ -1260,6 +1260,25 @@ def _pe_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
     }


+def _dinov3_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
+    """Generate default configuration for DINOv3 models.
+
+    Args:
+        url: Model weights URL.
+        **kwargs: Additional configuration parameters.
+
+    Returns:
+        Model configuration dictionary.
+    """
+    return {
+        'url': url,
+        'num_classes': 0, 'input_size': (3, 256, 256), 'pool_size': None,
+        'crop_pct': 1.0, 'interpolation': 'bicubic', 'min_input_size': (3, 128, 128),
+        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
+        'first_conv': 'patch_embed.proj', 'classifier': 'head',
+        'license': 'dinov3', **kwargs
+    }
+
 default_cfgs = generate_default_cfgs({

     # EVA 01 CLIP fine-tuned on imagenet-1k
@@ -1614,89 +1633,43 @@ def _pe_cfg(url: str = '', **kwargs) -> Dict[str, Any]:

     # DINOv3 weights are under a specific license with redistribution terms, please see
     # https://github.com/facebookresearch/dinov3/blob/main/LICENSE.md
-    'vit_small_patch16_dinov3_224.lvdm_1689m': _cfg(
+    'vit_small_patch16_dinov3.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_small_patch16_dinov3_qkvb_224.lvdm_1689m': _cfg(
+    'vit_small_patch16_dinov3_qkvb.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_small_plus_patch16_dinov3_224.lvdm_1689m': _cfg(
+    'vit_small_plus_patch16_dinov3.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_small_plus_patch16_dinov3_qkvb_224.lvdm_1689m': _cfg(
+    'vit_small_plus_patch16_dinov3_qkvb.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_base_patch16_dinov3_224.lvdm_1689m': _cfg(
+    'vit_base_patch16_dinov3.lvdm_1689m': _dinov3_cfg(
         #hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_base_patch16_dinov3_qkvb_224.lvdm_1689m': _cfg(
+    'vit_base_patch16_dinov3_qkvb.lvdm_1689m': _dinov3_cfg(
         #hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_large_patch16_dinov3_224.lvdm_1689m': _cfg(
+    'vit_large_patch16_dinov3.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_large_patch16_dinov3_qkvb_224.lvdm_1689m': _cfg(
+    'vit_large_patch16_dinov3_qkvb.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_large_patch16_dinov3_224.sat_493m': _cfg(
+    'vit_large_patch16_dinov3.sat_493m': _dinov3_cfg(
         # hf_hub_id='timm/',
         mean=(0.430, 0.411, 0.296), std=(0.213, 0.156, 0.143),
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_huge_plus_patch16_dinov3_224.lvdm_1689m': _cfg(
+    'vit_huge_plus_patch16_dinov3.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_7b_patch16_dinov3_224.lvdm_1689m': _cfg(
+    'vit_7b_patch16_dinov3.lvdm_1689m': _dinov3_cfg(
         # hf_hub_id='timm/',
-        mean=IMAGENET_DEFAULT_MEAN, std=IMAGENET_DEFAULT_STD,
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),
-    'vit_7b_patch16_dinov3_224.sat_493m': _cfg(
+    'vit_7b_patch16_dinov3.sat_493m': _dinov3_cfg(
         # hf_hub_id='timm/',
         mean=(0.430, 0.411, 0.296), std=(0.213, 0.156, 0.143),
-        crop_pct=1.0,
-        num_classes=0,
-        license='dinov3',
     ),

 })
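The factory works because `**kwargs` is unpacked last in the dict literal, so per-entry overrides win over the shared defaults; that is how the `.sat_493m` entries above swap in satellite-imagery normalization stats while still inheriting `crop_pct`, `num_classes`, and the license tag. A minimal, self-contained sketch of that merge behavior (the ImageNet constants are inlined here for illustration; in timm they are imported from `timm.data`):

```python
from typing import Any, Dict

# Inlined for this sketch; timm imports these from timm.data.
IMAGENET_DEFAULT_MEAN = (0.485, 0.456, 0.406)
IMAGENET_DEFAULT_STD = (0.229, 0.224, 0.225)

def _dinov3_cfg(url: str = '', **kwargs) -> Dict[str, Any]:
    # **kwargs expands after the default keys, so callers can override any of them.
    return {
        'url': url,
        'num_classes': 0, 'input_size': (3, 256, 256), 'pool_size': None,
        'crop_pct': 1.0, 'interpolation': 'bicubic', 'min_input_size': (3, 128, 128),
        'mean': IMAGENET_DEFAULT_MEAN, 'std': IMAGENET_DEFAULT_STD,
        'first_conv': 'patch_embed.proj', 'classifier': 'head',
        'license': 'dinov3', **kwargs
    }

# Mirrors the .sat_493m entries: custom stats override, the rest is inherited.
cfg = _dinov3_cfg(mean=(0.430, 0.411, 0.296), std=(0.213, 0.156, 0.143))
assert cfg['mean'] == (0.430, 0.411, 0.296)  # kwarg wins over the default
assert cfg['crop_pct'] == 1.0                # shared default is inherited
```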
@@ -2640,9 +2613,10 @@ def vit_large_patch16_rope_mixed_ape_224(pretrained: bool = False, **kwargs) ->


 @register_model
-def vit_small_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_small_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=384,
         depth=12,
         num_heads=6,
@@ -2658,14 +2632,15 @@ def vit_small_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_small_patch16_dinov3_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_small_patch16_dinov3', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_small_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_small_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=384,
         depth=12,
         num_heads=6,
@@ -2681,14 +2656,15 @@ def vit_small_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_small_patch16_dinov3_qkvb_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_small_patch16_dinov3_qkvb', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_small_plus_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_small_plus_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=384,
         depth=12,
         num_heads=6,
@@ -2706,14 +2682,15 @@ def vit_small_plus_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_small_plus_patch16_dinov3_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_small_plus_patch16_dinov3', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_small_plus_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_small_plus_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=384,
         depth=12,
         num_heads=6,
@@ -2731,14 +2708,15 @@ def vit_small_plus_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_small_plus_patch16_dinov3_qkvb_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_small_plus_patch16_dinov3_qkvb', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_base_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_base_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=768,
         depth=12,
         num_heads=12,
@@ -2754,15 +2732,16 @@ def vit_base_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_base_patch16_dinov3_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_base_patch16_dinov3', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_base_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_base_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
     # DINOv3 Base variant w/ qkv_bias enabled (zero'd in weights)
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=768,
         depth=12,
         num_heads=12,
@@ -2778,14 +2757,15 @@ def vit_base_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva:
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_base_patch16_dinov3_qkvb_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_base_patch16_dinov3_qkvb', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_large_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_large_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=1024,
         depth=24,
         num_heads=16,
@@ -2801,14 +2781,15 @@ def vit_large_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_large_patch16_dinov3_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_large_patch16_dinov3', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_large_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_large_patch16_dinov3_qkvb(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=1024,
         depth=24,
         num_heads=16,
@@ -2824,14 +2805,15 @@ def vit_large_patch16_dinov3_qkvb_224(pretrained: bool = False, **kwargs) -> Eva
         use_fc_norm=False,
         norm_layer=partial(LayerNorm, eps=1e-5),
     )
-    model = _create_eva('vit_large_patch16_dinov3_qkvb_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_large_patch16_dinov3_qkvb', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_huge_plus_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_huge_plus_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=1280,
         depth=32,
         num_heads=20,
@@ -2850,14 +2832,15 @@ def vit_huge_plus_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
         norm_layer=partial(LayerNorm, eps=1e-5),
     )

-    model = _create_eva('vit_huge_plus_patch16_dinov3_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_huge_plus_patch16_dinov3', pretrained=pretrained, **dict(model_args, **kwargs))
     return model


 @register_model
-def vit_7b_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
+def vit_7b_patch16_dinov3(pretrained: bool = False, **kwargs) -> Eva:
     model_args = dict(
         patch_size=16,
+        dynamic_img_size=True,
         embed_dim=4096,
         depth=40,
         num_heads=32,
@@ -2877,5 +2860,5 @@ def vit_7b_patch16_dinov3_224(pretrained: bool = False, **kwargs) -> Eva:
         norm_layer=partial(LayerNorm, eps=1e-5),
     )

-    model = _create_eva('vit_7b_patch16_dinov3_224', pretrained=pretrained, **dict(model_args, **kwargs))
+    model = _create_eva('vit_7b_patch16_dinov3', pretrained=pretrained, **dict(model_args, **kwargs))
     return model
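With the `_224` suffix dropped and `dynamic_img_size=True` set, these entry points are no longer tied to a single resolution: the patch grid and position embeddings adapt to the input at call time. A small usage sketch, assuming a timm build that includes these registrations (no pretrained weights are fetched, since the `hf_hub_id` lines are still commented out in the cfgs):

```python
import timm
import torch

# Randomly initialized; pretrained=True would require published weights.
model = timm.create_model('vit_small_patch16_dinov3', pretrained=False)
model.eval()

# dynamic_img_size=True lets the same model consume different resolutions,
# provided H and W are multiples of patch_size (16); the cfg's
# min_input_size suggests 128x128 as the practical floor.
for size in (256, 384):
    feats = model(torch.randn(1, 3, size, size))
    print(size, feats.shape)  # num_classes=0 -> pooled feature output
```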