diff --git a/gateway/sds_gateway/api_methods/models.py b/gateway/sds_gateway/api_methods/models.py index 9de993fb..ee6deefc 100644 --- a/gateway/sds_gateway/api_methods/models.py +++ b/gateway/sds_gateway/api_methods/models.py @@ -11,6 +11,7 @@ from blake3 import blake3 as Blake3 # noqa: N812 from django.conf import settings +from django.core.cache import cache from django.db import models from django.db.models import ProtectedError from django.db.models import QuerySet @@ -1386,7 +1387,14 @@ def handle_dataset_soft_delete(sender, instance: Dataset, **kwargs) -> None: """ Handle soft deletion of datasets by also soft deleting related share permissions. + Also invalidates keywords autocomplete cache when datasets are created/updated. """ + # Invalidate global keywords cache (since keywords are now from all users) + # This ensures autocomplete shows latest keywords when datasets change + # Cache key matches KeywordsAutocompleteView.CACHE_KEY in users/views.py + cache_key = "keywords_autocomplete_all_users" + cache.delete(cache_key) + if instance.is_deleted: # This is a soft delete, so we need to soft delete related share permissions # Soft delete all UserSharePermission records for this dataset diff --git a/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py b/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py index f79cd81f..416dcc12 100644 --- a/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py +++ b/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py @@ -1,5 +1,7 @@ """Dataset serializers for the API methods.""" +import json + from rest_framework import serializers from sds_gateway.api_methods.models import Dataset @@ -13,10 +15,40 @@ class DatasetGetSerializer(serializers.ModelSerializer[Dataset]): is_shared_with_me = serializers.SerializerMethodField() is_owner = serializers.SerializerMethodField() status_display = serializers.CharField(source="get_status_display", read_only=True) + keywords = serializers.SerializerMethodField() def get_authors(self, obj): return obj.authors[0] if obj.authors else None + def get_keywords(self, obj): + """ + Return keywords as a clean list of strings, ready for frontend display. + Handles all formats: JSON string, list, or empty. 
+ """ + if not obj.keywords: + return [] + + # If it's already a list (from from_db deserialization), return it + if isinstance(obj.keywords, list): + return [str(k).strip() for k in obj.keywords if k and str(k).strip()] + + # If it's a string, try to parse it + if isinstance(obj.keywords, str): + trimmed = obj.keywords.strip() + if not trimmed: + return [] + + # Try to parse as JSON + try: + parsed = json.loads(trimmed) + if isinstance(parsed, list): + return [str(k).strip() for k in parsed if k and str(k).strip()] + except (json.JSONDecodeError, TypeError): + # If JSON parsing fails, treat as comma-separated string + return [k.strip() for k in trimmed.split(",") if k.strip()] + + return [] + def get_is_shared_with_me(self, obj): """Check if the dataset is shared with the current user.""" request = self.context.get("request") diff --git a/gateway/sds_gateway/static/css/components.css b/gateway/sds_gateway/static/css/components.css index 8aba9ecc..c38f0a7e 100644 --- a/gateway/sds_gateway/static/css/components.css +++ b/gateway/sds_gateway/static/css/components.css @@ -988,6 +988,62 @@ body { color: #000; } +/* Keywords Autocomplete Dropdown */ +.keywords-autocomplete-dropdown { + position: absolute; + top: 100%; + left: 0; + right: 0; + z-index: 9999; + background: white; + border: 1px solid #dee2e6; + border-radius: 0.375rem; + box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15); + max-height: 200px; + overflow-y: auto; + margin-top: 0.25rem; +} + +.keywords-autocomplete-dropdown .list-group-item { + border: none; + border-bottom: 1px solid #dee2e6; + cursor: pointer; + padding: 0.75rem 1.25rem; +} + +.keywords-autocomplete-dropdown .list-group-item:last-child { + border-bottom: none; +} + +.keywords-autocomplete-dropdown .list-group-item:hover { + background-color: #f8f9fa; +} + +.keywords-autocomplete-dropdown .list-group-item:active { + background-color: #e9ecef; +} + +.keywords-autocomplete-dropdown .list-group-item.selected { + background-color: #0d6efd; + color: white; +} + +.keywords-autocomplete-dropdown .list-group-item.no-results { + padding: 0.75rem 1rem; + color: #6c757d; + font-style: italic; + text-align: center; + cursor: default; +} + +.keywords-autocomplete-dropdown .list-group-item.no-results:hover { + background-color: transparent; +} + +.keywords-suggestion { + font-size: 0.875rem; +} + /* Share Group Manager Specific Styles */ .selected-users-chips { gap: 0.5rem; diff --git a/gateway/sds_gateway/static/js/actions/DetailsActionManager.js b/gateway/sds_gateway/static/js/actions/DetailsActionManager.js index 9bfda448..6af1ff84 100644 --- a/gateway/sds_gateway/static/js/actions/DetailsActionManager.js +++ b/gateway/sds_gateway/static/js/actions/DetailsActionManager.js @@ -181,6 +181,15 @@ class DetailsActionManager { ".dataset-details-description", datasetData.description || "No description provided", ); + + // Update keywords display + // Keywords are now returned as a clean list from the backend + const keywordsValue = + Array.isArray(datasetData.keywords) && datasetData.keywords.length > 0 + ? 
datasetData.keywords.join(", ") + : "No keywords provided"; + this.updateElementText(modal, ".dataset-details-keywords", keywordsValue); + this.updateElementText( modal, ".dataset-details-status", @@ -930,6 +939,14 @@ class DetailsActionManager { } } + /** + * Format keywords for display + * @param {Array|string} keywords - Keywords as array or string + * @returns {string} Formatted keywords as comma-separated string + */ + // formatKeywords method removed - keywords are now formatted on the backend + // Backend returns keywords as a clean array of strings, so we just join them + /** * Initialize details buttons for dynamically loaded content * @param {Element} container - Container element to search within diff --git a/gateway/sds_gateway/static/js/captureGroupingComponents.js b/gateway/sds_gateway/static/js/captureGroupingComponents.js index 4aef04ab..174fcd55 100644 --- a/gateway/sds_gateway/static/js/captureGroupingComponents.js +++ b/gateway/sds_gateway/static/js/captureGroupingComponents.js @@ -35,9 +35,11 @@ class FormHandler { this.nameField = document.getElementById("id_name"); this.authorField = document.getElementById("id_author"); this.statusField = document.getElementById("id_status"); + this.keywordsField = document.getElementById("id_keywords"); this.initializeEventListeners(); this.initializeErrorContainer(); + this.initializeKeywordsAutocomplete(); this.validateCurrentStep(); // Initial validation this.updateNavigation(); // Initial navigation button display @@ -126,6 +128,224 @@ class FormHandler { } } + /** + * Initialize keywords autocomplete functionality + */ + initializeKeywordsAutocomplete() { + if (!this.keywordsField) return; + + // Create dropdown container + const dropdown = document.createElement("div"); + dropdown.className = "keywords-autocomplete-dropdown d-none"; + dropdown.id = "keywords-autocomplete-dropdown"; + dropdown.innerHTML = '
<div class="list-group"></div>
'; + + // Insert dropdown after the keywords field + const keywordsContainer = this.keywordsField.parentElement; + if (keywordsContainer) { + keywordsContainer.style.position = "relative"; + keywordsContainer.appendChild(dropdown); + } + + let searchTimeout = null; + let allKeywords = []; + + // Fetch all keywords on initialization + this.fetchKeywords().then((keywords) => { + allKeywords = keywords; + }); + + // Handle input events + this.keywordsField.addEventListener("input", (e) => { + clearTimeout(searchTimeout); + const query = e.target.value.trim(); + + // Get the last keyword being typed (after the last comma) + const lastCommaIndex = query.lastIndexOf(","); + const currentKeyword = + lastCommaIndex >= 0 + ? query.substring(lastCommaIndex + 1).trim() + : query.trim(); + + if (currentKeyword.length < 1) { + this.hideKeywordsDropdown(dropdown); + return; + } + + searchTimeout = setTimeout(() => { + this.searchKeywords(currentKeyword, allKeywords, dropdown); + }, 300); + }); + + // Handle keyboard navigation + this.keywordsField.addEventListener("keydown", (e) => { + const visibleItems = dropdown.querySelectorAll( + ".list-group-item:not(.no-results)", + ); + const currentIndex = Array.from(visibleItems).findIndex((item) => + item.classList.contains("selected"), + ); + + switch (e.key) { + case "ArrowDown": + e.preventDefault(); + this.navigateKeywordsDropdown(visibleItems, currentIndex, 1); + break; + case "ArrowUp": + e.preventDefault(); + this.navigateKeywordsDropdown(visibleItems, currentIndex, -1); + break; + case "Enter": { + e.preventDefault(); + const selectedItem = dropdown.querySelector( + ".list-group-item.selected", + ); + if (selectedItem) { + this.selectKeyword(selectedItem); + } + break; + } + case "Escape": + this.hideKeywordsDropdown(dropdown); + this.keywordsField.blur(); + break; + } + }); + + // Handle clicks outside to close dropdown + document.addEventListener("click", (e) => { + if ( + !this.keywordsField.contains(e.target) && + !dropdown.contains(e.target) + ) { + this.hideKeywordsDropdown(dropdown); + } + }); + + // Handle dropdown item clicks + dropdown.addEventListener("click", (e) => { + const item = e.target.closest(".list-group-item"); + if (item && !item.classList.contains("no-results")) { + this.selectKeyword(item); + } + }); + } + + /** + * Fetch all keywords from the API + */ + async fetchKeywords() { + try { + const response = await fetch("/users/keywords-autocomplete/"); + if (!response.ok) { + console.error("Failed to fetch keywords"); + return []; + } + const data = await response.json(); + return data.keywords || []; + } catch (error) { + console.error("Error fetching keywords:", error); + return []; + } + } + + /** + * Search keywords based on query + */ + searchKeywords(query, allKeywords, dropdown) { + const queryLower = query.toLowerCase(); + const filtered = allKeywords + .filter((keyword) => keyword.toLowerCase().includes(queryLower)) + .slice(0, 10); // Limit to 10 suggestions + + this.renderKeywordsDropdown(filtered, dropdown); + + if (filtered.length > 0) { + this.showKeywordsDropdown(dropdown); + } else { + this.hideKeywordsDropdown(dropdown); + } + } + + /** + * Render keywords dropdown + */ + renderKeywordsDropdown(keywords, dropdown) { + const listGroup = dropdown.querySelector(".list-group"); + if (!listGroup) return; + + if (keywords.length === 0) { + listGroup.innerHTML = + '
<div class="list-group-item no-results">No suggestions found</div>
'; + return; + } + + listGroup.innerHTML = keywords + .map( + (keyword) => + `
<div class="list-group-item keywords-suggestion" data-keyword="${keyword}">${keyword}</div>
`, + ) + .join(""); + } + + /** + * Select a keyword from the dropdown + */ + selectKeyword(item) { + const keyword = item.getAttribute("data-keyword"); + if (!keyword) return; + + const currentValue = this.keywordsField.value.trim(); + const lastCommaIndex = currentValue.lastIndexOf(","); + + if (lastCommaIndex >= 0) { + // Replace the last keyword being typed + const prefix = currentValue.substring(0, lastCommaIndex + 1); + this.keywordsField.value = `${prefix} ${keyword}, `; + } else { + // Replace the entire value + this.keywordsField.value = `${keyword}, `; + } + + this.hideKeywordsDropdown( + document.getElementById("keywords-autocomplete-dropdown"), + ); + this.keywordsField.focus(); + } + + /** + * Navigate dropdown with keyboard + */ + navigateKeywordsDropdown(items, currentIndex, direction) { + for (const item of items) { + item.classList.remove("selected"); + } + + const nextIndex = currentIndex + direction; + if (nextIndex >= 0 && nextIndex < items.length) { + items[nextIndex].classList.add("selected"); + items[nextIndex].scrollIntoView({ block: "nearest" }); + } + } + + /** + * Show dropdown + */ + showKeywordsDropdown(dropdown) { + if (dropdown) { + dropdown.classList.remove("d-none"); + } + } + + /** + * Hide dropdown + */ + hideKeywordsDropdown(dropdown) { + if (dropdown) { + dropdown.classList.add("d-none"); + } + } + show(container, showClass = "display-block") { container.classList.remove("display-none"); container.classList.add(showClass); @@ -178,6 +398,10 @@ class FormHandler { document.querySelector("#step4 .dataset-description").textContent = document.getElementById("id_description").value || "No description provided."; + const keywordsValue = + document.getElementById("id_keywords")?.value.trim() || ""; + document.querySelector("#step4 .dataset-keywords").textContent = + keywordsValue || "No keywords provided."; // Update captures table const capturesTableBody = document.querySelector( diff --git a/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js b/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js index b458fc8a..fdf07579 100644 --- a/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js +++ b/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js @@ -24,6 +24,7 @@ class DatasetCreationHandler { this.authorsField = document.getElementById("id_authors"); this.statusField = document.getElementById("id_status"); this.descriptionField = document.getElementById("id_description"); + this.keywordsField = document.getElementById("id_keywords"); // Hidden fields this.selectedCapturesField = document.getElementById("selected_captures"); @@ -45,6 +46,7 @@ class DatasetCreationHandler { this.initializeErrorContainer(); this.initializeAuthorsManagement(); this.initializePlaceholders(); + this.initializeKeywordsAutocomplete(); this.validateCurrentStep(); this.updateNavigation(); } @@ -206,6 +208,225 @@ class DatasetCreationHandler { } } + /** + * Initialize keywords autocomplete functionality + */ + initializeKeywordsAutocomplete() { + if (!this.keywordsField) return; + + // Create dropdown container + const dropdown = document.createElement("div"); + dropdown.className = "keywords-autocomplete-dropdown d-none"; + dropdown.id = "keywords-autocomplete-dropdown"; + dropdown.innerHTML = '
<div class="list-group"></div>
'; + + // Insert dropdown after the keywords field + const keywordsContainer = this.keywordsField.parentElement; + if (keywordsContainer) { + keywordsContainer.style.position = "relative"; + keywordsContainer.appendChild(dropdown); + } + + let searchTimeout = null; + let allKeywords = []; + const filteredKeywords = []; + + // Fetch all keywords on initialization + this.fetchKeywords().then((keywords) => { + allKeywords = keywords; + }); + + // Handle input events + this.keywordsField.addEventListener("input", (e) => { + clearTimeout(searchTimeout); + const query = e.target.value.trim(); + + // Get the last keyword being typed (after the last comma) + const lastCommaIndex = query.lastIndexOf(","); + const currentKeyword = + lastCommaIndex >= 0 + ? query.substring(lastCommaIndex + 1).trim() + : query.trim(); + + if (currentKeyword.length < 1) { + this.hideDropdown(dropdown); + return; + } + + searchTimeout = setTimeout(() => { + this.searchKeywords(currentKeyword, allKeywords, dropdown); + }, 300); + }); + + // Handle keyboard navigation + this.keywordsField.addEventListener("keydown", (e) => { + const visibleItems = dropdown.querySelectorAll( + ".list-group-item:not(.no-results)", + ); + const currentIndex = Array.from(visibleItems).findIndex((item) => + item.classList.contains("selected"), + ); + + switch (e.key) { + case "ArrowDown": + e.preventDefault(); + this.navigateDropdown(visibleItems, currentIndex, 1); + break; + case "ArrowUp": + e.preventDefault(); + this.navigateDropdown(visibleItems, currentIndex, -1); + break; + case "Enter": { + e.preventDefault(); + const selectedItem = dropdown.querySelector( + ".list-group-item.selected", + ); + if (selectedItem) { + this.selectKeyword(selectedItem); + } + break; + } + case "Escape": + this.hideDropdown(dropdown); + this.keywordsField.blur(); + break; + } + }); + + // Handle clicks outside to close dropdown + document.addEventListener("click", (e) => { + if ( + !this.keywordsField.contains(e.target) && + !dropdown.contains(e.target) + ) { + this.hideDropdown(dropdown); + } + }); + + // Handle dropdown item clicks + dropdown.addEventListener("click", (e) => { + const item = e.target.closest(".list-group-item"); + if (item && !item.classList.contains("no-results")) { + this.selectKeyword(item); + } + }); + } + + /** + * Fetch all keywords from the API + */ + async fetchKeywords() { + try { + const response = await fetch("/users/keywords-autocomplete/"); + if (!response.ok) { + console.error("Failed to fetch keywords"); + return []; + } + const data = await response.json(); + return data.keywords || []; + } catch (error) { + console.error("Error fetching keywords:", error); + return []; + } + } + + /** + * Search keywords based on query + */ + searchKeywords(query, allKeywords, dropdown) { + const queryLower = query.toLowerCase(); + const filtered = allKeywords + .filter((keyword) => keyword.toLowerCase().includes(queryLower)) + .slice(0, 10); // Limit to 10 suggestions + + this.renderKeywordsDropdown(filtered, dropdown); + + if (filtered.length > 0) { + this.showDropdown(dropdown); + } else { + this.hideDropdown(dropdown); + } + } + + /** + * Render keywords dropdown + */ + renderKeywordsDropdown(keywords, dropdown) { + const listGroup = dropdown.querySelector(".list-group"); + if (!listGroup) return; + + if (keywords.length === 0) { + listGroup.innerHTML = + '
<div class="list-group-item no-results">No suggestions found</div>
'; + return; + } + + listGroup.innerHTML = keywords + .map( + (keyword) => + `
<div class="list-group-item keywords-suggestion" data-keyword="${keyword}">${keyword}</div>
`, + ) + .join(""); + } + + /** + * Select a keyword from the dropdown + */ + selectKeyword(item) { + const keyword = item.getAttribute("data-keyword"); + if (!keyword) return; + + const currentValue = this.keywordsField.value.trim(); + const lastCommaIndex = currentValue.lastIndexOf(","); + + if (lastCommaIndex >= 0) { + // Replace the last keyword being typed + const prefix = currentValue.substring(0, lastCommaIndex + 1); + this.keywordsField.value = `${prefix} ${keyword}, `; + } else { + // Replace the entire value + this.keywordsField.value = `${keyword}, `; + } + + this.hideDropdown( + document.getElementById("keywords-autocomplete-dropdown"), + ); + this.keywordsField.focus(); + } + + /** + * Navigate dropdown with keyboard + */ + navigateDropdown(items, currentIndex, direction) { + for (const item of items) { + item.classList.remove("selected"); + } + + const nextIndex = currentIndex + direction; + if (nextIndex >= 0 && nextIndex < items.length) { + items[nextIndex].classList.add("selected"); + items[nextIndex].scrollIntoView({ block: "nearest" }); + } + } + + /** + * Show dropdown + */ + showDropdown(dropdown) { + if (dropdown) { + dropdown.classList.remove("d-none"); + } + } + + /** + * Hide dropdown + */ + hideDropdown(dropdown) { + if (dropdown) { + dropdown.classList.add("d-none"); + } + } + /** * Initialize file browser modal handlers */ @@ -517,6 +738,15 @@ class DatasetCreationHandler { : "No description provided."; } + // Update keywords display + const keywordsDisplay = document.querySelector("#step4 .dataset-keywords"); + if (keywordsDisplay) { + const keywordsValue = this.keywordsField + ? this.keywordsField.value.trim() + : ""; + keywordsDisplay.textContent = keywordsValue || "No keywords provided."; + } + // Update selected items table this.updateSelectedItemsTable(); } diff --git a/gateway/sds_gateway/static/js/datasetDetailsModal.js b/gateway/sds_gateway/static/js/datasetDetailsModal.js index d7df56ec..ce3e051c 100644 --- a/gateway/sds_gateway/static/js/datasetDetailsModal.js +++ b/gateway/sds_gateway/static/js/datasetDetailsModal.js @@ -123,6 +123,17 @@ class DatasetDetailsModal { document.querySelector(".dataset-details-description").textContent = dataset.description || "No description available"; + // Update keywords display + // Keywords are now returned as a clean list from the backend + const keywordsElement = document.querySelector(".dataset-details-keywords"); + if (keywordsElement) { + const keywordsValue = + Array.isArray(dataset.keywords) && dataset.keywords.length > 0 + ? dataset.keywords.join(", ") + : "No keywords available"; + keywordsElement.textContent = keywordsValue; + } + // Format status with badge using database values const statusElement = document.querySelector(".dataset-details-status"); if (dataset.status === "draft") { @@ -146,6 +157,9 @@ class DatasetDetailsModal { document.querySelector(".dataset-details-updated").innerHTML = updatedDate; } + // formatKeywords method removed - keywords are now formatted on the backend + // Backend returns keywords as a clean array of strings, so we just join them + /** * Copy dataset UUID to clipboard */ diff --git a/gateway/sds_gateway/templates/users/group_captures.html b/gateway/sds_gateway/templates/users/group_captures.html index b261ce95..857c270d 100644 --- a/gateway/sds_gateway/templates/users/group_captures.html +++ b/gateway/sds_gateway/templates/users/group_captures.html @@ -107,6 +107,14 @@
             {{ dataset_form.description }}
+          </div>
+          <div class="mb-3">
+            <label for="{{ dataset_form.keywords.id_for_label }}" class="form-label">Keywords</label>
+            {{ dataset_form.keywords }}
+            {% if dataset_form.keywords.help_text %}
+              <div class="form-text">{{ dataset_form.keywords.help_text }}</div>
+            {% endif %}
+          </div>
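
[Reviewer note, not part of the patch] The keywords input added above is a plain text field; DatasetInfoForm.clean_keywords (see the forms.py hunk below) turns the submitted comma-separated string into a list. A minimal sketch of that rule in isolation; parse_keywords is a hypothetical helper that restates the patch's logic, not code from this PR:

    def parse_keywords(keywords_str: str) -> list[str]:
        # Split on commas, strip whitespace, drop empty entries:
        # the same rule as DatasetInfoForm.clean_keywords.
        keywords_str = (keywords_str or "").strip()
        if not keywords_str:
            return []
        return [k.strip() for k in keywords_str.split(",") if k.strip()]

    assert parse_keywords("radio, spectrum, , analysis") == ["radio", "spectrum", "analysis"]
    assert parse_keywords("   ") == []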
diff --git a/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html b/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html index 045abee8..15522129 100644 --- a/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html +++ b/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html @@ -66,6 +66,10 @@

             <p class="dataset-details-description">Loading...</p>
           </div>
+          <div class="mb-3">
+            <label class="form-label fw-bold">Keywords</label>
+            <p class="dataset-details-keywords">Loading...</p>
+          </div>
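
[Reviewer note, not part of the patch] The .dataset-details-keywords element above is filled from DatasetGetSerializer.get_keywords, which normalizes every stored keyword format to a plain list of strings. A sketch of the expected behavior, assuming it runs inside the gateway's Django context; get_keywords only reads obj.keywords in this patch, so a stub object suffices:

    from types import SimpleNamespace

    from sds_gateway.api_methods.serializers.dataset_serializers import (
        DatasetGetSerializer,
    )

    def stub(kw):
        return SimpleNamespace(keywords=kw)

    s = DatasetGetSerializer()
    assert s.get_keywords(stub('["radio", "spectrum"]')) == ["radio", "spectrum"]  # JSON string
    assert s.get_keywords(stub("radio, spectrum")) == ["radio", "spectrum"]  # comma fallback
    assert s.get_keywords(stub(["radio", " spectrum "])) == ["radio", "spectrum"]  # already a list
    assert s.get_keywords(stub("")) == []  # empty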
diff --git a/gateway/sds_gateway/templates/users/partials/review_create_dataset.html b/gateway/sds_gateway/templates/users/partials/review_create_dataset.html index c93b94c6..2f811292 100644 --- a/gateway/sds_gateway/templates/users/partials/review_create_dataset.html +++ b/gateway/sds_gateway/templates/users/partials/review_create_dataset.html @@ -19,10 +19,14 @@
            <h6>Dataset Information</h6>
            <p>
              <strong>Description:</strong>
              <span class="dataset-description"></span>
-           </p>
+           </p>
+           <p>
+             <strong>Keywords:</strong>
+             <span class="dataset-keywords"></span>
+           </p>
diff --git a/gateway/sds_gateway/users/forms.py b/gateway/sds_gateway/users/forms.py index fafd32bf..2d812d32 100644 --- a/gateway/sds_gateway/users/forms.py +++ b/gateway/sds_gateway/users/forms.py @@ -100,6 +100,20 @@ class DatasetInfoForm(forms.Form): widget=forms.Select(attrs={"class": "form-control"}), help_text="Draft: Work in progress, Final: Complete and ready for use", ) + keywords = forms.CharField( + label="Keywords", + required=False, + widget=forms.TextInput( + attrs={ + "class": "form-control", + "placeholder": ( + "Enter keywords separated by commas " + "(e.g., radio, spectrum, analysis)" + ), + } + ), + help_text="Enter keywords separated by commas", + ) def __init__(self, *args, **kwargs): user = kwargs.pop("user", None) @@ -126,6 +140,17 @@ def clean_description(self): """Clean and validate the description.""" return self.cleaned_data.get("description", "").strip() + def clean_keywords(self): + """Parse keywords from comma-separated string to list.""" + keywords_str = self.cleaned_data.get("keywords", "").strip() + if not keywords_str: + return [] + + # Split by comma, strip whitespace, and filter out empty strings + return [ + keyword.strip() for keyword in keywords_str.split(",") if keyword.strip() + ] + class CaptureSearchForm(forms.Form): directory = forms.CharField( diff --git a/gateway/sds_gateway/users/tests/test_keywords_autocomplete.py b/gateway/sds_gateway/users/tests/test_keywords_autocomplete.py new file mode 100644 index 00000000..70b0ca43 --- /dev/null +++ b/gateway/sds_gateway/users/tests/test_keywords_autocomplete.py @@ -0,0 +1,509 @@ +"""Tests for the KeywordsAutocompleteView endpoint.""" + +import json + +import pytest +from django.core.cache import cache +from django.test import Client +from django.urls import reverse +from rest_framework import status + +from sds_gateway.api_methods.models import Dataset +from sds_gateway.users.models import User + +# Test constants +HTTP_OK = status.HTTP_200_OK +HTTP_INTERNAL_SERVER_ERROR = status.HTTP_500_INTERNAL_SERVER_ERROR +DEFAULT_KEYWORD_LIMIT = 50 +TEST_LIMIT = 2 + + +@pytest.mark.django_db +class TestKeywordsAutocompleteView: + """Tests for the KeywordsAutocompleteView endpoint.""" + + @pytest.fixture + def client(self) -> Client: + return Client() + + @pytest.fixture + def user1(self) -> User: + """Create first test user.""" + return User.objects.create_user( + email="user1@example.com", + password="testpass123", # noqa: S106 + name="User 1", + is_approved=True, + ) + + @pytest.fixture + def user2(self) -> User: + """Create second test user.""" + return User.objects.create_user( + email="user2@example.com", + password="testpass123", # noqa: S106 + name="User 2", + is_approved=True, + ) + + @pytest.fixture + def user3(self) -> User: + """Create third test user.""" + return User.objects.create_user( + email="user3@example.com", + password="testpass123", # noqa: S106 + name="User 3", + is_approved=True, + ) + + @pytest.fixture + def dataset1(self, user1: User) -> Dataset: + """Create dataset with keywords for user1.""" + return Dataset.objects.create( + name="Dataset 1", + owner=user1, + description="Test dataset 1", + keywords=json.dumps(["keyword1", "keyword2", "shared-keyword"]), + status="draft", + ) + + @pytest.fixture + def dataset2(self, user2: User) -> Dataset: + """Create dataset with keywords for user2.""" + return Dataset.objects.create( + name="Dataset 2", + owner=user2, + description="Test dataset 2", + keywords=json.dumps(["keyword3", "keyword4", "shared-keyword"]), + status="draft", + ) + + 
@pytest.fixture + def dataset3(self, user3: User) -> Dataset: + """Create dataset with unique keywords for user3.""" + return Dataset.objects.create( + name="Dataset 3", + owner=user3, + description="Test dataset 3", + keywords=json.dumps(["unique-keyword-user3", "another-unique"]), + status="draft", + ) + + @pytest.fixture(autouse=True) + def clear_cache(self): + """Clear cache before and after each test.""" + cache.clear() + yield + cache.clear() + + def test_retrieves_keywords_from_all_users( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + dataset3: Dataset, + ) -> None: + """Test that keywords from all users are retrieved.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + assert "keywords" in data + keywords = data["keywords"] + + # Should contain keywords from all users + assert "keyword1" in keywords # From user1 + assert "keyword2" in keywords # From user1 + assert "keyword3" in keywords # From user2 + assert "keyword4" in keywords # From user2 + assert "unique-keyword-user3" in keywords # From user3 + assert "another-unique" in keywords # From user3 + assert "shared-keyword" in keywords # From both user1 and user2 + + def test_retrieves_keywords_for_different_user( + self, + client: Client, + user2: User, + dataset1: Dataset, + dataset2: Dataset, + dataset3: Dataset, + ) -> None: + """Test that user2 can see keywords from user1 and user3.""" + client.force_login(user2) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should contain keywords from all users + assert "keyword1" in keywords # From user1 + assert "keyword2" in keywords # From user1 + assert "keyword3" in keywords # From user2 + assert "keyword4" in keywords # From user2 + assert "unique-keyword-user3" in keywords # From user3 + assert "another-unique" in keywords # From user3 + + def test_keywords_deduplication( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + ) -> None: + """Test that duplicate keywords are deduplicated.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # "shared-keyword" appears in both datasets, should only appear once + assert keywords.count("shared-keyword") == 1 + # All keywords should be unique + assert len(keywords) == len(set(keywords)) + + def test_query_filtering( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + dataset3: Dataset, + ) -> None: + """Test that query parameter filters keywords.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # Filter by "unique" + response = client.get(url, {"query": "unique"}) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should only contain keywords with "unique" in them + assert all("unique" in keyword.lower() for keyword in keywords) + assert "unique-keyword-user3" in keywords + assert "another-unique" in keywords + + # Should not contain other keywords + assert "keyword1" not in keywords + assert "keyword2" not in keywords + + def test_query_filtering_case_insensitive( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: 
Dataset, + ) -> None: + """Test that query filtering is case-insensitive.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # Filter with uppercase + response = client.get(url, {"query": "KEYWORD"}) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should contain keywords with "keyword" (case-insensitive) + keyword_matches = [ + "keyword1" in keywords, + "keyword2" in keywords, + "keyword3" in keywords, + "keyword4" in keywords, + ] + assert any(keyword_matches) + + def test_limit_parameter( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + dataset3: Dataset, + ) -> None: + """Test that limit parameter limits the number of results.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # Request with limit of 2 + response = client.get(url, {"limit": "2"}) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should have at most TEST_LIMIT keywords + assert len(keywords) <= TEST_LIMIT + + def test_default_limit( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + dataset3: Dataset, + ) -> None: + """Test that default limit is 50.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should have at most DEFAULT_KEYWORD_LIMIT keywords (default limit) + assert len(keywords) <= DEFAULT_KEYWORD_LIMIT + + def test_cache_hit( + self, + client: Client, + user1: User, + dataset1: Dataset, + ) -> None: + """Test that cache is used on subsequent requests.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # First request (cache miss) + response1 = client.get(url) + assert response1.status_code == HTTP_OK + data1 = response1.json() + keywords1 = set(data1["keywords"]) + + # Second request (cache hit) + response2 = client.get(url) + assert response2.status_code == HTTP_OK + data2 = response2.json() + keywords2 = set(data2["keywords"]) + + # Results should be the same + assert keywords1 == keywords2 + + def test_cache_invalidation_on_dataset_create( + self, + client: Client, + user1: User, + user2: User, + dataset1: Dataset, + ) -> None: + """Test that cache is invalidated when a new dataset is created.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # First request to populate cache + response1 = client.get(url) + assert response1.status_code == HTTP_OK + + # Create a new dataset with new keywords + Dataset.objects.create( + name="New Dataset", + owner=user2, + description="New test dataset", + keywords=json.dumps(["new-keyword-from-cache-test"]), + status="draft", + ) + + # Second request should include the new keyword (cache was invalidated) + response2 = client.get(url) + assert response2.status_code == HTTP_OK + data2 = response2.json() + keywords2 = set(data2["keywords"]) + + # Should contain the new keyword + assert "new-keyword-from-cache-test" in keywords2 + # Should also contain old keywords + assert "keyword1" in keywords2 + + def test_cache_invalidation_on_dataset_update( + self, + client: Client, + user1: User, + dataset1: Dataset, + ) -> None: + """Test that cache is invalidated when a dataset is updated.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # First request to populate cache + response1 = 
client.get(url) + assert response1.status_code == HTTP_OK + + # Update dataset with new keywords + dataset1.keywords = json.dumps(["updated-keyword-1", "updated-keyword-2"]) + dataset1.save() + + # Second request should include the updated keywords + response2 = client.get(url) + assert response2.status_code == HTTP_OK + data2 = response2.json() + keywords2 = set(data2["keywords"]) + + # Should contain the updated keywords + assert "updated-keyword-1" in keywords2 + assert "updated-keyword-2" in keywords2 + # Should not contain old keywords + assert "keyword1" not in keywords2 + assert "keyword2" not in keywords2 + + def test_cache_invalidation_on_dataset_soft_delete( + self, + client: Client, + user1: User, + dataset1: Dataset, + ) -> None: + """Test that cache is invalidated when a dataset is soft deleted.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # First request to populate cache + response1 = client.get(url) + assert response1.status_code == HTTP_OK + + # Soft delete the dataset + dataset1.is_deleted = True + dataset1.save() + + # Second request should not include keywords from deleted dataset + response2 = client.get(url) + assert response2.status_code == HTTP_OK + data2 = response2.json() + keywords2 = set(data2["keywords"]) + + # Should not contain keywords from deleted dataset + assert "keyword1" not in keywords2 + assert "keyword2" not in keywords2 + + def test_excludes_deleted_datasets( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + ) -> None: + """Test that keywords from deleted datasets are not included.""" + # Soft delete dataset1 + dataset1.is_deleted = True + dataset1.save() + + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should not contain keywords from deleted dataset + assert "keyword1" not in keywords + assert "keyword2" not in keywords + + # Should still contain keywords from non-deleted dataset + assert "keyword3" in keywords + assert "keyword4" in keywords + + def test_excludes_empty_keywords( + self, + client: Client, + user1: User, + dataset1: Dataset, + ) -> None: + """Test that datasets with empty keywords are excluded.""" + # Create dataset with empty keywords (use empty JSON array string) + Dataset.objects.create( + name="Empty Keywords Dataset", + owner=user1, + keywords=json.dumps([]), # Empty list, will be stored as "[]" + status="draft", + ) + + # Create dataset with empty string keywords + Dataset.objects.create( + name="Empty String Keywords Dataset", + owner=user1, + keywords="", # Empty string + status="draft", + ) + + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should only contain keywords from dataset1, not empty/null datasets + assert "keyword1" in keywords + assert "keyword2" in keywords + + def test_requires_authentication( + self, + client: Client, + dataset1: Dataset, + ) -> None: + """Test that the endpoint requires authentication.""" + url = reverse("users:keywords_autocomplete") + + # Request without authentication + response = client.get(url) + + # Should redirect to login or return 403/401 + assert response.status_code in [302, 401, 403] + + def test_handles_invalid_limit( + self, + client: Client, + user1: User, + dataset1: Dataset, + ) -> 
None: + """Test that invalid limit parameter is handled gracefully.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + # Request with invalid limit (non-numeric) + response = client.get(url, {"limit": "invalid"}) + + # The current implementation uses int() which will raise ValueError + # which is caught and returns 500 + assert response.status_code == HTTP_INTERNAL_SERVER_ERROR + data = response.json() + assert "error" in data + + def test_keywords_sorted( + self, + client: Client, + user1: User, + dataset1: Dataset, + dataset2: Dataset, + dataset3: Dataset, + ) -> None: + """Test that keywords are returned in sorted order.""" + client.force_login(user1) + url = reverse("users:keywords_autocomplete") + + response = client.get(url) + + assert response.status_code == HTTP_OK + data = response.json() + keywords = data["keywords"] + + # Should be sorted + assert keywords == sorted(keywords) diff --git a/gateway/sds_gateway/users/urls.py b/gateway/sds_gateway/users/urls.py index b884c4b4..2e5106a6 100644 --- a/gateway/sds_gateway/users/urls.py +++ b/gateway/sds_gateway/users/urls.py @@ -9,6 +9,7 @@ from .views import ListCapturesView from .views import UploadCaptureView from .views import generate_api_key_form_view +from .views import keywords_autocomplete_view from .views import new_api_key_view from .views import render_html_fragment_view from .views import revoke_api_key_view @@ -44,6 +45,11 @@ path("files//h5info/", FileH5InfoView.as_view(), name="file_h5info"), path("dataset-list/", user_dataset_list_view, name="dataset_list"), path("dataset-details/", user_dataset_details_view, name="dataset_details"), + path( + "keywords-autocomplete/", + keywords_autocomplete_view, + name="keywords_autocomplete", + ), path("render-html/", render_html_fragment_view, name="render_html"), path("group-captures/", user_group_captures_view, name="group_captures"), path( diff --git a/gateway/sds_gateway/users/views.py b/gateway/sds_gateway/users/views.py index 057377f6..4b47af62 100644 --- a/gateway/sds_gateway/users/views.py +++ b/gateway/sds_gateway/users/views.py @@ -10,6 +10,7 @@ from django.contrib.auth.models import AbstractBaseUser from django.contrib.auth.models import AnonymousUser from django.contrib.messages.views import SuccessMessageMixin +from django.core.cache import cache from django.core.paginator import EmptyPage from django.core.paginator import PageNotAnInteger from django.core.paginator import Paginator @@ -1521,11 +1522,19 @@ def get_context_data(self, **kwargs): else: initial_data = {} if existing_dataset: + # Convert keywords list to comma-separated string for form display + keywords_str = ( + ", ".join(existing_dataset.keywords) + if existing_dataset.keywords + and isinstance(existing_dataset.keywords, list) + else "" + ) initial_data = { "name": existing_dataset.name, "description": existing_dataset.description, "author": existing_dataset.authors[0], "status": existing_dataset.status, + "keywords": keywords_str, } dataset_form = DatasetInfoForm(user=self.request.user, initial=initial_data) @@ -1687,6 +1696,7 @@ def _create_or_update_dataset(self, request, dataset_form) -> Dataset: dataset.description = dataset_form.cleaned_data["description"] dataset.authors = [dataset_form.cleaned_data["author"]] dataset.status = dataset_form.cleaned_data["status"] + dataset.keywords = dataset_form.cleaned_data.get("keywords", []) dataset.save() # Clear existing relationships @@ -1699,6 +1709,7 @@ def _create_or_update_dataset(self, request, dataset_form) -> Dataset: 
description=dataset_form.cleaned_data["description"], authors=[dataset_form.cleaned_data["author"]], status=dataset_form.cleaned_data["status"], + keywords=dataset_form.cleaned_data.get("keywords", []), owner=request.user, ) @@ -2479,6 +2490,120 @@ def get(self, request, *args, **kwargs) -> JsonResponse: user_dataset_details_view = DatasetDetailsView.as_view() +class KeywordsAutocompleteView(Auth0LoginRequiredMixin, View): + """ + View to provide keyword autocomplete suggestions. + Returns all unique keywords from ALL datasets (all users). + + Uses caching to improve performance. Cache is invalidated when datasets + are created, updated, or deleted. + """ + + # Cache timeout: 1 hour (3600 seconds) + # Cache is invalidated on dataset create/update, so this is just a safety timeout + CACHE_TIMEOUT = 3600 + + # Global cache key (shared across all users) + # Note: This key is also used in api_methods/models.py for cache invalidation + CACHE_KEY = "keywords_autocomplete_all_users" + + def _fetch_keywords_from_db(self) -> set[str]: + """ + Fetch all unique keywords from ALL non-deleted datasets. + This is the expensive operation that we cache. + """ + # Get all keywords directly from database to avoid recursion issues + # Using values_list avoids loading full Dataset objects + # and from_db deserialization + keywords_values = ( + Dataset.objects.filter(is_deleted=False) + .exclude(keywords__isnull=True) + .exclude(keywords="") + .values_list("keywords", flat=True) + ) + + # Extract all keywords from all datasets + all_keywords = set() + for keywords_value in keywords_values: + if not keywords_value: + continue + + # Keywords are stored as JSON string, need to parse + if isinstance(keywords_value, str): + try: + keywords_list = json.loads(keywords_value) + if isinstance(keywords_list, list): + all_keywords.update(keywords_list) + except (json.JSONDecodeError, TypeError): + # If parsing fails, try to handle as comma-separated string + keywords_str = keywords_value.strip() + if keywords_str: + keywords_list = [ + k.strip() for k in keywords_str.split(",") if k.strip() + ] + all_keywords.update(keywords_list) + elif isinstance(keywords_value, list): + # Already a list (shouldn't happen with values_list, but handle it) + all_keywords.update(keywords_value) + + return all_keywords + + def get(self, request: HttpRequest) -> JsonResponse: + """ + Get all unique keywords from ALL datasets (all users). 
+ + Query parameters: + - query: Optional search query to filter keywords (case-insensitive) + - limit: Optional limit on number of results (default: 50) + + Returns: + JsonResponse with list of unique keywords + """ + try: + # Get query parameters + search_query = request.GET.get("query", "").strip().lower() + limit = int(request.GET.get("limit", 50)) + + # Try to get from cache first (global cache, same for all users) + cached_keywords = cache.get(self.CACHE_KEY) + + if cached_keywords is not None: + # Cache hit - use cached keywords + all_keywords = cached_keywords + else: + # Cache miss - fetch from database + all_keywords = self._fetch_keywords_from_db() + # Cache the result (convert set to list for JSON serialization) + cache.set(self.CACHE_KEY, list(all_keywords), self.CACHE_TIMEOUT) + # Convert back to set for filtering + all_keywords = set(all_keywords) + + # Filter by search query if provided + if search_query: + filtered_keywords = [ + keyword + for keyword in all_keywords + if search_query in keyword.lower() + ] + else: + filtered_keywords = list(all_keywords) + + # Sort and limit + filtered_keywords.sort() + filtered_keywords = filtered_keywords[:limit] + + return JsonResponse({"keywords": filtered_keywords}) + + except Exception: + logger.exception("Error retrieving keywords for autocomplete") + return JsonResponse( + {"error": "Internal server error", "keywords": []}, status=500 + ) + + +keywords_autocomplete_view = KeywordsAutocompleteView.as_view() + + class RenderHTMLFragmentView(Auth0LoginRequiredMixin, View): """Generic view to render any HTML fragment from a Django template."""
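
[Reviewer note, not part of the patch] A quick smoke test of the new endpoint via Django's test client, assuming a database-backed test context; the user attributes follow the fixtures in test_keywords_autocomplete.py:

    from django.test import Client
    from django.urls import reverse

    from sds_gateway.users.models import User

    user = User.objects.create_user(
        email="reviewer@example.com",
        password="testpass123",
        name="Reviewer",
        is_approved=True,
    )
    client = Client()
    client.force_login(user)

    # Case-insensitive substring filter plus a result cap; with no
    # parameters, returns all keywords up to the default limit of 50.
    resp = client.get(reverse("users:keywords_autocomplete"), {"query": "rad", "limit": "10"})
    assert resp.status_code == 200
    print(resp.json())  # {"keywords": [...]}, deduplicated and sorted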