diff --git a/gateway/sds_gateway/api_methods/models.py b/gateway/sds_gateway/api_methods/models.py
index 9de993fb..ee6deefc 100644
--- a/gateway/sds_gateway/api_methods/models.py
+++ b/gateway/sds_gateway/api_methods/models.py
@@ -11,6 +11,7 @@
from blake3 import blake3 as Blake3 # noqa: N812
from django.conf import settings
+from django.core.cache import cache
from django.db import models
from django.db.models import ProtectedError
from django.db.models import QuerySet
@@ -1386,7 +1387,14 @@ def handle_dataset_soft_delete(sender, instance: Dataset, **kwargs) -> None:
"""
Handle soft deletion of datasets by also
soft deleting related share permissions.
+ Also invalidates the keywords autocomplete cache whenever a dataset is saved (created, updated, or soft-deleted).
"""
+ # Invalidate global keywords cache (since keywords are now from all users)
+ # This ensures autocomplete shows latest keywords when datasets change
+ # Cache key matches KeywordsAutocompleteView.CACHE_KEY in users/views.py
+ cache_key = "keywords_autocomplete_all_users"
+ cache.delete(cache_key)
+
if instance.is_deleted:
# This is a soft delete, so we need to soft delete related share permissions
# Soft delete all UserSharePermission records for this dataset
diff --git a/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py b/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py
index f79cd81f..416dcc12 100644
--- a/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py
+++ b/gateway/sds_gateway/api_methods/serializers/dataset_serializers.py
@@ -1,5 +1,7 @@
"""Dataset serializers for the API methods."""
+import json
+
from rest_framework import serializers
from sds_gateway.api_methods.models import Dataset
@@ -13,10 +15,40 @@ class DatasetGetSerializer(serializers.ModelSerializer[Dataset]):
is_shared_with_me = serializers.SerializerMethodField()
is_owner = serializers.SerializerMethodField()
status_display = serializers.CharField(source="get_status_display", read_only=True)
+ keywords = serializers.SerializerMethodField()
def get_authors(self, obj):
return obj.authors[0] if obj.authors else None
+ def get_keywords(self, obj):
+ """
+ Return keywords as a clean list of strings, ready for frontend display.
+ Handles all formats: JSON string, list, or empty.
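+ For example, '["radio", "spectrum"]' (a JSON string) and
+ "radio, spectrum" (comma-separated) both become ["radio", "spectrum"].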
+ """
+ if not obj.keywords:
+ return []
+
+ # If it's already a list (from from_db deserialization), return it
+ if isinstance(obj.keywords, list):
+ return [str(k).strip() for k in obj.keywords if k and str(k).strip()]
+
+ # If it's a string, try to parse it
+ if isinstance(obj.keywords, str):
+ trimmed = obj.keywords.strip()
+ if not trimmed:
+ return []
+
+ # Try to parse as a JSON list first
+ try:
+ parsed = json.loads(trimmed)
+ if isinstance(parsed, list):
+ return [str(k).strip() for k in parsed if k and str(k).strip()]
+ except (json.JSONDecodeError, TypeError):
+ pass
+
+ # Fall back to treating the value as a comma-separated string
+ return [k.strip() for k in trimmed.split(",") if k.strip()]
+
+ return []
+
def get_is_shared_with_me(self, obj):
"""Check if the dataset is shared with the current user."""
request = self.context.get("request")
diff --git a/gateway/sds_gateway/static/css/components.css b/gateway/sds_gateway/static/css/components.css
index 8aba9ecc..c38f0a7e 100644
--- a/gateway/sds_gateway/static/css/components.css
+++ b/gateway/sds_gateway/static/css/components.css
@@ -988,6 +988,62 @@ body {
color: #000;
}
+/* Keywords Autocomplete Dropdown */
+.keywords-autocomplete-dropdown {
+ position: absolute;
+ top: 100%;
+ left: 0;
+ right: 0;
+ z-index: 9999;
+ background: white;
+ border: 1px solid #dee2e6;
+ border-radius: 0.375rem;
+ box-shadow: 0 0.5rem 1rem rgba(0, 0, 0, 0.15);
+ max-height: 200px;
+ overflow-y: auto;
+ margin-top: 0.25rem;
+}
+
+.keywords-autocomplete-dropdown .list-group-item {
+ border: none;
+ border-bottom: 1px solid #dee2e6;
+ cursor: pointer;
+ padding: 0.75rem 1.25rem;
+}
+
+.keywords-autocomplete-dropdown .list-group-item:last-child {
+ border-bottom: none;
+}
+
+.keywords-autocomplete-dropdown .list-group-item:hover {
+ background-color: #f8f9fa;
+}
+
+.keywords-autocomplete-dropdown .list-group-item:active {
+ background-color: #e9ecef;
+}
+
+.keywords-autocomplete-dropdown .list-group-item.selected {
+ background-color: #0d6efd;
+ color: white;
+}
+
+.keywords-autocomplete-dropdown .list-group-item.no-results {
+ padding: 0.75rem 1rem;
+ color: #6c757d;
+ font-style: italic;
+ text-align: center;
+ cursor: default;
+}
+
+.keywords-autocomplete-dropdown .list-group-item.no-results:hover {
+ background-color: transparent;
+}
+
+.keywords-suggestion {
+ font-size: 0.875rem;
+}
+
/* Share Group Manager Specific Styles */
.selected-users-chips {
gap: 0.5rem;
diff --git a/gateway/sds_gateway/static/js/actions/DetailsActionManager.js b/gateway/sds_gateway/static/js/actions/DetailsActionManager.js
index 9bfda448..6af1ff84 100644
--- a/gateway/sds_gateway/static/js/actions/DetailsActionManager.js
+++ b/gateway/sds_gateway/static/js/actions/DetailsActionManager.js
@@ -181,6 +181,15 @@ class DetailsActionManager {
".dataset-details-description",
datasetData.description || "No description provided",
);
+
+ // Update keywords display
+ // Keywords are now returned as a clean list from the backend
+ const keywordsValue =
+ Array.isArray(datasetData.keywords) && datasetData.keywords.length > 0
+ ? datasetData.keywords.join(", ")
+ : "No keywords provided";
+ this.updateElementText(modal, ".dataset-details-keywords", keywordsValue);
+
this.updateElementText(
modal,
".dataset-details-status",
@@ -930,6 +939,14 @@ class DetailsActionManager {
}
}
+ // formatKeywords method removed - keywords are now formatted on the backend
+ // Backend returns keywords as a clean array of strings, so we just join them
+
/**
* Initialize details buttons for dynamically loaded content
* @param {Element} container - Container element to search within
diff --git a/gateway/sds_gateway/static/js/captureGroupingComponents.js b/gateway/sds_gateway/static/js/captureGroupingComponents.js
index 4aef04ab..174fcd55 100644
--- a/gateway/sds_gateway/static/js/captureGroupingComponents.js
+++ b/gateway/sds_gateway/static/js/captureGroupingComponents.js
@@ -35,9 +35,11 @@ class FormHandler {
this.nameField = document.getElementById("id_name");
this.authorField = document.getElementById("id_author");
this.statusField = document.getElementById("id_status");
+ this.keywordsField = document.getElementById("id_keywords");
this.initializeEventListeners();
this.initializeErrorContainer();
+ this.initializeKeywordsAutocomplete();
this.validateCurrentStep(); // Initial validation
this.updateNavigation(); // Initial navigation button display
@@ -126,6 +128,224 @@ class FormHandler {
}
}
+ /**
+ * Initialize keywords autocomplete functionality
+ */
+ initializeKeywordsAutocomplete() {
+ if (!this.keywordsField) return;
+
+ // Create dropdown container
+ const dropdown = document.createElement("div");
+ dropdown.className = "keywords-autocomplete-dropdown d-none";
+ dropdown.id = "keywords-autocomplete-dropdown";
+ dropdown.innerHTML = '<div class="list-group"></div>';
+
+ // Insert dropdown after the keywords field
+ const keywordsContainer = this.keywordsField.parentElement;
+ if (keywordsContainer) {
+ keywordsContainer.style.position = "relative";
+ keywordsContainer.appendChild(dropdown);
+ }
+
+ let searchTimeout = null;
+ let allKeywords = [];
+
+ // Fetch all keywords on initialization
+ this.fetchKeywords().then((keywords) => {
+ allKeywords = keywords;
+ });
+
+ // Handle input events
+ this.keywordsField.addEventListener("input", (e) => {
+ clearTimeout(searchTimeout);
+ const query = e.target.value.trim();
+
+ // Get the last keyword being typed (after the last comma)
+ const lastCommaIndex = query.lastIndexOf(",");
+ const currentKeyword =
+ lastCommaIndex >= 0
+ ? query.substring(lastCommaIndex + 1).trim()
+ : query.trim();
+
+ if (currentKeyword.length < 1) {
+ this.hideKeywordsDropdown(dropdown);
+ return;
+ }
+
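+ // Debounce: wait 300ms after the last keystroke before filtering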
+ searchTimeout = setTimeout(() => {
+ this.searchKeywords(currentKeyword, allKeywords, dropdown);
+ }, 300);
+ });
+
+ // Handle keyboard navigation
+ this.keywordsField.addEventListener("keydown", (e) => {
+ const visibleItems = dropdown.querySelectorAll(
+ ".list-group-item:not(.no-results)",
+ );
+ const currentIndex = Array.from(visibleItems).findIndex((item) =>
+ item.classList.contains("selected"),
+ );
+
+ switch (e.key) {
+ case "ArrowDown":
+ e.preventDefault();
+ this.navigateKeywordsDropdown(visibleItems, currentIndex, 1);
+ break;
+ case "ArrowUp":
+ e.preventDefault();
+ this.navigateKeywordsDropdown(visibleItems, currentIndex, -1);
+ break;
+ case "Enter": {
+ e.preventDefault();
+ const selectedItem = dropdown.querySelector(
+ ".list-group-item.selected",
+ );
+ if (selectedItem) {
+ this.selectKeyword(selectedItem);
+ }
+ break;
+ }
+ case "Escape":
+ this.hideKeywordsDropdown(dropdown);
+ this.keywordsField.blur();
+ break;
+ }
+ });
+
+ // Handle clicks outside to close dropdown
+ document.addEventListener("click", (e) => {
+ if (
+ !this.keywordsField.contains(e.target) &&
+ !dropdown.contains(e.target)
+ ) {
+ this.hideKeywordsDropdown(dropdown);
+ }
+ });
+
+ // Handle dropdown item clicks
+ dropdown.addEventListener("click", (e) => {
+ const item = e.target.closest(".list-group-item");
+ if (item && !item.classList.contains("no-results")) {
+ this.selectKeyword(item);
+ }
+ });
+ }
+
+ /**
+ * Fetch all keywords from the API
+ */
+ async fetchKeywords() {
+ try {
+ const response = await fetch("/users/keywords-autocomplete/");
+ if (!response.ok) {
+ console.error("Failed to fetch keywords");
+ return [];
+ }
+ const data = await response.json();
+ return data.keywords || [];
+ } catch (error) {
+ console.error("Error fetching keywords:", error);
+ return [];
+ }
+ }
+
+ /**
+ * Search keywords based on query
+ */
+ searchKeywords(query, allKeywords, dropdown) {
+ const queryLower = query.toLowerCase();
+ const filtered = allKeywords
+ .filter((keyword) => keyword.toLowerCase().includes(queryLower))
+ .slice(0, 10); // Limit to 10 suggestions
+
+ this.renderKeywordsDropdown(filtered, dropdown);
+
+ if (filtered.length > 0) {
+ this.showKeywordsDropdown(dropdown);
+ } else {
+ this.hideKeywordsDropdown(dropdown);
+ }
+ }
+
+ /**
+ * Render keywords dropdown
+ */
+ renderKeywordsDropdown(keywords, dropdown) {
+ const listGroup = dropdown.querySelector(".list-group");
+ if (!listGroup) return;
+
+ if (keywords.length === 0) {
+ listGroup.innerHTML =
+ '<div class="list-group-item no-results">No suggestions found</div>';
+ return;
+ }
+
+ listGroup.innerHTML = keywords
+ .map(
+ (keyword) =>
+ `<div class="list-group-item keywords-suggestion" data-keyword="${keyword}">${keyword}</div>`,
+ )
+ .join("");
+ }
+
+ /**
+ * Select a keyword from the dropdown
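+ * e.g. a field containing "radio, spe" becomes "radio, spectrum, "
+ * when the "spectrum" suggestion is chosen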
+ */
+ selectKeyword(item) {
+ const keyword = item.getAttribute("data-keyword");
+ if (!keyword) return;
+
+ const currentValue = this.keywordsField.value.trim();
+ const lastCommaIndex = currentValue.lastIndexOf(",");
+
+ if (lastCommaIndex >= 0) {
+ // Replace the last keyword being typed
+ const prefix = currentValue.substring(0, lastCommaIndex + 1);
+ this.keywordsField.value = `${prefix} ${keyword}, `;
+ } else {
+ // Replace the entire value
+ this.keywordsField.value = `${keyword}, `;
+ }
+
+ this.hideKeywordsDropdown(
+ document.getElementById("keywords-autocomplete-dropdown"),
+ );
+ this.keywordsField.focus();
+ }
+
+ /**
+ * Navigate dropdown with keyboard
+ */
+ navigateKeywordsDropdown(items, currentIndex, direction) {
+ for (const item of items) {
+ item.classList.remove("selected");
+ }
+
+ const nextIndex = currentIndex + direction;
+ if (nextIndex >= 0 && nextIndex < items.length) {
+ items[nextIndex].classList.add("selected");
+ items[nextIndex].scrollIntoView({ block: "nearest" });
+ }
+ }
+
+ /**
+ * Show dropdown
+ */
+ showKeywordsDropdown(dropdown) {
+ if (dropdown) {
+ dropdown.classList.remove("d-none");
+ }
+ }
+
+ /**
+ * Hide dropdown
+ */
+ hideKeywordsDropdown(dropdown) {
+ if (dropdown) {
+ dropdown.classList.add("d-none");
+ }
+ }
+
show(container, showClass = "display-block") {
container.classList.remove("display-none");
container.classList.add(showClass);
@@ -178,6 +398,10 @@ class FormHandler {
document.querySelector("#step4 .dataset-description").textContent =
document.getElementById("id_description").value ||
"No description provided.";
+ const keywordsValue =
+ document.getElementById("id_keywords")?.value.trim() || "";
+ document.querySelector("#step4 .dataset-keywords").textContent =
+ keywordsValue || "No keywords provided.";
// Update captures table
const capturesTableBody = document.querySelector(
diff --git a/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js b/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js
index b458fc8a..fdf07579 100644
--- a/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js
+++ b/gateway/sds_gateway/static/js/dataset/DatasetCreationHandler.js
@@ -24,6 +24,7 @@ class DatasetCreationHandler {
this.authorsField = document.getElementById("id_authors");
this.statusField = document.getElementById("id_status");
this.descriptionField = document.getElementById("id_description");
+ this.keywordsField = document.getElementById("id_keywords");
// Hidden fields
this.selectedCapturesField = document.getElementById("selected_captures");
@@ -45,6 +46,7 @@ class DatasetCreationHandler {
this.initializeErrorContainer();
this.initializeAuthorsManagement();
this.initializePlaceholders();
+ this.initializeKeywordsAutocomplete();
this.validateCurrentStep();
this.updateNavigation();
}
@@ -206,6 +208,225 @@ class DatasetCreationHandler {
}
}
+ /**
+ * Initialize keywords autocomplete functionality
+ */
+ initializeKeywordsAutocomplete() {
+ if (!this.keywordsField) return;
+
+ // Create dropdown container
+ const dropdown = document.createElement("div");
+ dropdown.className = "keywords-autocomplete-dropdown d-none";
+ dropdown.id = "keywords-autocomplete-dropdown";
+ dropdown.innerHTML = '<div class="list-group"></div>';
+
+ // Insert dropdown after the keywords field
+ const keywordsContainer = this.keywordsField.parentElement;
+ if (keywordsContainer) {
+ keywordsContainer.style.position = "relative";
+ keywordsContainer.appendChild(dropdown);
+ }
+
+ let searchTimeout = null;
+ let allKeywords = [];
+ const filteredKeywords = [];
+
+ // Fetch all keywords on initialization
+ this.fetchKeywords().then((keywords) => {
+ allKeywords = keywords;
+ });
+
+ // Handle input events
+ this.keywordsField.addEventListener("input", (e) => {
+ clearTimeout(searchTimeout);
+ const query = e.target.value.trim();
+
+ // Get the last keyword being typed (after the last comma)
+ const lastCommaIndex = query.lastIndexOf(",");
+ const currentKeyword =
+ lastCommaIndex >= 0
+ ? query.substring(lastCommaIndex + 1).trim()
+ : query.trim();
+
+ if (currentKeyword.length < 1) {
+ this.hideDropdown(dropdown);
+ return;
+ }
+
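+ // Debounce: wait 300ms after the last keystroke before filtering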
+ searchTimeout = setTimeout(() => {
+ this.searchKeywords(currentKeyword, allKeywords, dropdown);
+ }, 300);
+ });
+
+ // Handle keyboard navigation
+ this.keywordsField.addEventListener("keydown", (e) => {
+ const visibleItems = dropdown.querySelectorAll(
+ ".list-group-item:not(.no-results)",
+ );
+ const currentIndex = Array.from(visibleItems).findIndex((item) =>
+ item.classList.contains("selected"),
+ );
+
+ switch (e.key) {
+ case "ArrowDown":
+ e.preventDefault();
+ this.navigateDropdown(visibleItems, currentIndex, 1);
+ break;
+ case "ArrowUp":
+ e.preventDefault();
+ this.navigateDropdown(visibleItems, currentIndex, -1);
+ break;
+ case "Enter": {
+ e.preventDefault();
+ const selectedItem = dropdown.querySelector(
+ ".list-group-item.selected",
+ );
+ if (selectedItem) {
+ this.selectKeyword(selectedItem);
+ }
+ break;
+ }
+ case "Escape":
+ this.hideDropdown(dropdown);
+ this.keywordsField.blur();
+ break;
+ }
+ });
+
+ // Handle clicks outside to close dropdown
+ document.addEventListener("click", (e) => {
+ if (
+ !this.keywordsField.contains(e.target) &&
+ !dropdown.contains(e.target)
+ ) {
+ this.hideDropdown(dropdown);
+ }
+ });
+
+ // Handle dropdown item clicks
+ dropdown.addEventListener("click", (e) => {
+ const item = e.target.closest(".list-group-item");
+ if (item && !item.classList.contains("no-results")) {
+ this.selectKeyword(item);
+ }
+ });
+ }
+
+ /**
+ * Fetch all keywords from the API
+ */
+ async fetchKeywords() {
+ try {
+ const response = await fetch("/users/keywords-autocomplete/");
+ if (!response.ok) {
+ console.error("Failed to fetch keywords");
+ return [];
+ }
+ const data = await response.json();
+ return data.keywords || [];
+ } catch (error) {
+ console.error("Error fetching keywords:", error);
+ return [];
+ }
+ }
+
+ /**
+ * Search keywords based on query
+ */
+ searchKeywords(query, allKeywords, dropdown) {
+ const queryLower = query.toLowerCase();
+ const filtered = allKeywords
+ .filter((keyword) => keyword.toLowerCase().includes(queryLower))
+ .slice(0, 10); // Limit to 10 suggestions
+
+ this.renderKeywordsDropdown(filtered, dropdown);
+
+ if (filtered.length > 0) {
+ this.showDropdown(dropdown);
+ } else {
+ this.hideDropdown(dropdown);
+ }
+ }
+
+ /**
+ * Render keywords dropdown
+ */
+ renderKeywordsDropdown(keywords, dropdown) {
+ const listGroup = dropdown.querySelector(".list-group");
+ if (!listGroup) return;
+
+ if (keywords.length === 0) {
+ listGroup.innerHTML =
+ '<div class="list-group-item no-results">No suggestions found</div>';
+ return;
+ }
+
+ listGroup.innerHTML = keywords
+ .map(
+ (keyword) =>
+ `<div class="list-group-item keywords-suggestion" data-keyword="${keyword}">${keyword}</div>`,
+ )
+ .join("");
+ }
+
+ /**
+ * Select a keyword from the dropdown
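+ * e.g. a field containing "radio, spe" becomes "radio, spectrum, "
+ * when the "spectrum" suggestion is chosen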
+ */
+ selectKeyword(item) {
+ const keyword = item.getAttribute("data-keyword");
+ if (!keyword) return;
+
+ const currentValue = this.keywordsField.value.trim();
+ const lastCommaIndex = currentValue.lastIndexOf(",");
+
+ if (lastCommaIndex >= 0) {
+ // Replace the last keyword being typed
+ const prefix = currentValue.substring(0, lastCommaIndex + 1);
+ this.keywordsField.value = `${prefix} ${keyword}, `;
+ } else {
+ // Replace the entire value
+ this.keywordsField.value = `${keyword}, `;
+ }
+
+ this.hideDropdown(
+ document.getElementById("keywords-autocomplete-dropdown"),
+ );
+ this.keywordsField.focus();
+ }
+
+ /**
+ * Navigate dropdown with keyboard
+ */
+ navigateDropdown(items, currentIndex, direction) {
+ for (const item of items) {
+ item.classList.remove("selected");
+ }
+
+ const nextIndex = currentIndex + direction;
+ if (nextIndex >= 0 && nextIndex < items.length) {
+ items[nextIndex].classList.add("selected");
+ items[nextIndex].scrollIntoView({ block: "nearest" });
+ }
+ }
+
+ /**
+ * Show dropdown
+ */
+ showDropdown(dropdown) {
+ if (dropdown) {
+ dropdown.classList.remove("d-none");
+ }
+ }
+
+ /**
+ * Hide dropdown
+ */
+ hideDropdown(dropdown) {
+ if (dropdown) {
+ dropdown.classList.add("d-none");
+ }
+ }
+
/**
* Initialize file browser modal handlers
*/
@@ -517,6 +738,15 @@ class DatasetCreationHandler {
: "No description provided.";
}
+ // Update keywords display
+ const keywordsDisplay = document.querySelector("#step4 .dataset-keywords");
+ if (keywordsDisplay) {
+ const keywordsValue = this.keywordsField
+ ? this.keywordsField.value.trim()
+ : "";
+ keywordsDisplay.textContent = keywordsValue || "No keywords provided.";
+ }
+
// Update selected items table
this.updateSelectedItemsTable();
}
diff --git a/gateway/sds_gateway/static/js/datasetDetailsModal.js b/gateway/sds_gateway/static/js/datasetDetailsModal.js
index d7df56ec..ce3e051c 100644
--- a/gateway/sds_gateway/static/js/datasetDetailsModal.js
+++ b/gateway/sds_gateway/static/js/datasetDetailsModal.js
@@ -123,6 +123,17 @@ class DatasetDetailsModal {
document.querySelector(".dataset-details-description").textContent =
dataset.description || "No description available";
+ // Update keywords display
+ // Keywords are now returned as a clean list from the backend
+ const keywordsElement = document.querySelector(".dataset-details-keywords");
+ if (keywordsElement) {
+ const keywordsValue =
+ Array.isArray(dataset.keywords) && dataset.keywords.length > 0
+ ? dataset.keywords.join(", ")
+ : "No keywords available";
+ keywordsElement.textContent = keywordsValue;
+ }
+
// Format status with badge using database values
const statusElement = document.querySelector(".dataset-details-status");
if (dataset.status === "draft") {
@@ -146,6 +157,9 @@ class DatasetDetailsModal {
document.querySelector(".dataset-details-updated").innerHTML = updatedDate;
}
+ // formatKeywords method removed - keywords are now formatted on the backend
+ // Backend returns keywords as a clean array of strings, so we just join them
+
/**
* Copy dataset UUID to clipboard
*/
diff --git a/gateway/sds_gateway/templates/users/group_captures.html b/gateway/sds_gateway/templates/users/group_captures.html
index b261ce95..857c270d 100644
--- a/gateway/sds_gateway/templates/users/group_captures.html
+++ b/gateway/sds_gateway/templates/users/group_captures.html
@@ -107,6 +107,14 @@
{{ dataset_form.description }}
+<div class="mb-3">
+<label for="{{ dataset_form.keywords.id_for_label }}" class="form-label">Keywords</label>
+{{ dataset_form.keywords }}
+<div class="form-text">{{ dataset_form.keywords.help_text }}</div>
+</div>
diff --git a/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html b/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html
index 045abee8..15522129 100644
--- a/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html
+++ b/gateway/sds_gateway/templates/users/partials/dataset_details_modal.html
@@ -66,6 +66,10 @@
Loading...
+<h6>Keywords</h6>
+<p class="dataset-details-keywords">
+Loading...
+</p>
Loading...
diff --git a/gateway/sds_gateway/templates/users/partials/review_create_dataset.html b/gateway/sds_gateway/templates/users/partials/review_create_dataset.html
index c93b94c6..2f811292 100644
--- a/gateway/sds_gateway/templates/users/partials/review_create_dataset.html
+++ b/gateway/sds_gateway/templates/users/partials/review_create_dataset.html
@@ -19,10 +19,14 @@ Dataset Information
+<div class="mb-2">
+<strong>Keywords:</strong>
+<span class="dataset-keywords">No keywords provided.</span>
+</div>
diff --git a/gateway/sds_gateway/users/forms.py b/gateway/sds_gateway/users/forms.py
index fafd32bf..2d812d32 100644
--- a/gateway/sds_gateway/users/forms.py
+++ b/gateway/sds_gateway/users/forms.py
@@ -100,6 +100,20 @@ class DatasetInfoForm(forms.Form):
widget=forms.Select(attrs={"class": "form-control"}),
help_text="Draft: Work in progress, Final: Complete and ready for use",
)
+ keywords = forms.CharField(
+ label="Keywords",
+ required=False,
+ widget=forms.TextInput(
+ attrs={
+ "class": "form-control",
+ "placeholder": (
+ "Enter keywords separated by commas "
+ "(e.g., radio, spectrum, analysis)"
+ ),
+ }
+ ),
+ help_text="Enter keywords separated by commas",
+ )
def __init__(self, *args, **kwargs):
user = kwargs.pop("user", None)
@@ -126,6 +140,17 @@ def clean_description(self):
"""Clean and validate the description."""
return self.cleaned_data.get("description", "").strip()
+ def clean_keywords(self):
+ """Parse keywords from comma-separated string to list."""
+ keywords_str = self.cleaned_data.get("keywords", "").strip()
+ if not keywords_str:
+ return []
+
+ # Split by comma, strip whitespace, and filter out empty strings
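+ # e.g. "radio, spectrum,, analysis " -> ["radio", "spectrum", "analysis"]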
+ return [
+ keyword.strip() for keyword in keywords_str.split(",") if keyword.strip()
+ ]
+
class CaptureSearchForm(forms.Form):
directory = forms.CharField(
diff --git a/gateway/sds_gateway/users/tests/test_keywords_autocomplete.py b/gateway/sds_gateway/users/tests/test_keywords_autocomplete.py
new file mode 100644
index 00000000..70b0ca43
--- /dev/null
+++ b/gateway/sds_gateway/users/tests/test_keywords_autocomplete.py
@@ -0,0 +1,509 @@
+"""Tests for the KeywordsAutocompleteView endpoint."""
+
+import json
+
+import pytest
+from django.core.cache import cache
+from django.test import Client
+from django.urls import reverse
+from rest_framework import status
+
+from sds_gateway.api_methods.models import Dataset
+from sds_gateway.users.models import User
+
+# Test constants
+HTTP_OK = status.HTTP_200_OK
+HTTP_INTERNAL_SERVER_ERROR = status.HTTP_500_INTERNAL_SERVER_ERROR
+DEFAULT_KEYWORD_LIMIT = 50
+TEST_LIMIT = 2
+
+
+@pytest.mark.django_db
+class TestKeywordsAutocompleteView:
+ """Tests for the KeywordsAutocompleteView endpoint."""
+
+ @pytest.fixture
+ def client(self) -> Client:
+ return Client()
+
+ @pytest.fixture
+ def user1(self) -> User:
+ """Create first test user."""
+ return User.objects.create_user(
+ email="user1@example.com",
+ password="testpass123", # noqa: S106
+ name="User 1",
+ is_approved=True,
+ )
+
+ @pytest.fixture
+ def user2(self) -> User:
+ """Create second test user."""
+ return User.objects.create_user(
+ email="user2@example.com",
+ password="testpass123", # noqa: S106
+ name="User 2",
+ is_approved=True,
+ )
+
+ @pytest.fixture
+ def user3(self) -> User:
+ """Create third test user."""
+ return User.objects.create_user(
+ email="user3@example.com",
+ password="testpass123", # noqa: S106
+ name="User 3",
+ is_approved=True,
+ )
+
+ @pytest.fixture
+ def dataset1(self, user1: User) -> Dataset:
+ """Create dataset with keywords for user1."""
+ return Dataset.objects.create(
+ name="Dataset 1",
+ owner=user1,
+ description="Test dataset 1",
+ keywords=json.dumps(["keyword1", "keyword2", "shared-keyword"]),
+ status="draft",
+ )
+
+ @pytest.fixture
+ def dataset2(self, user2: User) -> Dataset:
+ """Create dataset with keywords for user2."""
+ return Dataset.objects.create(
+ name="Dataset 2",
+ owner=user2,
+ description="Test dataset 2",
+ keywords=json.dumps(["keyword3", "keyword4", "shared-keyword"]),
+ status="draft",
+ )
+
+ @pytest.fixture
+ def dataset3(self, user3: User) -> Dataset:
+ """Create dataset with unique keywords for user3."""
+ return Dataset.objects.create(
+ name="Dataset 3",
+ owner=user3,
+ description="Test dataset 3",
+ keywords=json.dumps(["unique-keyword-user3", "another-unique"]),
+ status="draft",
+ )
+
+ @pytest.fixture(autouse=True)
+ def clear_cache(self):
+ """Clear cache before and after each test."""
+ cache.clear()
+ yield
+ cache.clear()
+
+ def test_retrieves_keywords_from_all_users(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ dataset3: Dataset,
+ ) -> None:
+ """Test that keywords from all users are retrieved."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ assert "keywords" in data
+ keywords = data["keywords"]
+
+ # Should contain keywords from all users
+ assert "keyword1" in keywords # From user1
+ assert "keyword2" in keywords # From user1
+ assert "keyword3" in keywords # From user2
+ assert "keyword4" in keywords # From user2
+ assert "unique-keyword-user3" in keywords # From user3
+ assert "another-unique" in keywords # From user3
+ assert "shared-keyword" in keywords # From both user1 and user2
+
+ def test_retrieves_keywords_for_different_user(
+ self,
+ client: Client,
+ user2: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ dataset3: Dataset,
+ ) -> None:
+ """Test that user2 can see keywords from user1 and user3."""
+ client.force_login(user2)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should contain keywords from all users
+ assert "keyword1" in keywords # From user1
+ assert "keyword2" in keywords # From user1
+ assert "keyword3" in keywords # From user2
+ assert "keyword4" in keywords # From user2
+ assert "unique-keyword-user3" in keywords # From user3
+ assert "another-unique" in keywords # From user3
+
+ def test_keywords_deduplication(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ ) -> None:
+ """Test that duplicate keywords are deduplicated."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # "shared-keyword" appears in both datasets, should only appear once
+ assert keywords.count("shared-keyword") == 1
+ # All keywords should be unique
+ assert len(keywords) == len(set(keywords))
+
+ def test_query_filtering(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ dataset3: Dataset,
+ ) -> None:
+ """Test that query parameter filters keywords."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # Filter by "unique"
+ response = client.get(url, {"query": "unique"})
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should only contain keywords with "unique" in them
+ assert all("unique" in keyword.lower() for keyword in keywords)
+ assert "unique-keyword-user3" in keywords
+ assert "another-unique" in keywords
+
+ # Should not contain other keywords
+ assert "keyword1" not in keywords
+ assert "keyword2" not in keywords
+
+ def test_query_filtering_case_insensitive(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ ) -> None:
+ """Test that query filtering is case-insensitive."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # Filter with uppercase
+ response = client.get(url, {"query": "KEYWORD"})
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should contain keywords with "keyword" (case-insensitive)
+ keyword_matches = [
+ "keyword1" in keywords,
+ "keyword2" in keywords,
+ "keyword3" in keywords,
+ "keyword4" in keywords,
+ ]
+ assert any(keyword_matches)
+
+ def test_limit_parameter(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ dataset3: Dataset,
+ ) -> None:
+ """Test that limit parameter limits the number of results."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # Request with limit of 2
+ response = client.get(url, {"limit": "2"})
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should have at most TEST_LIMIT keywords
+ assert len(keywords) <= TEST_LIMIT
+
+ def test_default_limit(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ dataset3: Dataset,
+ ) -> None:
+ """Test that default limit is 50."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should have at most DEFAULT_KEYWORD_LIMIT keywords (default limit)
+ assert len(keywords) <= DEFAULT_KEYWORD_LIMIT
+
+ def test_cache_hit(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that cache is used on subsequent requests."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # First request (cache miss)
+ response1 = client.get(url)
+ assert response1.status_code == HTTP_OK
+ data1 = response1.json()
+ keywords1 = set(data1["keywords"])
+
+ # Second request (cache hit)
+ response2 = client.get(url)
+ assert response2.status_code == HTTP_OK
+ data2 = response2.json()
+ keywords2 = set(data2["keywords"])
+
+ # Results should be the same
+ assert keywords1 == keywords2
+
+ def test_cache_invalidation_on_dataset_create(
+ self,
+ client: Client,
+ user1: User,
+ user2: User,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that cache is invalidated when a new dataset is created."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # First request to populate cache
+ response1 = client.get(url)
+ assert response1.status_code == HTTP_OK
+
+ # Create a new dataset with new keywords
+ Dataset.objects.create(
+ name="New Dataset",
+ owner=user2,
+ description="New test dataset",
+ keywords=json.dumps(["new-keyword-from-cache-test"]),
+ status="draft",
+ )
+
+ # Second request should include the new keyword (cache was invalidated)
+ response2 = client.get(url)
+ assert response2.status_code == HTTP_OK
+ data2 = response2.json()
+ keywords2 = set(data2["keywords"])
+
+ # Should contain the new keyword
+ assert "new-keyword-from-cache-test" in keywords2
+ # Should also contain old keywords
+ assert "keyword1" in keywords2
+
+ def test_cache_invalidation_on_dataset_update(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that cache is invalidated when a dataset is updated."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # First request to populate cache
+ response1 = client.get(url)
+ assert response1.status_code == HTTP_OK
+
+ # Update dataset with new keywords
+ dataset1.keywords = json.dumps(["updated-keyword-1", "updated-keyword-2"])
+ dataset1.save()
+
+ # Second request should include the updated keywords
+ response2 = client.get(url)
+ assert response2.status_code == HTTP_OK
+ data2 = response2.json()
+ keywords2 = set(data2["keywords"])
+
+ # Should contain the updated keywords
+ assert "updated-keyword-1" in keywords2
+ assert "updated-keyword-2" in keywords2
+ # Should not contain old keywords
+ assert "keyword1" not in keywords2
+ assert "keyword2" not in keywords2
+
+ def test_cache_invalidation_on_dataset_soft_delete(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that cache is invalidated when a dataset is soft deleted."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # First request to populate cache
+ response1 = client.get(url)
+ assert response1.status_code == HTTP_OK
+
+ # Soft delete the dataset
+ dataset1.is_deleted = True
+ dataset1.save()
+
+ # Second request should not include keywords from deleted dataset
+ response2 = client.get(url)
+ assert response2.status_code == HTTP_OK
+ data2 = response2.json()
+ keywords2 = set(data2["keywords"])
+
+ # Should not contain keywords from deleted dataset
+ assert "keyword1" not in keywords2
+ assert "keyword2" not in keywords2
+
+ def test_excludes_deleted_datasets(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ ) -> None:
+ """Test that keywords from deleted datasets are not included."""
+ # Soft delete dataset1
+ dataset1.is_deleted = True
+ dataset1.save()
+
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should not contain keywords from deleted dataset
+ assert "keyword1" not in keywords
+ assert "keyword2" not in keywords
+
+ # Should still contain keywords from non-deleted dataset
+ assert "keyword3" in keywords
+ assert "keyword4" in keywords
+
+ def test_excludes_empty_keywords(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that datasets with empty keywords are excluded."""
+ # Create dataset with empty keywords (use empty JSON array string)
+ Dataset.objects.create(
+ name="Empty Keywords Dataset",
+ owner=user1,
+ keywords=json.dumps([]), # Empty list, will be stored as "[]"
+ status="draft",
+ )
+
+ # Create dataset with empty string keywords
+ Dataset.objects.create(
+ name="Empty String Keywords Dataset",
+ owner=user1,
+ keywords="", # Empty string
+ status="draft",
+ )
+
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should only contain keywords from dataset1, not empty/null datasets
+ assert "keyword1" in keywords
+ assert "keyword2" in keywords
+
+ def test_requires_authentication(
+ self,
+ client: Client,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that the endpoint requires authentication."""
+ url = reverse("users:keywords_autocomplete")
+
+ # Request without authentication
+ response = client.get(url)
+
+ # Should redirect to login or return 403/401
+ assert response.status_code in [302, 401, 403]
+
+ def test_handles_invalid_limit(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ ) -> None:
+ """Test that invalid limit parameter is handled gracefully."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ # Request with invalid limit (non-numeric)
+ response = client.get(url, {"limit": "invalid"})
+
+ # The current implementation calls int() on the limit directly, so a
+ # non-numeric value raises ValueError, which the broad except catches
+ # and returns as a 500 response
+ assert response.status_code == HTTP_INTERNAL_SERVER_ERROR
+ data = response.json()
+ assert "error" in data
+
+ def test_keywords_sorted(
+ self,
+ client: Client,
+ user1: User,
+ dataset1: Dataset,
+ dataset2: Dataset,
+ dataset3: Dataset,
+ ) -> None:
+ """Test that keywords are returned in sorted order."""
+ client.force_login(user1)
+ url = reverse("users:keywords_autocomplete")
+
+ response = client.get(url)
+
+ assert response.status_code == HTTP_OK
+ data = response.json()
+ keywords = data["keywords"]
+
+ # Should be sorted
+ assert keywords == sorted(keywords)
diff --git a/gateway/sds_gateway/users/urls.py b/gateway/sds_gateway/users/urls.py
index b884c4b4..2e5106a6 100644
--- a/gateway/sds_gateway/users/urls.py
+++ b/gateway/sds_gateway/users/urls.py
@@ -9,6 +9,7 @@
from .views import ListCapturesView
from .views import UploadCaptureView
from .views import generate_api_key_form_view
+from .views import keywords_autocomplete_view
from .views import new_api_key_view
from .views import render_html_fragment_view
from .views import revoke_api_key_view
@@ -44,6 +45,11 @@
path("files//h5info/", FileH5InfoView.as_view(), name="file_h5info"),
path("dataset-list/", user_dataset_list_view, name="dataset_list"),
path("dataset-details/", user_dataset_details_view, name="dataset_details"),
+ path(
+ "keywords-autocomplete/",
+ keywords_autocomplete_view,
+ name="keywords_autocomplete",
+ ),
path("render-html/", render_html_fragment_view, name="render_html"),
path("group-captures/", user_group_captures_view, name="group_captures"),
path(
diff --git a/gateway/sds_gateway/users/views.py b/gateway/sds_gateway/users/views.py
index 057377f6..4b47af62 100644
--- a/gateway/sds_gateway/users/views.py
+++ b/gateway/sds_gateway/users/views.py
@@ -10,6 +10,7 @@
from django.contrib.auth.models import AbstractBaseUser
from django.contrib.auth.models import AnonymousUser
from django.contrib.messages.views import SuccessMessageMixin
+from django.core.cache import cache
from django.core.paginator import EmptyPage
from django.core.paginator import PageNotAnInteger
from django.core.paginator import Paginator
@@ -1521,11 +1522,19 @@ def get_context_data(self, **kwargs):
else:
initial_data = {}
if existing_dataset:
+ # Convert keywords list to comma-separated string for form display
+ keywords_str = (
+ ", ".join(existing_dataset.keywords)
+ if existing_dataset.keywords
+ and isinstance(existing_dataset.keywords, list)
+ else ""
+ )
initial_data = {
"name": existing_dataset.name,
"description": existing_dataset.description,
"author": existing_dataset.authors[0],
"status": existing_dataset.status,
+ "keywords": keywords_str,
}
dataset_form = DatasetInfoForm(user=self.request.user, initial=initial_data)
@@ -1687,6 +1696,7 @@ def _create_or_update_dataset(self, request, dataset_form) -> Dataset:
dataset.description = dataset_form.cleaned_data["description"]
dataset.authors = [dataset_form.cleaned_data["author"]]
dataset.status = dataset_form.cleaned_data["status"]
+ dataset.keywords = dataset_form.cleaned_data.get("keywords", [])
dataset.save()
# Clear existing relationships
@@ -1699,6 +1709,7 @@ def _create_or_update_dataset(self, request, dataset_form) -> Dataset:
description=dataset_form.cleaned_data["description"],
authors=[dataset_form.cleaned_data["author"]],
status=dataset_form.cleaned_data["status"],
+ keywords=dataset_form.cleaned_data.get("keywords", []),
owner=request.user,
)
@@ -2479,6 +2490,120 @@ def get(self, request, *args, **kwargs) -> JsonResponse:
user_dataset_details_view = DatasetDetailsView.as_view()
+class KeywordsAutocompleteView(Auth0LoginRequiredMixin, View):
+ """
+ View to provide keyword autocomplete suggestions.
+ Returns all unique keywords from ALL datasets (all users).
+
+ Uses caching to improve performance. Cache is invalidated when datasets
+ are created, updated, or deleted.
+ """
+
+ # Cache timeout: 1 hour (3600 seconds)
+ # Cache is invalidated on dataset create/update, so this is just a safety timeout
+ CACHE_TIMEOUT = 3600
+
+ # Global cache key (shared across all users)
+ # Note: This key is also used in api_methods/models.py for cache invalidation
+ CACHE_KEY = "keywords_autocomplete_all_users"
+
+ def _fetch_keywords_from_db(self) -> set[str]:
+ """
+ Fetch all unique keywords from ALL non-deleted datasets.
+ This is the expensive operation that we cache.
+ """
+ # Get all keywords directly from database to avoid recursion issues
+ # Using values_list avoids loading full Dataset objects
+ # and from_db deserialization
+ keywords_values = (
+ Dataset.objects.filter(is_deleted=False)
+ .exclude(keywords__isnull=True)
+ .exclude(keywords="")
+ .values_list("keywords", flat=True)
+ )
+
+ # Extract all keywords from all datasets
+ all_keywords = set()
+ for keywords_value in keywords_values:
+ if not keywords_value:
+ continue
+
+ # Keywords are stored as JSON string, need to parse
+ if isinstance(keywords_value, str):
+ try:
+ keywords_list = json.loads(keywords_value)
+ if isinstance(keywords_list, list):
+ all_keywords.update(keywords_list)
+ except (json.JSONDecodeError, TypeError):
+ # If parsing fails, try to handle as comma-separated string
+ keywords_str = keywords_value.strip()
+ if keywords_str:
+ keywords_list = [
+ k.strip() for k in keywords_str.split(",") if k.strip()
+ ]
+ all_keywords.update(keywords_list)
+ elif isinstance(keywords_value, list):
+ # Already a list (shouldn't happen with values_list, but handle it)
+ all_keywords.update(keywords_value)
+
+ return all_keywords
+
+ def get(self, request: HttpRequest) -> JsonResponse:
+ """
+ Get all unique keywords from ALL datasets (all users).
+
+ Query parameters:
+ - query: Optional search query to filter keywords (case-insensitive)
+ - limit: Optional limit on number of results (default: 50)
+
+ Returns:
+ JsonResponse with list of unique keywords
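+
+ Example: GET /users/keywords-autocomplete/?query=rad&limit=10 returns
+ {"keywords": [...]} with up to 10 keywords containing "rad".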
+ """
+ try:
+ # Get query parameters
+ search_query = request.GET.get("query", "").strip().lower()
+ limit = int(request.GET.get("limit", 50))
+
+ # Try to get from cache first (global cache, same for all users)
+ cached_keywords = cache.get(self.CACHE_KEY)
+
+ if cached_keywords is not None:
+ # Cache hit - use cached keywords
+ all_keywords = cached_keywords
+ else:
+ # Cache miss - fetch from database
+ all_keywords = self._fetch_keywords_from_db()
+ # Cache the result (convert set to list for JSON serialization)
+ cache.set(self.CACHE_KEY, list(all_keywords), self.CACHE_TIMEOUT)
+ # Convert back to set for filtering
+ all_keywords = set(all_keywords)
+
+ # Filter by search query if provided
+ if search_query:
+ filtered_keywords = [
+ keyword
+ for keyword in all_keywords
+ if search_query in keyword.lower()
+ ]
+ else:
+ filtered_keywords = list(all_keywords)
+
+ # Sort and limit
+ filtered_keywords.sort()
+ filtered_keywords = filtered_keywords[:limit]
+
+ return JsonResponse({"keywords": filtered_keywords})
+
+ except Exception:
+ logger.exception("Error retrieving keywords for autocomplete")
+ return JsonResponse(
+ {"error": "Internal server error", "keywords": []}, status=500
+ )
+
+
+keywords_autocomplete_view = KeywordsAutocompleteView.as_view()
+
+
class RenderHTMLFragmentView(Auth0LoginRequiredMixin, View):
"""Generic view to render any HTML fragment from a Django template."""