Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
9889a63
feat: add AemClient for AEM API interactions
holtvogt Nov 14, 2025
310452b
feat: implement AemAnalyzer and FragmentAnalyzer for content fragment…
holtvogt Nov 14, 2025
eebdcb8
feat: add content fragment unused audit and opportunity data mapper
holtvogt Nov 14, 2025
2d7874c
feat: add content fragment unused handler to the main index
holtvogt Nov 14, 2025
fd7f2c6
feat: add unit tests for AemAnalyzer, AemClient, and FragmentAnalyzer
holtvogt Nov 14, 2025
dee8a2b
feat: add unit tests for content fragment unused handler and opportun…
holtvogt Nov 14, 2025
5bfd04b
feat: add timeout between API calls
holtvogt Nov 18, 2025
e48ccf7
fix: change fragments return with data identifier
holtvogt Nov 18, 2025
f7089de
feat: implement S3 storage functionality for uploading and downloadin…
holtvogt Nov 18, 2025
eb0bc33
feat: add S3 integration for fragment storage and suggestion creation
holtvogt Nov 18, 2025
e0de110
feat: add TODO for checking unpublished modified content
holtvogt Nov 21, 2025
664a9c8
refactor: remove pagination limit in AemAnalyzer
holtvogt Nov 21, 2025
552e00a
refactor: remove MAX_PAGES constant from AemAnalyzer
holtvogt Nov 21, 2025
14cfa5d
feat: implement retry logic for fetching fragments with timeout handling
holtvogt Nov 26, 2025
1ee5bae
fix: buildKey function to use fragmentPath
holtvogt Nov 26, 2025
781ebc3
fix: dead code in fragment fetching logic
holtvogt Nov 26, 2025
4f5be05
test: add unit tests for S3 storage upload and download methods
holtvogt Nov 26, 2025
2992a45
test: add timeout handling and pagination logic
holtvogt Nov 26, 2025
3b4aeb9
test: enhance unit tests for content fragment unused handler with S3 …
holtvogt Nov 26, 2025
ec089d6
Merge branch 'main' into feat/add-unused-content-fragments-audit
holtvogt Nov 26, 2025
95708a7
feat: update AemAnalyzer and AemClient to use ImsClient for token man…
holtvogt Dec 2, 2025
4fc78e3
docs: add JSDoc
holtvogt Dec 4, 2025
4197258
Merge branch 'main' into feat/add-unused-content-fragments-audit
holtvogt Dec 4, 2025
d8187ab
fix: update AemAnalyzer stub in tests to use createFrom method
holtvogt Dec 4, 2025
5793d27
refactor: update AemClient tests to use mock ImsClient and improve er…
holtvogt Dec 4, 2025
ca5578d
refactor: update AemAnalyzer tests to use mockAemClient and remove co…
holtvogt Dec 4, 2025
39056d0
test: add tests for createFrom method and retry logic for token expir…
holtvogt Dec 4, 2025
6739b11
test: add unit tests for isTokenExpired method in AemClient
holtvogt Dec 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 139 additions & 0 deletions src/content-fragment-insights/aem-analyzer.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright 2025 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import { AemClient } from './clients/aem-client.js';
import { FragmentAnalyzer } from './fragment-analyzer.js';

/**
 * Collects content fragments through an AemClient and hands them to a
 * FragmentAnalyzer to determine which ones are unused.
 */
export class AemAnalyzer {
  /** Root path passed to the fragment listing call. */
  static DEFAULT_FRAGMENT_ROOT_PATH = '/content/dam/';

  /** Total number of tries (the first one included) for a single page. */
  static MAX_FETCH_ATTEMPTS = 3;

  /** Value of `error.code` that is treated as a retryable timeout. */
  static ERROR_CODE_TIMEOUT = 'ETIMEOUT';

  /**
   * @param {Object} aemClient - Client exposing `getFragments(path, options)` and `isTokenExpired()`
   * @param {Object} log - Logger; `info` and `warn` are used by this class
   */
  constructor(aemClient, log) {
    this.log = log;
    this.aemClient = aemClient;
    this.fragmentAnalyzer = new FragmentAnalyzer(log);
    this.rootPath = AemAnalyzer.DEFAULT_FRAGMENT_ROOT_PATH;
    this.fragments = [];
  }

  /**
   * Factory method that builds the AEM client from the context first.
   * @param {Object} context - The audit context; `context.log` is used as logger
   * @returns {Promise<AemAnalyzer>}
   */
  static async createFrom(context) {
    const { log } = context;
    const aemClient = await AemClient.createFrom(context);
    return new AemAnalyzer(aemClient, log);
  }

  /**
   * Maps a raw fragment item to the flat shape used by the analyzer.
   * @param {Object|null|undefined} fragment - Raw item as returned by the AEM client
   * @returns {Object|null} The parsed fragment, or null when no fragment was given
   */
  static parseFragment(fragment) {
    if (!fragment) {
      return null;
    }

    const fragmentPath = fragment.path;
    // A single item without a string status must not abort the whole crawl,
    // so it is mapped to null just like the other optional fields.
    const status = typeof fragment.status === 'string'
      ? fragment.status.toUpperCase()
      : null;
    const createdAt = fragment.created?.at || null;
    const modifiedAt = fragment.modified?.at || null;
    const publishedAt = fragment.published?.at || null;

    return {
      fragmentPath,
      status,
      createdAt,
      modifiedAt,
      publishedAt,
      lastModified: modifiedAt || createdAt || null,
    };
  }

  /**
   * Fetches all fragments and runs the unused-fragment analysis on them.
   * @returns {Promise<{totalFragments: number, totalUnused: number, data: Array}>}
   */
  async findUnusedFragments() {
    await this.fetchAllFragments();

    const unusedFragments = this.fragmentAnalyzer.findUnusedFragments(this.fragments);

    return {
      totalFragments: this.fragments.length,
      totalUnused: unusedFragments.length,
      data: unusedFragments,
    };
  }

  /**
   * Walks every page below the root path and stores the parsed fragments
   * in `this.fragments`, replacing any previous content.
   * @returns {Promise<void>}
   */
  async fetchAllFragments() {
    const fragments = [];
    let cursor = null;

    this.log.info(`[Content Fragment Insights] Fetching fragments from ${this.rootPath}`);

    // For large tenants, this fetch loop can take minutes. Add pagination if needed in the future
    do {
      // eslint-disable-next-line no-await-in-loop
      const { items, cursor: nextCursor } = await this.fetchFragmentsPage({ cursor });

      items.forEach((item) => {
        const parsedFragment = AemAnalyzer.parseFragment(item);
        if (parsedFragment) {
          fragments.push(parsedFragment);
        }
      });

      cursor = nextCursor;

      if (cursor) {
        // Be respectful to the API
        // eslint-disable-next-line no-await-in-loop
        await new Promise((resolve) => {
          setTimeout(resolve, 100);
        });
      }
    } while (cursor);

    this.log.info(`[Content Fragment Insights] Collected ${fragments.length} fragments from ${this.rootPath}`);

    this.fragments = fragments;
  }

  /**
   * Fetches one page of fragments, retrying on timeouts and expired tokens.
   *
   * Errors that are neither a timeout nor accompanied by an expired token are
   * rethrown immediately. When every attempt fails with a retryable error an
   * empty page is returned; the caller treats that as the end of pagination,
   * so the final warning states that the result may be incomplete.
   *
   * @param {Object} params
   * @param {string|null} params.cursor - Pagination cursor, null for the first page
   * @returns {Promise<{items: Array, cursor: string|null}>}
   */
  async fetchFragmentsPage({ cursor }) {
    const options = {
      cursor,
      projection: 'minimal',
    };
    const maxAttempts = AemAnalyzer.MAX_FETCH_ATTEMPTS;

    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
      try {
        // `return await` keeps the rejection inside this try block.
        // eslint-disable-next-line no-await-in-loop
        return await this.aemClient.getFragments(this.rootPath, options);
      } catch (error) {
        const isTimeout = error?.code === AemAnalyzer.ERROR_CODE_TIMEOUT;
        const isTokenExpired = this.aemClient.isTokenExpired();

        if (!isTimeout && !isTokenExpired) {
          throw error;
        }

        const isLastAttempt = attempt >= maxAttempts;

        if (isLastAttempt) {
          // No retry follows, so do not claim one; make the truncation visible.
          const reason = isTimeout ? 'Timeout while fetching fragment page' : 'Token expired';
          this.log.warn(
            `[Content Fragment Insights] ${reason}. Giving up after ${attempt}/${maxAttempts} attempts; collected fragments may be incomplete`,
          );
        } else if (isTimeout) {
          this.log.warn(
            `[Content Fragment Insights] Timeout while fetching fragment page. Retrying... attempt ${attempt}/${maxAttempts}`,
          );
        } else {
          this.log.warn(
            `[Content Fragment Insights] Token expired. Refreshing and retrying... attempt ${attempt}/${maxAttempts}`,
          );
        }
      }
    }

    return { items: [], cursor: null };
  }
}
196 changes: 196 additions & 0 deletions src/content-fragment-insights/clients/aem-client.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
/*
* Copyright 2025 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

import { ImsClient } from '@adobe/spacecat-shared-ims-client';
import { tracingFetch as fetch } from '@adobe/spacecat-shared-utils';

/**
 * Client for interacting with Adobe Experience Manager (AEM) Sites API
 * @see https://developer.adobe.com/experience-cloud/experience-manager-apis/api/stable/sites/
 */
export class AemClient {
  static API_SITES_BASE = '/adobe/sites';

  static API_SITES_FRAGMENTS = `${AemClient.API_SITES_BASE}/cf/fragments`;

  /**
   * @param {string} baseUrl - Base URL that every request path is appended to
   * @param {Object} imsClient - Client exposing `getServiceAccessToken()`
   * @param {Object} [log=console] - Logger; `debug` and `error` are used by this class
   * @throws {Error} When baseUrl or imsClient is missing
   */
  constructor(baseUrl, imsClient, log = console) {
    if (!baseUrl) {
      throw new Error('baseUrl is required for AEM client');
    }

    if (!imsClient) {
      throw new Error('imsClient is required for AEM client');
    }

    this.baseUrl = baseUrl;
    this.imsClient = imsClient;
    this.accessToken = null;
    this.tokenObtainedAt = null;
    this.log = log;
  }

  /**
   * Factory method to create an AemClient from context
   * @param {Object} context - The audit context
   * @param {Object} context.site - The site object
   * @param {Object} context.env - Environment variables
   * @param {Object} context.log - Logger instance
   * @returns {Promise<AemClient>}
   * @throws {Error} When the site's delivery config carries no author URL
   */
  static async createFrom(context) {
    const { site, env, log } = context;

    // Optional chaining so that a site without any delivery config ends up in
    // the descriptive error below instead of a raw TypeError.
    const authorUrl = site.getDeliveryConfig()?.authorURL;
    if (!authorUrl) {
      throw new Error('AEM Author configuration missing: AEM Author URL required');
    }

    const imsClient = ImsClient.createFrom({
      log,
      env: {
        IMS_HOST: env.IMS_HOST,
        IMS_CLIENT_ID: env.IMS_CLIENT_ID,
        IMS_CLIENT_CODE: env.IMS_CLIENT_CODE,
        IMS_CLIENT_SECRET: env.IMS_CLIENT_SECRET,
        IMS_SCOPE: env.IMS_SCOPE,
      },
    });

    return new AemClient(authorUrl, imsClient, log);
  }

  /**
   * Gets a valid service access token, fetching a new one if expired or missing.
   * @returns {Promise<string>} The access token string
   */
  async getAccessToken() {
    if (this.isTokenExpired()) {
      this.accessToken = await this.imsClient.getServiceAccessToken();
      this.tokenObtainedAt = Date.now();
    }
    return this.accessToken.access_token;
  }

  /**
   * Checks if the current access token is expired.
   * As a side effect, a missing or expired token is invalidated.
   * @returns {boolean} True if the access token is expired, false otherwise.
   */
  isTokenExpired() {
    if (!this.accessToken || !this.tokenObtainedAt) {
      this.invalidateAccessToken();
      return true;
    }

    // `expires_in` is multiplied by 1000 to compare against Date.now() (ms).
    const expiresAt = this.tokenObtainedAt + (this.accessToken.expires_in * 1000);
    const isExpired = Date.now() >= expiresAt;
    if (isExpired) {
      this.invalidateAccessToken();
    }

    return isExpired;
  }

  /**
   * Invalidates the current access token, forcing a refresh on next request.
   */
  invalidateAccessToken() {
    this.accessToken = null;
    this.tokenObtainedAt = null;
  }

  /**
   * Generic request method for AEM API calls
   * @private
   * @param {string} method - HTTP method
   * @param {string} path - API path, appended to the base URL
   * @param {Object} options - Additional fetch options; `options.headers` is
   *   merged on top of the default Authorization and Accept headers
   * @returns {Promise<Object|null>} Parsed JSON body, or null for non-JSON responses
   * @throws {Error} When the response status is not ok
   */
  async request(method, path, options = {}) {
    const url = `${this.baseUrl}${path}`;
    const token = await this.getAccessToken();

    const headers = {
      Authorization: `Bearer ${token}`,
      Accept: 'application/json',
      ...options.headers,
    };

    this.log.debug(`[AEM Client] ${method} ${url}`);

    // Spread the caller's options first: otherwise `options.headers` would
    // replace the merged headers above and drop the Authorization header.
    const response = await fetch(url, {
      ...options,
      method,
      headers,
    });

    if (!response.ok) {
      const errorText = await response.text();
      this.log.error(`[AEM Client] Request failed with status ${response.status}: ${errorText}`);
      throw new Error(`AEM API request failed with status ${response.status}: ${errorText}`);
    }

    // Handle non-empty responses
    const contentType = response.headers.get('content-type');
    if (contentType?.includes('application/json')) {
      return response.json();
    }

    return null;
  }

  /**
   * List all content fragments
   * @see https://developer.adobe.com/experience-cloud/experience-manager-apis/api/stable/sites/#operation/fragments/getFragments
   *
   * @param {string} path - The path to search for content fragments
   * @param {Object} options - Query options
   * @param {string} [options.cursor] - Pagination cursor
   * @param {string} [options.projection='minimal'] - Response projection (minimal, full)
   * @param {number} [options.limit] - Maximum items per page
   * @returns {Promise<{items: Array, cursor: string|null}>}
   * @throws {Error} Rethrows any request failure after logging it
   */
  async getFragments(path, options = {}) {
    const {
      cursor = null,
      projection = 'minimal',
      limit,
    } = options;

    const params = new URLSearchParams({
      path,
      projection,
    });

    if (cursor) {
      params.set('cursor', cursor);
    }

    if (limit) {
      params.set('limit', limit.toString());
    }

    const queryPath = `${AemClient.API_SITES_FRAGMENTS}?${params.toString()}`;

    try {
      const data = await this.request('GET', queryPath);

      return {
        items: data?.items || [],
        cursor: data?.cursor || null,
      };
    } catch (error) {
      this.log.error(`[AEM Client] Failed to fetch fragments from ${path}:`, error);
      throw error;
    }
  }
}
Loading