Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 54 additions & 18 deletions src/readability/opportunities/guidance-handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,29 +12,54 @@

import { ok, notFound } from '@adobe/spacecat-shared-http-utils';

/**
 * Enriches suggestion data with the fields required for auto-optimize.
 *
 * Produces a shallow copy of the suggestion data extended with the page URL,
 * a normalized ISO-8601 scrape timestamp, and the transform rules consumed
 * by the auto-optimize pipeline.
 *
 * @param {Object} data - The suggestion data object.
 * @returns {Object} The enriched data with auto-optimize fields.
 */
function enrichSuggestionDataForAutoOptimize(data) {
  const { pageUrl, scrapedAt, improvedText, selector } = data;

  // Rules describing how the improved text replaces the original content.
  const transformRules = {
    value: improvedText,
    op: 'replace',
    selector,
    target: 'ai-bots',
    prerenderRequired: true,
  };

  return {
    ...data,
    url: pageUrl,
    scrapedAt: new Date(scrapedAt).toISOString(),
    transformRules,
  };
}

/**
 * Maps Mystique readability suggestions to the opportunity suggestion format.
 *
 * Converts Mystique's snake_case payload fields to the camelCase fields the
 * opportunity model expects, and assigns each suggestion a deterministic id
 * derived from its page URL and position in the batch.
 *
 * @param {Array} mystiquesuggestions - Array of suggestions from Mystique.
 * @returns {Array} Array of suggestions in opportunity format.
 */
function mapMystiqueSuggestionsToOpportunityFormat(mystiquesuggestions) {
  return mystiquesuggestions.map((suggestion, index) => ({
    // Fall back to 'unknown' so suggestions without a page URL still get an id.
    id: `readability-opportunity-${suggestion.pageUrl || 'unknown'}-${index}`,
    pageUrl: suggestion.pageUrl,
    originalText: suggestion.original_paragraph,
    improvedText: suggestion.improved_paragraph,
    selector: suggestion.selector,
    originalFleschScore: suggestion.current_flesch_score,
    improvedFleschScore: suggestion.improved_flesch_score,
    seoRecommendation: suggestion.seo_recommendation,
    aiRationale: suggestion.ai_rationale,
    targetFleschScore: suggestion.target_flesch_score,
    type: 'READABILITY_IMPROVEMENT',
  }));
}

export default async function handler(message, context) {
Expand Down Expand Up @@ -92,6 +117,7 @@ export default async function handler(message, context) {
pageUrl: data.pageUrl || auditUrl,
originalText: data.original_paragraph,
improvedText: data.improved_paragraph,
selector: data.selector,
originalFleschScore: data.current_flesch_score,
improvedFleschScore: data.improved_flesch_score,
seoRecommendation: data.seo_recommendation,
Expand Down Expand Up @@ -131,6 +157,13 @@ export default async function handler(message, context) {
if (matchingSuggestion) {
return async () => {
try {
// If improvedText is empty or null, remove the suggestion instead of updating
if (!mystiquesuggestion.improvedText || mystiquesuggestion.improvedText.trim() === '') {
await matchingSuggestion.remove();
log.warn(`[readability-opportunity guidance]: Removed suggestion ${matchingSuggestion.getId()} because Mystique 'improvedText' is empty`);
return true;
}

// Update the existing suggestion with AI improvements
const currentData = matchingSuggestion.getData();
const updatedData = {
Expand All @@ -145,7 +178,10 @@ export default async function handler(message, context) {
mystiqueProcessingCompleted: new Date().toISOString(),
};

await matchingSuggestion.setData(updatedData);
// Enrich with auto-optimize data only after validating improvedText
const enrichedData = enrichSuggestionDataForAutoOptimize(updatedData);

await matchingSuggestion.setData(enrichedData);
await matchingSuggestion.save();

log.info(`[readability-opportunity guidance]: Updated suggestion ${matchingSuggestion.getId()} with AI improvements`);
Expand Down
25 changes: 13 additions & 12 deletions src/readability/opportunities/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -147,23 +147,19 @@ export async function processReadabilityOpportunities(context) {
},
);

// Prepare suggestions data for database
const suggestions = readabilityIssues.map((issue, index) => {
// Prepare suggestions data for database (raw data format for syncSuggestions)
const suggestionsData = readabilityIssues.map((issue, index) => {
// Extract only the fields needed for display (exclude full textContent)
const {
textContent,
...issueWithoutFullText
} = issue;

return {
opportunityId: opportunity.getId(),
type: SuggestionModel.TYPES.CONTENT_UPDATE,
rank: issue.rank, // Use the rank already calculated in analysis
data: {
...issueWithoutFullText,
id: `readability-${siteId}-${index}`,
textPreview: textContent?.substring(0, 500),
},
...issueWithoutFullText,
scrapedAt: new Date(issue.scrapedAt).toISOString(),
id: `readability-${siteId}-${index}`,
textPreview: textContent?.substring(0, 500),
};
});

Expand All @@ -172,10 +168,15 @@ export async function processReadabilityOpportunities(context) {

await syncSuggestions({
opportunity,
newData: suggestions,
newData: suggestionsData,
context,
buildKey,
mapNewSuggestion: (suggestion) => suggestion,
mapNewSuggestion: (data) => ({
opportunityId: opportunity.getId(),
type: SuggestionModel.TYPES.CONTENT_UPDATE,
rank: data.rank,
data,
}),
});

// Send to Mystique for AI-powered readability improvements
Expand Down
23 changes: 19 additions & 4 deletions src/readability/shared/analysis-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ async function analyzeTextReadability(
detectedLanguages,
getSupportedLanguage,
log,
scrapedAt,
) {
try {
// Check if text is in a supported language
Expand Down Expand Up @@ -107,6 +108,7 @@ async function analyzeTextReadability(

return {
pageUrl,
scrapedAt,
selector,
textContent: text,
displayText,
Expand Down Expand Up @@ -165,7 +167,7 @@ const getMeaningfulElementsForReadability = ($) => {
* @returns {Promise<Array>} Array of readability issue objects for text elements
* with poor readability.
*/
export async function analyzePageContent(rawBody, pageUrl, traffic, log) {
export async function analyzePageContent(rawBody, pageUrl, traffic, log, scrapedAt) {
const readabilityIssues = [];

try {
Expand Down Expand Up @@ -236,6 +238,7 @@ export async function analyzePageContent(rawBody, pageUrl, traffic, log) {
detectedLanguages,
getSupportedLanguage,
log,
scrapedAt,
);
analysisPromises.push(analysisPromise);
});
Expand All @@ -248,6 +251,7 @@ export async function analyzePageContent(rawBody, pageUrl, traffic, log) {
detectedLanguages,
getSupportedLanguage,
log,
scrapedAt,
);
analysisPromises.push(analysisPromise);
}
Expand Down Expand Up @@ -279,7 +283,18 @@ export async function analyzePageContent(rawBody, pageUrl, traffic, log) {
}

/**
 *
* Analyzes readability for all scraped pages from S3.
*
* Fetches all scraped page objects for the specified site from S3, analyzes the readability
* of each page's content, and returns the combined list of readability issues found as well
* as the number of processed URLs.
*
* @param {AWS.S3} s3Client - The AWS S3 client instance.
* @param {string} bucketName - The name of the S3 bucket containing scraped pages.
* @param {string} siteId - The site ID whose pages should be analyzed.
* @param {Object} log - Logger instance for info, warn, and error messages.
* @returns {Promise<Object>} The analysis result.
*/
export async function analyzePageReadability(s3Client, bucketName, siteId, log) {
try {
Expand Down Expand Up @@ -307,12 +322,12 @@ export async function analyzePageReadability(s3Client, bucketName, siteId, log)
return { issues: [], processed: false };
}

const { finalUrl, scrapeResult: { rawBody } } = scrapedData;
const { finalUrl, scrapeResult: { rawBody }, scrapedAt } = scrapedData;

// Extract page traffic data if available
const traffic = extractTrafficFromKey(key) || 0;

const pageIssues = await analyzePageContent(rawBody, finalUrl, traffic, log);
const pageIssues = await analyzePageContent(rawBody, finalUrl, traffic, log, scrapedAt);

return {
issues: pageIssues,
Expand Down