diff --git a/backend/api/onboarding_utils/onboarding_completion_service.py b/backend/api/onboarding_utils/onboarding_completion_service.py index 350c0b4b..0dab86e1 100644 --- a/backend/api/onboarding_utils/onboarding_completion_service.py +++ b/backend/api/onboarding_utils/onboarding_completion_service.py @@ -16,6 +16,7 @@ from services.persona.facebook.facebook_persona_scheduler import schedule_facebook_persona_generation from services.oauth_token_monitoring_service import create_oauth_monitoring_tasks from services.onboarding.unified_oauth_validator import UnifiedOAuthValidator +from services.platform_insights_monitoring_service import create_platform_insights_task class OnboardingCompletionService: """Service for handling onboarding completion logic.""" @@ -84,6 +85,31 @@ async def complete_onboarding(self, current_user: Dict[str, Any]) -> Dict[str, A # Non-critical: log but don't fail onboarding completion logger.warning(f"Failed to create OAuth token monitoring tasks for user {user_id}: {e}") + + # Create platform insights tasks (GSC/Bing) after onboarding completion + try: + from services.database import SessionLocal + db = SessionLocal() + try: + connection_summary = self.oauth_validator.get_connection_summary(user_id) + platform_ids = [p.get('provider') for p in connection_summary.get('platforms', []) if p.get('status') == 'active'] + created_platform_tasks = [] + for platform in platform_ids: + if platform in {'gsc', 'bing'}: + task_res = create_platform_insights_task( + user_id=user_id, + platform=platform, + site_url=None, + db=db + ) + if task_res.get('success'): + created_platform_tasks.append(task_res.get('task_id')) + logger.info(f"Created/verified platform insights tasks for user {user_id}: {created_platform_tasks}") + finally: + db.close() + except Exception as e: + logger.warning(f"Failed to create platform insights tasks for user {user_id}: {e}") + # Create website analysis tasks for user's website and competitors try: from services.database 
import SessionLocal diff --git a/backend/routers/gsc_auth.py b/backend/routers/gsc_auth.py index f0ddac2d..abb98a20 100644 --- a/backend/routers/gsc_auth.py +++ b/backend/routers/gsc_auth.py @@ -21,6 +21,7 @@ # Initialize GSC service (for backward compatibility) from services.gsc_service import GSCService +from services.gsc_task_report_service import GSCTaskReportService gsc_service = GSCService() @@ -68,6 +69,19 @@ class GSCCachedOpportunitiesResponse(BaseModel): opportunities: List[Dict[str, Any]] generated_from_cache: bool + +class GSCTaskReportResponse(BaseModel): + connected: bool + site_url: Optional[str] = None + generated_at: Optional[str] = None + sections: List[Dict[str, Any]] + google_query_templates: List[str] + + +class GSCRunTaskRequest(BaseModel): + task_key: str + site_url: Optional[str] = None + @router.get("/auth/url") async def get_gsc_auth_url(request: Request, user: dict = Depends(get_current_user)): """ @@ -508,3 +522,43 @@ async def gsc_health_check(): except Exception as e: logger.error(f"GSC health check failed: {e}") raise HTTPException(status_code=500, detail="GSC service unhealthy") + + +@router.get("/task-reports", response_model=GSCTaskReportResponse) +async def get_gsc_task_reports( + site_url: Optional[str] = Query(None, description="Optional GSC site URL"), + user: dict = Depends(get_current_user) +): + """Get issue 1-4 task sections for onboarding and SEO dashboard widgets.""" + try: + user_id = user.get('id') + if not user_id: + raise HTTPException(status_code=400, detail="User ID not found") + + service = GSCTaskReportService() + return service.build_task_report(user_id=str(user_id), site_url=site_url) + except Exception as e: + logger.error(f"Error getting GSC task reports: {e}") + raise HTTPException(status_code=500, detail=f"Error getting task reports: {str(e)}") + + +@router.post("/task-reports/run") +async def run_gsc_task_report( + request: GSCRunTaskRequest, + user: dict = Depends(get_current_user) +): + """Run one issue 
task once (onboarding learn mode).""" + try: + user_id = user.get('id') + if not user_id: + raise HTTPException(status_code=400, detail="User ID not found") + + service = GSCTaskReportService() + return service.run_single_task( + user_id=str(user_id), + task_key=request.task_key, + site_url=request.site_url + ) + except Exception as e: + logger.error(f"Error running GSC task report: {e}") + raise HTTPException(status_code=500, detail=f"Error running task report: {str(e)}") diff --git a/backend/services/gsc_query_request_shapes_tests.py b/backend/services/gsc_query_request_shapes_tests.py new file mode 100644 index 00000000..6061e5de --- /dev/null +++ b/backend/services/gsc_query_request_shapes_tests.py @@ -0,0 +1,41 @@ +from services.gsc_service import GSCService + + +class MinimalGSCService(GSCService): + def __init__(self): + # Skip DB/table init for pure request-shape tests. + pass + + +def test_build_query_request_uses_documented_bounds_and_type(): + svc = MinimalGSCService() + + req = svc._build_search_analytics_request( + start_date="2026-01-01", + end_date="2026-01-31", + dimensions=["query"], + row_limit=100000, + start_row=-10, + ) + + assert req["startDate"] == "2026-01-01" + assert req["endDate"] == "2026-01-31" + assert req["type"] == "web" + assert req["dimensions"] == ["query"] + assert req["startRow"] == 0 + assert req["rowLimit"] == 25000 + + +def test_build_overall_request_omits_dimensions_for_aggregate_totals(): + svc = MinimalGSCService() + + req = svc._build_search_analytics_request( + start_date="2026-01-01", + end_date="2026-01-31", + dimensions=None, + row_limit=1, + ) + + assert "dimensions" not in req + assert req["rowLimit"] == 1 + assert req["type"] == "web" diff --git a/backend/services/gsc_service.py b/backend/services/gsc_service.py index 8f0591f9..facbb323 100644 --- a/backend/services/gsc_service.py +++ b/backend/services/gsc_service.py @@ -15,6 +15,8 @@ class GSCService: """Service for Google Search Console integration.""" + 
DEFAULT_SEARCH_TYPE = 'web' + MAX_ROW_LIMIT = 25000 def __init__(self): """Initialize GSC service with database connection.""" @@ -342,11 +344,12 @@ def get_search_analytics(self, user_id: str, site_url: str, return {'error': 'Authentication failed', 'rows': [], 'rowCount': 0} # Step 1: Verify data presence first (as per GSC API documentation) - verification_request = { - 'startDate': start_date, - 'endDate': end_date, - 'dimensions': ['date'] # Only date dimension for verification - } + verification_request = self._build_search_analytics_request( + start_date=start_date, + end_date=end_date, + dimensions=['date'], + row_limit=self.MAX_ROW_LIMIT, + ) logger.info(f"GSC Data verification request for user {user_id}: {verification_request}") @@ -371,12 +374,12 @@ def get_search_analytics(self, user_id: str, site_url: str, return {'error': f'Data verification failed: {str(verification_error)}', 'rows': [], 'rowCount': 0} # Step 2: Get overall metrics (no dimensions) - request = { - 'startDate': start_date, - 'endDate': end_date, - 'dimensions': [], # No dimensions for overall metrics - 'rowLimit': 1000 - } + request = self._build_search_analytics_request( + start_date=start_date, + end_date=end_date, + dimensions=None, # Aggregated totals (no dimensions) + row_limit=1, + ) logger.info(f"GSC API request for user {user_id}: {request}") @@ -392,12 +395,12 @@ def get_search_analytics(self, user_id: str, site_url: str, return {'error': str(api_error), 'rows': [], 'rowCount': 0} # Step 3: Get query-level data for insights (as per documentation) - query_request = { - 'startDate': start_date, - 'endDate': end_date, - 'dimensions': ['query'], # Get query-level data - 'rowLimit': 1000 - } + query_request = self._build_search_analytics_request( + start_date=start_date, + end_date=end_date, + dimensions=['query'], + row_limit=self.MAX_ROW_LIMIT, + ) logger.info(f"GSC Query-level request for user {user_id}: {query_request}") @@ -458,6 +461,38 @@ def get_search_analytics(self, 
user_id: str, site_url: str, except Exception as e: logger.error(f"Error getting search analytics for user {user_id}: {e}") raise + + def _build_search_analytics_request( + self, + start_date: str, + end_date: str, + dimensions: Optional[List[str]] = None, + row_limit: Optional[int] = None, + start_row: int = 0, + search_type: Optional[str] = None, + ) -> Dict[str, Any]: + """Build a GSC Search Analytics request body aligned with API documentation. + + Notes: + - `dimensions` is optional; omit it entirely for aggregated totals. + - `rowLimit` max is 25,000 per API call. + - `type` defaults to `web` when not specified. + """ + request: Dict[str, Any] = { + 'startDate': start_date, + 'endDate': end_date, + 'type': search_type or self.DEFAULT_SEARCH_TYPE, + 'startRow': max(start_row, 0), + } + + if dimensions: + request['dimensions'] = dimensions + + if row_limit is not None: + bounded_row_limit = max(1, min(int(row_limit), self.MAX_ROW_LIMIT)) + request['rowLimit'] = bounded_row_limit + + return request def get_sitemaps(self, user_id: str, site_url: str) -> List[Dict[str, Any]]: """Get sitemaps from GSC.""" diff --git a/backend/services/gsc_task_report_service.py b/backend/services/gsc_task_report_service.py new file mode 100644 index 00000000..4ad71357 --- /dev/null +++ b/backend/services/gsc_task_report_service.py @@ -0,0 +1,196 @@ +"""GSC task report service for onboarding step-5 and SEO dashboard sections.""" + +from __future__ import annotations + +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional + +from loguru import logger + +from services.gsc_service import GSCService + + +class GSCTaskReportService: + """Builds report payloads for issue 1-4 task sections.""" + + GOOGLE_QUERY_TEMPLATES = [ + 'site:{domain} "{query}"', + 'site:{domain} intitle:"{query}"', + 'site:{domain} inurl:{topic}', + '"{query}" site:{competitor}', + 'intitle:"{query}" "{competitor}"', + 'related:{domain}', + 'site:{domain} -inurl:tag -inurl:category 
"{query}"' + ] + + def __init__(self): + self.gsc_service = GSCService() + + def _get_site_url(self, user_id: str, site_url: Optional[str]) -> Optional[str]: + if site_url: + return site_url + sites = self.gsc_service.get_site_list(user_id) + if not sites: + return None + return sites[0].get("siteUrl") + + def _fetch_query_rows(self, user_id: str, site_url: str, days: int = 30) -> List[Dict[str, Any]]: + end_date = datetime.now().strftime('%Y-%m-%d') + start_date = (datetime.now() - timedelta(days=days)).strftime('%Y-%m-%d') + return self._fetch_query_rows_range(user_id, site_url, start_date, end_date) + + def _fetch_query_rows_range( + self, + user_id: str, + site_url: str, + start_date: str, + end_date: str, + ) -> List[Dict[str, Any]]: + data = self.gsc_service.get_search_analytics( + user_id=user_id, + site_url=site_url, + start_date=start_date, + end_date=end_date, + ) + return data.get("query_data", {}).get("rows", []) + + def _high_impression_low_ctr(self, rows: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + opportunities: List[Dict[str, Any]] = [] + for row in rows: + keys = row.get("keys", []) + query = keys[0] if keys else None + impressions = float(row.get("impressions", 0) or 0) + ctr = float(row.get("ctr", 0) or 0) + position = float(row.get("position", 0) or 0) + if query and impressions >= 100 and ctr < 0.03 and 1 <= position <= 20: + opportunities.append({ + "query": query, + "impressions": int(impressions), + "clicks": int(float(row.get("clicks", 0) or 0)), + "ctr": round(ctr * 100, 2), + "position": round(position, 2), + "recommended_action": "Rewrite title/meta + align H1 intro to intent", + }) + return sorted(opportunities, key=lambda x: x["impressions"], reverse=True)[:10] + + def _decay_summary(self, current: List[Dict[str, Any]], previous: List[Dict[str, Any]]) -> Dict[str, Any]: + current_map = {((r.get("keys") or [None])[0]): r for r in current if r.get("keys")} + previous_map = {((r.get("keys") or [None])[0]): r for r in previous if 
r.get("keys")} + decayed = 0 + samples = [] + for query, row in current_map.items(): + if not query or query not in previous_map: + continue + curr_clicks = float(row.get("clicks", 0) or 0) + prev_clicks = float(previous_map[query].get("clicks", 0) or 0) + curr_ctr = float(row.get("ctr", 0) or 0) + prev_ctr = float(previous_map[query].get("ctr", 0) or 0) + curr_pos = float(row.get("position", 0) or 0) + prev_pos = float(previous_map[query].get("position", 0) or 0) + if prev_clicks > 0 and (curr_clicks < prev_clicks * 0.8 or curr_ctr < prev_ctr - 0.01 or curr_pos > prev_pos + 1): + decayed += 1 + samples.append({ + "query": query, + "clicks_delta_pct": round(((curr_clicks - prev_clicks) / prev_clicks) * 100, 2), + "ctr_delta_pp": round((curr_ctr - prev_ctr) * 100, 2), + "position_delta": round(curr_pos - prev_pos, 2), + }) + return {"decayed_queries": decayed, "samples": samples[:8]} + + def build_task_report(self, user_id: str, site_url: Optional[str] = None) -> Dict[str, Any]: + selected_site = self._get_site_url(user_id, site_url) + if not selected_site: + return { + "connected": False, + "site_url": site_url, + "message": "No GSC site connected", + "sections": [], + "google_query_templates": self.GOOGLE_QUERY_TEMPLATES, + } + + # Compare two equivalent windows: current 30d vs previous 30d. 
+ current_end = datetime.now().date() + current_start = current_end - timedelta(days=30) + prev_end = current_start + prev_start = prev_end - timedelta(days=30) + + rows_30d = self._fetch_query_rows_range( + user_id, + selected_site, + current_start.strftime('%Y-%m-%d'), + current_end.strftime('%Y-%m-%d'), + ) + rows_prev_30d = self._fetch_query_rows_range( + user_id, + selected_site, + prev_start.strftime('%Y-%m-%d'), + prev_end.strftime('%Y-%m-%d'), + ) + + issue1 = self._high_impression_low_ctr(rows_30d) + issue2 = self._decay_summary(rows_30d, rows_prev_30d) + + sections = [ + { + "issue_key": "issue_1", + "title": "Keyword-to-brief opportunities", + "description": "High impression / low CTR queries to refresh titles/meta/outlines.", + "metrics": { + "opportunities_count": len(issue1), + "top_impression": issue1[0]["impressions"] if issue1 else 0, + }, + "items": issue1, + }, + { + "issue_key": "issue_2", + "title": "Intent-aware refresh queue", + "description": "Weekly decayed queries and recommended rewrites.", + "metrics": { + "decayed_queries": issue2["decayed_queries"], + "sample_count": len(issue2["samples"]), + }, + "items": issue2["samples"], + }, + { + "issue_key": "issue_3", + "title": "Property-aware publishing guardrails", + "description": "Publishing guidance requires GSC + CMS integrations.", + "metrics": { + "guardrails_enabled": True, + "required_integrations": ["gsc", "wordpress_or_wix"], + }, + "items": [], + }, + { + "issue_key": "issue_4", + "title": "Google query monitoring templates", + "description": "Exact Google query templates for SERP checks and AI report prompts.", + "metrics": { + "templates_count": len(self.GOOGLE_QUERY_TEMPLATES), + }, + "items": [{"query_template": q} for q in self.GOOGLE_QUERY_TEMPLATES], + }, + ] + + return { + "connected": True, + "site_url": selected_site, + "generated_at": datetime.utcnow().isoformat(), + "sections": sections, + "google_query_templates": self.GOOGLE_QUERY_TEMPLATES, + } + + def 
run_single_task(self, user_id: str, task_key: str, site_url: Optional[str] = None) -> Dict[str, Any]: + report = self.build_task_report(user_id, site_url) + sections = report.get("sections", []) + selected = next((s for s in sections if s.get("issue_key") == task_key), None) + if not selected: + return {"success": False, "error": f"Unknown task key: {task_key}"} + logger.info("[GSCTaskReportService] run_single_task user={} task={}", user_id, task_key) + return { + "success": True, + "task_key": task_key, + "site_url": report.get("site_url"), + "result": selected, + "generated_at": report.get("generated_at"), + } diff --git a/docs/gsc_step5_dashboard_design_review.md b/docs/gsc_step5_dashboard_design_review.md new file mode 100644 index 00000000..e70f2015 --- /dev/null +++ b/docs/gsc_step5_dashboard_design_review.md @@ -0,0 +1,52 @@ +# GSC Step-5 + SEO Dashboard Task Reporting: Design Review (Issues 1–4) + +## Scope +This document summarizes final design considerations, issues solved, and exact code changes for the GSC task-reporting implementation used by: +- Onboarding Step 5 (optional “run once” testing UX) +- SEO Dashboard (continuous monitoring visibility) + +## Issues Addressed +1. **Issue 1 – Task bootstrapping after onboarding** + - Ensured onboarding completion creates/verifies platform-insights tasks for connected `gsc`/`bing`. + +2. **Issue 2 – GSC query request correctness & scale** + - Standardized Search Analytics request bodies with documented fields (`type`, `rowLimit` cap, `startRow`) and removed ambiguous aggregate request shape (`dimensions: []`). + +3. **Issue 3 – Opportunity/decay reliability** + - Fixed decay comparison to use equivalent windows (current 30d vs previous 30d), not a sliced 60d dataset. + +4. **Issue 4 – Shared reporting contract + exact query templates** + - Preserved a single backend contract for task sections and exact Google query templates reused in onboarding/dashboard UI. 
+ +## Design Considerations +- **Single source of truth**: `GSCTaskReportService` remains the only section-construction service consumed by both UI surfaces. +- **Docs-aligned GSC requests**: request builder enforces valid defaults and bounded limits for stable behavior. +- **Non-blocking onboarding**: task creation is best-effort and logged; onboarding completion is not failed by monitoring setup issues. +- **Composable UI**: `GSCTaskReportsPanel` is shared to avoid duplicated behavior and drift. + +## Specific Code Edits +- **GSC request builder + docs-aligned usage** + - `backend/services/gsc_service.py` + - Added `_build_search_analytics_request(...)` and switched verification/aggregate/query requests to use it. + - Added constants: `DEFAULT_SEARCH_TYPE='web'`, `MAX_ROW_LIMIT=25000`. + +- **Correct period-over-period decay logic** + - `backend/services/gsc_task_report_service.py` + - Added `_fetch_query_rows_range(...)`. + - Updated comparison windows to current 30d vs previous 30d. + +- **Task report API contract (already in previous commit; retained)** + - `backend/routers/gsc_auth.py` + - `GET /gsc/task-reports`, `POST /gsc/task-reports/run`. + +- **Onboarding task bootstrap (already in previous commit; retained)** + - `backend/api/onboarding_utils/onboarding_completion_service.py` + +- **Validation tests added** + - `backend/services/gsc_query_request_shapes_tests.py` + - Verifies request shape constraints and aggregate request behavior. + +## Validation Performed +- Unit tests for GSC query request shapes and constraints. +- Python compilation checks for modified backend modules. 
+ diff --git a/frontend/src/api/gsc.ts b/frontend/src/api/gsc.ts index da7d1261..d404cecc 100644 --- a/frontend/src/api/gsc.ts +++ b/frontend/src/api/gsc.ts @@ -58,6 +58,22 @@ export interface GSCDataQualityResponse { }; } + + +export interface GSCTaskReportResponse { + connected: boolean; + site_url?: string; + generated_at?: string; + sections: Array<{ + issue_key: string; + title: string; + description: string; + metrics: Record; + items: any[]; + }>; + google_query_templates: string[]; +} + export interface GSCCachedOpportunitiesResponse { site_url: string; opportunities: Array<{ @@ -229,6 +245,25 @@ class GSCAPI { return response.data; } + + + async getTaskReports(siteUrl?: string): Promise { + const client = await this.getAuthenticatedClient(); + const response = await client.get(`${this.baseUrl}/task-reports`, { + params: siteUrl ? { site_url: siteUrl } : undefined + }); + return response.data; + } + + async runTaskReport(taskKey: string, siteUrl?: string): Promise<{ success: boolean; result?: any }> { + const client = await this.getAuthenticatedClient(); + const response = await client.post(`${this.baseUrl}/task-reports/run`, { + task_key: taskKey, + site_url: siteUrl + }); + return response.data; + } + /** * Health check */ diff --git a/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx b/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx index 832cb374..6b0fc3e1 100644 --- a/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx +++ b/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx @@ -32,6 +32,7 @@ import { useBingOAuth } from '../../hooks/useBingOAuth'; import { useGSCConnection } from './common/useGSCConnection'; import { usePlatformConnections } from './common/usePlatformConnections'; import PlatformAnalytics from '../shared/PlatformAnalytics'; +import GSCTaskReportsPanel from '../shared/GSCTaskReportsPanel'; import { cachedAnalyticsAPI } from '../../api/cachedAnalytics'; import { gscAPI, type 
GSCDataQualityResponse, type GSCCachedOpportunitiesResponse } from '../../api/gsc'; @@ -502,6 +503,20 @@ const IntegrationsStep: React.FC = ({ onContinue, updateH )} + + + {/* Optional Step-5 task testing UI (shared with SEO dashboard) */} + {connectedPlatforms.includes('gsc') && ( + +
+                      <GSCTaskReportsPanel />
+ )} + {/* Social Media Platforms */}
diff --git a/frontend/src/components/SEODashboard/SEODashboard.tsx b/frontend/src/components/SEODashboard/SEODashboard.tsx index 0b24e355..2a705ed8 100644 --- a/frontend/src/components/SEODashboard/SEODashboard.tsx +++ b/frontend/src/components/SEODashboard/SEODashboard.tsx @@ -39,6 +39,7 @@ import useSEOCopilotStore from '../../stores/seoCopilotStore'; // GSC Components import GSCLoginButton from './components/GSCLoginButton'; +import GSCTaskReportsPanel from '../shared/GSCTaskReportsPanel'; // Zustand store import { useSEODashboardStore } from '../../stores/seoDashboardStore'; @@ -693,6 +694,10 @@ const SEODashboard: React.FC = () => { }} showBackgroundJobs={showBackgroundJobs} /> + + {platformStatus.gsc.connected && ( + + )} {/* Enhanced Metrics with Tooltips */} diff --git a/frontend/src/components/shared/GSCTaskReportsPanel.tsx b/frontend/src/components/shared/GSCTaskReportsPanel.tsx new file mode 100644 index 00000000..97fd90ae --- /dev/null +++ b/frontend/src/components/shared/GSCTaskReportsPanel.tsx @@ -0,0 +1,102 @@ +import React, { useEffect, useState } from 'react'; +import { + Box, + Paper, + Typography, + Button, + CircularProgress, + Alert, + Chip, + Stack, + Divider +} from '@mui/material'; +import { gscAPI } from '../../api/gsc'; + +interface Props { + siteUrl?: string; + compact?: boolean; + title?: string; +} + +const GSCTaskReportsPanel: React.FC = ({ siteUrl, compact = false, title = 'GSC Task Reports (Issues 1-4)' }) => { + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + const [data, setData] = useState(null); + const [runningTask, setRunningTask] = useState(null); + + const load = async () => { + setLoading(true); + setError(null); + try { + const response = await gscAPI.getTaskReports(siteUrl); + setData(response); + } catch (e: any) { + setError(e?.message || 'Failed to load task reports'); + } finally { + setLoading(false); + } + }; + + const runTask = async (taskKey: string) => { + 
setRunningTask(taskKey); + try { + await gscAPI.runTaskReport(taskKey, siteUrl); + await load(); + } catch (e) { + console.error('Failed to run task:', e); + } finally { + setRunningTask(null); + } + }; + + useEffect(() => { + load(); + }, [siteUrl]); + + return ( + + + {title} + + + + {loading && } + {error && {error}} + {data?.connected === false && Connect GSC and choose a property to preview these tasks.} + + {data?.sections?.map((section: any) => ( + + + + {section.title} + {section.description} + + + + + + {Object.entries(section.metrics || {}).map(([k, v]) => ( + + ))} + + + {!compact && Array.isArray(section.items) && section.items.slice(0, 3).map((item: any, idx: number) => ( + + • {item.query || item.query_template || JSON.stringify(item)} + + ))} + + + ))} + + ); +}; + +export default GSCTaskReportsPanel;