diff --git a/backend/routers/gsc_auth.py b/backend/routers/gsc_auth.py index f117f1a9..f0ddac2d 100644 --- a/backend/routers/gsc_auth.py +++ b/backend/routers/gsc_auth.py @@ -4,12 +4,12 @@ All legacy endpoints are preserved with deprecation warnings for backward compatibility. """ -from fastapi import APIRouter, HTTPException, Depends, Query -from fastapi.responses import HTMLResponse, JSONResponse +from fastapi import APIRouter, HTTPException, Depends, Query, Request +from fastapi.responses import HTMLResponse from typing import Dict, List, Any, Optional from pydantic import BaseModel from loguru import logger -import os +from urllib.parse import parse_qs, urlparse # Import unified OAuth client for migration from services.integrations.registry import get_provider @@ -23,6 +23,9 @@ from services.gsc_service import GSCService gsc_service = GSCService() + +DEPRECATED_GSC_LEGACY_ROUTES_REMOVAL_DATE = "2026-06-30" + def _get_gsc_postmessage_origin() -> str: return get_trusted_origins_for_redirect("GSC", "GSC_REDIRECT_URI")[0] @@ -49,15 +52,31 @@ class GSCCallbackResponse(BaseModel): connected: Optional[bool] = None site_count: Optional[int] = None + +class GSCDataQualityResponse(BaseModel): + site_url: str + permission_level: Optional[str] = None + has_sufficient_permission: bool + data_days_available: int + data_window_start: Optional[str] = None + data_window_end: Optional[str] = None + indexing_health: Dict[str, Any] + + +class GSCCachedOpportunitiesResponse(BaseModel): + site_url: str + opportunities: List[Dict[str, Any]] + generated_from_cache: bool + @router.get("/auth/url") -async def get_gsc_auth_url(user: dict = Depends(get_current_user)): +async def get_gsc_auth_url(request: Request, user: dict = Depends(get_current_user)): """ Get Google Search Console OAuth authorization URL. @deprecated Use unified OAuth client: unifiedOAuthClient.getAuthUrl('gsc') This method is preserved for backward compatibility but will be removed in future versions. """ - console.warn('GSC Router: get_gsc_auth_url() is deprecated. Use unifiedOAuthClient.getAuthUrl("gsc") instead') + logger.warning(f'GSC Router: /gsc/auth/url is deprecated; use /oauth/gsc/auth. Planned removal after {DEPRECATED_GSC_LEGACY_ROUTES_REMOVAL_DATE}') try: user_id = user.get('id') @@ -66,32 +85,29 @@ async def get_gsc_auth_url(user: dict = Depends(get_current_user)): logger.info(f"Generating GSC OAuth URL for user: {user_id}") - # Use unified OAuth client for consistent patterns - from frontend.src.api.unifiedOAuth import unifiedOAuthClient - unified_client = unifiedOAuthClient() - - try: - auth_response = await unified_client.getAuthUrl('gsc') - - return { - "auth_url": auth_response.auth_url, - "state": auth_response.state, - "trusted_origins": auth_response.trusted_origins - } - except Exception as unified_error: - # Fall back to legacy service if unified client fails - logger.warning(f"Unified client failed, falling back to legacy GSC service: {unified_error}") - - auth_url = gsc_service.get_oauth_url(user_id) - - return { - "auth_url": auth_url, - "state": gsc_service.get_oauth_state(user_id), - "trusted_origins": get_trusted_origins_for_redirect("GSC", "GSC_REDIRECT_URI") - } + provider = get_provider("gsc") + if not provider: + raise HTTPException(status_code=500, detail="GSC provider not registered") + + auth_payload = provider.get_auth_url(user_id) + if not auth_payload.auth_url: + raise HTTPException(status_code=500, detail="Failed to generate GSC OAuth URL") + + parsed_query = parse_qs(urlparse(auth_payload.auth_url).query) + state = parsed_query.get("state", [auth_payload.state or ""])[0] + + trusted_origins = get_trusted_origins_for_redirect("GSC", "GSC_REDIRECT_URI") + backend_origin = str(request.base_url).rstrip("/") + if backend_origin not in trusted_origins: + trusted_origins.append(backend_origin) + + return { + "auth_url": auth_payload.auth_url, + "state": state, + "trusted_origins": trusted_origins + } logger.info(f"GSC OAuth URL generated successfully for user: {user_id}") - logger.info(f"OAuth URL: {auth_url[:100]}...") except Exception as e: logger.error(f"Error generating GSC OAuth URL: {e}") @@ -153,7 +169,10 @@ async def handle_gsc_callback(

Connection Successful. You can close this window.

@@ -169,7 +188,10 @@ async def handle_gsc_callback(

Connection Failed. Please close this window and try again.

@@ -186,7 +208,10 @@ async def handle_gsc_callback(

Connection Error. Please close this window and try again.

{str(e)}
@@ -212,6 +237,111 @@ async def get_gsc_sites(user: dict = Depends(get_current_user)): logger.error(f"Error getting GSC sites: {e}") raise HTTPException(status_code=500, detail=f"Error getting sites: {str(e)}") + +@router.get("/data-quality", response_model=GSCDataQualityResponse) +async def get_gsc_data_quality( + site_url: str = Query(..., description="GSC site URL"), + user: dict = Depends(get_current_user) +): + """Get immediate data quality checks for onboarding UX.""" + try: + user_id = user.get('id') + if not user_id: + raise HTTPException(status_code=400, detail="User ID not found") + + sites = gsc_service.get_site_list(user_id) + selected_site = next((s for s in sites if s.get("siteUrl") == site_url), None) + permission_level = selected_site.get("permissionLevel") if selected_site else None + has_sufficient_permission = permission_level in {"siteOwner", "siteFullUser", "siteRestrictedUser"} + + analytics = gsc_service.get_search_analytics(user_id=user_id, site_url=site_url) + verification_rows = analytics.get("verification_data", {}).get("rows", []) + date_keys = [row.get("keys", [None])[0] for row in verification_rows if row.get("keys")] + date_keys = [d for d in date_keys if isinstance(d, str)] + + data_window_start = min(date_keys) if date_keys else None + data_window_end = max(date_keys) if date_keys else None + + sitemaps = gsc_service.get_sitemaps(user_id, site_url) + submitted = 0 + indexed = 0 + for sitemap in sitemaps: + for content in sitemap.get("contents", []) or []: + try: + submitted += int(content.get("submitted", 0) or 0) + indexed += int(content.get("indexed", 0) or 0) + except (TypeError, ValueError): + continue + + indexing_ratio = round((indexed / submitted) * 100, 2) if submitted > 0 else None + + return GSCDataQualityResponse( + site_url=site_url, + permission_level=permission_level, + has_sufficient_permission=has_sufficient_permission, + data_days_available=len(date_keys), + data_window_start=data_window_start, + data_window_end=data_window_end, + indexing_health={ + "submitted_urls": submitted, + "indexed_urls": indexed, + "indexing_ratio": indexing_ratio, + "sitemaps_count": len(sitemaps) + } + ) + except Exception as e: + logger.error(f"Error getting GSC data quality: {e}") + raise HTTPException(status_code=500, detail=f"Error getting GSC data quality: {str(e)}") + + +@router.get("/opportunities", response_model=GSCCachedOpportunitiesResponse) +async def get_gsc_cached_opportunities( + site_url: str = Query(..., description="GSC site URL"), + user: dict = Depends(get_current_user) +): + """Get guided opportunities from cached query analytics.""" + try: + user_id = user.get('id') + if not user_id: + raise HTTPException(status_code=400, detail="User ID not found") + + cached = gsc_service.get_latest_cached_analytics(user_id=user_id, site_url=site_url) + if not cached: + return GSCCachedOpportunitiesResponse(site_url=site_url, opportunities=[], generated_from_cache=False) + + payload = cached.get("data", {}) + query_rows = payload.get("query_data", {}).get("rows", []) + + opportunities = [] + for row in query_rows: + keys = row.get("keys", []) + query = keys[0] if keys else None + impressions = float(row.get("impressions", 0) or 0) + ctr = float(row.get("ctr", 0) or 0) + position = float(row.get("position", 0) or 0) + clicks = float(row.get("clicks", 0) or 0) + + if query and impressions >= 100 and ctr < 0.03: + opportunities.append({ + "query": query, + "clicks": int(clicks), + "impressions": int(impressions), + "ctr": round(ctr * 100, 2), + "position": round(position, 2), + "recommended_action": "Improve title/meta and align intro to search intent" + }) + + opportunities = sorted(opportunities, key=lambda x: x["impressions"], reverse=True)[:10] + + return GSCCachedOpportunitiesResponse( + site_url=site_url, + opportunities=opportunities, + generated_from_cache=True + ) + except Exception as e: + logger.error(f"Error getting GSC opportunities: {e}") + raise HTTPException(status_code=500, detail=f"Error getting GSC opportunities: {str(e)}") + @router.post("/analytics") async def get_gsc_analytics( request: GSCAnalyticsRequest, @@ -269,7 +399,7 @@ async def get_gsc_status(user: dict = Depends(get_current_user)): @deprecated Use unified OAuth client: unifiedOAuthClient.getConnectionStatus('gsc') This method is preserved for backward compatibility but will be removed in future versions. """ - console.warn('GSC Router: get_gsc_status() is deprecated. Use unifiedOAuthClient.getConnectionStatus("gsc") instead') + logger.warning(f'GSC Router: /gsc/status is deprecated; use /oauth/gsc/status. Planned removal after {DEPRECATED_GSC_LEGACY_ROUTES_REMOVAL_DATE}') try: user_id = user.get('id') @@ -278,56 +408,25 @@ async def get_gsc_status(user: dict = Depends(get_current_user)): logger.info(f"Checking GSC status for user: {user_id}") - # Use unified OAuth client for consistent patterns - from frontend.src.api.unifiedOAuth import unifiedOAuthClient - unified_client = unifiedOAuthClient() - - try: - status_response = await unified_client.getConnectionStatus('gsc') - - # Transform unified response to GSC-specific format - details = status_response.details as any - sites = details?.sites || [] - - return { - "connected": status_response.connected, - "sites": sites, - "total_sites": len(sites), - "last_sync": details?.last_sync - } - except Exception as unified_error: - # Fall back to legacy service if unified client fails - logger.warning(f"Unified client failed, falling back to legacy GSC service: {unified_error}") - - # Use legacy GSC service for status check - credentials = gsc_service.load_user_credentials(user_id) - connected = credentials is not None - - sites = [] - if connected: - try: - sites = gsc_service.get_site_list(user_id) - except Exception as e: - logger.warning(f"Could not get sites for user {user_id}: {e}") - sites = [] - - last_sync = None # Could be enhanced to track last sync time - status_response = GSCStatusResponse( - connected=connected, - sites=sites, - total_sites=len(sites), - last_sync=last_sync - ) - else: - status_response = GSCStatusResponse( - connected=False, - sites=[], - total_sites=0, - last_sync=None - ) - - logger.info(f"GSC status checked for user: {user_id}, connected: {connected}") - return status_response + provider = get_provider("gsc") + if not provider: + raise HTTPException(status_code=500, detail="GSC provider not registered") + + status_response = provider.get_connection_status(user_id) + sites = [] + if status_response.connected: + try: + sites = gsc_service.get_site_list(user_id) + except Exception as e: + logger.warning(f"Could not get sites for user {user_id}: {e}") + + logger.info(f"GSC status checked for user: {user_id}, connected: {status_response.connected}") + return { + "connected": status_response.connected, + "sites": sites, + "total_sites": len(sites), + "last_sync": None + } except Exception as e: logger.error(f"Error checking GSC status: {e}") @@ -341,7 +440,7 @@ async def disconnect_gsc(user: dict = Depends(get_current_user)): @deprecated Use unified OAuth client: unifiedOAuthClient.disconnect('gsc') This method is preserved for backward compatibility but will be removed in future versions. """ - console.warn('GSC Router: disconnect_gsc() is deprecated. Use unifiedOAuthClient.disconnect("gsc") instead') + logger.warning(f'GSC Router: /gsc/disconnect is deprecated; use /oauth/gsc/disconnect. Planned removal after {DEPRECATED_GSC_LEGACY_ROUTES_REMOVAL_DATE}') try: user_id = user.get('id') @@ -350,36 +449,24 @@ async def disconnect_gsc(user: dict = Depends(get_current_user)): logger.info(f"Disconnecting GSC for user: {user_id}") - # Use unified OAuth client for consistent patterns - from frontend.src.api.unifiedOAuth import unifiedOAuthClient - unified_client = unifiedOAuthClient() - - try: - disconnect_response = await unified_client.disconnect('gsc') - + provider = get_provider("gsc") + if not provider: + raise HTTPException(status_code=500, detail="GSC provider not registered") + + success = provider.disconnect(user_id) + + if success: + logger.info(f"Successfully disconnected GSC for user {user_id}") return { - "success": disconnect_response.success, - "message": disconnect_response.message + "success": True, + "message": "Successfully disconnected from Google Search Console" } - except Exception as unified_error: - # Fall back to legacy service if unified client fails - logger.warning(f"Unified client failed, falling back to legacy GSC service: {unified_error}") - - # Use legacy GSC service for disconnection - success = gsc_service.revoke_access_token(user_id) - - if success: - logger.info(f"Successfully disconnected GSC for user {user_id}") - return { - "success": True, - "message": "Successfully disconnected from Google Search Console" - } - else: - logger.error(f"Failed to disconnect GSC for user {user_id}") - return { - "success": False, - "message": "Failed to disconnect from Google Search Console" - } + + logger.warning(f"No active GSC connection found for user {user_id}") + return { + "success": False, + "message": "No active Google Search Console connection found" + } except Exception as e: logger.error(f"Error disconnecting GSC: {e}") diff --git a/backend/services/integrations/registry.py b/backend/services/integrations/registry.py index 1a9953d3..3c6846b3 100644 --- a/backend/services/integrations/registry.py +++ b/backend/services/integrations/registry.py @@ -4,6 +4,9 @@ from typing import Callable, Dict, Optional from datetime import datetime +from urllib.parse import parse_qs, urlparse +from loguru import logger +from sqlalchemy import text from services.gsc_service import GSCService from services.integrations.bing_oauth import BingOAuthService @@ -179,38 +182,43 @@ def __init__(self, service: Optional[GSCService] = None) -> None: def get_auth_url(self, user_id: str, redirect_uri: Optional[str] = None) -> AuthUrlPayload: auth_url = self._service.get_oauth_url(user_id) + parsed_query = parse_qs(urlparse(auth_url).query) + oauth_state = parsed_query.get("state", [""])[0] return AuthUrlPayload( auth_url=auth_url, - state=user_id, + state=oauth_state, provider_id=self.key ) def handle_callback(self, code: str, state: str) -> ConnectionResult: try: - # GSC handles callback internally through the service - credentials = self._service.exchange_code_for_tokens(code) - if not credentials: + callback_result = self._service.handle_oauth_callback(code, state) + if not callback_result.get("success"): return ConnectionResult( success=False, - user_id=state, + user_id=callback_result.get("user_id", state), provider_id=self.key, - error="Failed to exchange code for tokens" + error=callback_result.get("error") or "Failed to handle OAuth callback" ) + + user_id = callback_result.get("user_id", state) + credentials = self._service.load_user_credentials(user_id) return ConnectionResult( success=True, - user_id=state, + user_id=user_id, provider_id=self.key, - access_token=getattr(credentials, 'token', None), - refresh_token=getattr(credentials, 'refresh_token', None), - expires_at=getattr(credentials, 'expiry', None), - scope=getattr(credentials, 'scopes', None), + access_token=getattr(credentials, 'token', None) if credentials else None, + refresh_token=getattr(credentials, 'refresh_token', None) if credentials else None, + expires_at=getattr(credentials, 'expiry', None) if credentials else None, + scope=getattr(credentials, 'scopes', None) if credentials else None, account_info={ - 'email': getattr(credentials, 'email', None), - 'verified': getattr(credentials, 'verified', False) + 'email': getattr(credentials, 'email', None) if credentials else None, + 'verified': getattr(credentials, 'verified', False) if credentials else False } ) except Exception as e: + logger.error(f"GSC callback failed: {e}") return ConnectionResult( success=False, user_id=state, @@ -290,9 +298,8 @@ def refresh_token(self, user_id: str) -> Optional[RefreshResult]: def disconnect(self, user_id: str) -> bool: try: # GSC doesn't have explicit disconnect, remove credentials from storage - return self._service.revoke_user_credentials(user_id) + return self._service.revoke_user_access(user_id) except Exception as e: - from loguru import logger logger.error(f"GSC disconnect failed for user {user_id}: {e}") return False diff --git a/backend/services/test_gsc_auth_integration.py b/backend/services/test_gsc_auth_integration.py new file mode 100644 index 00000000..e5df14aa --- /dev/null +++ b/backend/services/test_gsc_auth_integration.py @@ -0,0 +1,147 @@ +import sys +import types +from types import SimpleNamespace + +from fastapi import FastAPI +from fastapi.testclient import TestClient + + +# Stub heavy modules before importing router under test. +fake_registry_module = types.ModuleType("services.integrations.registry") +fake_registry_module.get_provider = lambda _key: None +sys.modules["services.integrations.registry"] = fake_registry_module + +fake_gsc_service_module = types.ModuleType("services.gsc_service") + + +class _FakeGSCService: + def handle_oauth_callback(self, code, state): + return {"success": True, "user_id": "user-123"} + + def get_site_list(self, user_id): + return [] + + def clear_incomplete_credentials(self, user_id): + return True + + def get_search_analytics(self, *args, **kwargs): + return {} + + def get_sitemaps(self, *args, **kwargs): + return [] + + def get_latest_cached_analytics(self, *args, **kwargs): + return None + + +fake_gsc_service_module.GSCService = _FakeGSCService +sys.modules["services.gsc_service"] = fake_gsc_service_module + +from routers import gsc_auth # noqa: E402 +from middleware.auth_middleware import get_current_user # noqa: E402 + + +class MockProvider: + def __init__(self, *, connected=True, disconnect_success=True): + self._connected = connected + self._disconnect_success = disconnect_success + + def get_auth_url(self, user_id: str): + return SimpleNamespace( + auth_url="https://accounts.google.com/o/oauth2/auth?state=test-state-123", + state="test-state-123", + ) + + def get_connection_status(self, user_id: str): + return SimpleNamespace(connected=self._connected) + + def disconnect(self, user_id: str): + return self._disconnect_success + + +def _build_client(monkeypatch, provider: MockProvider, gsc_service_mock: SimpleNamespace): + app = FastAPI() + app.include_router(gsc_auth.router) + + app.dependency_overrides[get_current_user] = lambda: {"id": "user-123"} + + monkeypatch.setattr(gsc_auth, "get_provider", lambda _key: provider) + monkeypatch.setattr(gsc_auth, "gsc_service", gsc_service_mock) + monkeypatch.setattr( + gsc_auth, + "get_trusted_origins_for_redirect", + lambda *_args, **_kwargs: ["http://testserver"], + ) + + return TestClient(app) + + +def test_auth_url_generation(monkeypatch): + provider = MockProvider() + gsc_service_mock = SimpleNamespace() + client = _build_client(monkeypatch, provider, gsc_service_mock) + + response = client.get("/gsc/auth/url") + + assert response.status_code == 200 + body = response.json() + assert body["auth_url"].startswith("https://accounts.google.com") + assert body["state"] == "test-state-123" + assert "http://testserver" in body["trusted_origins"] + + +def test_callback_success(monkeypatch): + provider = MockProvider() + gsc_service_mock = SimpleNamespace( + handle_oauth_callback=lambda code, state: {"success": True, "user_id": "user-123"} + ) + client = _build_client(monkeypatch, provider, gsc_service_mock) + + response = client.get("/gsc/callback", params={"code": "abc", "state": "state"}) + + assert response.status_code == 200 + assert "Connection Successful" in response.text + assert "GSC_AUTH_SUCCESS" in response.text + + +def test_callback_failure(monkeypatch): + provider = MockProvider() + gsc_service_mock = SimpleNamespace( + handle_oauth_callback=lambda code, state: {"success": False, "error": "invalid_state"} + ) + client = _build_client(monkeypatch, provider, gsc_service_mock) + + response = client.get("/gsc/callback", params={"code": "abc", "state": "state"}) + + assert response.status_code == 400 + assert "Connection Failed" in response.text + assert "GSC_AUTH_ERROR" in response.text + + +def test_status_connected(monkeypatch): + provider = MockProvider(connected=True) + gsc_service_mock = SimpleNamespace( + get_site_list=lambda _user_id: [{"siteUrl": "https://example.com/", "permissionLevel": "siteOwner"}] + ) + client = _build_client(monkeypatch, provider, gsc_service_mock) + + response = client.get("/gsc/status") + + assert response.status_code == 200 + body = response.json() + assert body["connected"] is True + assert body["total_sites"] == 1 + assert body["sites"][0]["siteUrl"] == "https://example.com/" + + +def test_disconnect(monkeypatch): + provider = MockProvider(disconnect_success=True) + gsc_service_mock = SimpleNamespace() + client = _build_client(monkeypatch, provider, gsc_service_mock) + + response = client.delete("/gsc/disconnect") + + assert response.status_code == 200 + body = response.json() + assert body["success"] is True + assert "Successfully disconnected" in body["message"] diff --git a/docs/reviews/gsc-onboarding-step5-review.md b/docs/reviews/gsc-onboarding-step5-review.md new file mode 100644 index 00000000..c170865b --- /dev/null +++ b/docs/reviews/gsc-onboarding-step5-review.md @@ -0,0 +1,136 @@ +# Step 5 Onboarding Integrations Review: GSC (Google Search Console) + +## Scope +This review focuses on **Onboarding Step 5 (Integrations)** with emphasis on: +- GSC integration behavior in current code. +- Robustness of the OAuth + token + callback flow. +- Product/value additions for ALwrity end users (content writers and digital marketers). + +--- + +## Current Implementation Snapshot + +### What works well +1. **Step 5 is clearly modeled as Integrations in onboarding** and is optional in the wizard, reducing initial friction for first-time users. +2. **Unified OAuth routes exist** (`/oauth/{provider}`), with common response models and sanitization hooks for status payloads. +3. **GSC service has state TTL and delete-on-consume behavior** in callback verification, which is a good anti-replay baseline. +4. **Credential refresh on load** is handled in `load_user_credentials`, improving resiliency for long-lived usage. +5. **Analytics calls are cached** in DB with expiration, reducing repeated external API usage. + +### Key robustness concerns (high-priority) +1. **Legacy GSC router has Python/JS mixing and backend↔frontend boundary breaks**. + - Uses `console.warn(...)` in Python router handlers. + - Attempts to import frontend module (`from frontend.src.api.unifiedOAuth import unifiedOAuthClient`) inside backend. + - Includes TypeScript-style constructs (`details = status_response.details as any`) in Python. + These patterns are brittle and can fail at runtime or mask real fallback behavior. + +2. **Provider callback contract mismatch in unified flow**. + - Unified route calls provider callback with `(code, state)`. + - GSC provider callback currently exchanges code for tokens but does not persist by user unless state maps reliably to user semantics in provider internals. + - GSC provider currently returns `state=user_id` in auth payload even though service-generated OAuth state is different, creating potential confusion and validation risk. + +3. **Disconnect method mismatch risk**. + - GSC integration provider calls `revoke_user_credentials(...)`, while GSC service exposes `revoke_user_access(...)`. + - This can silently break disconnects depending on call path. + +4. **Weak popup message-origin verification** in frontend GSC onboarding hook. + - The message handler accepts any `postMessage` object with expected `type` and does not validate `event.origin` against trusted origins. + - This is a cross-window trust gap. + +5. **State and callback UX split between legacy and unified endpoints**. + - Frontend GSC flow still uses `/gsc/*` endpoints. + - Unified OAuth client exists and is stronger architecturally, but onboarding GSC path is not fully migrated. + - This increases maintenance complexity and can create divergent behaviors. + +### Medium-priority concerns +1. **Over-aggressive pre-connect cleanup**. + - On connect, hook calls clear-incomplete and then disconnect before opening OAuth. + - If the OAuth popup fails or is blocked, user can end up disconnected unexpectedly. + +2. **Limited actionable error typing surfaced to UI**. + - Many errors are logged and rethrown generically. + - Marketers/writers need practical remediation (e.g., "add property in GSC", "grant full user", "scope missing"). + +3. **Site selection and intent capture is underpowered**. + - Connected sites are listed, but there is no explicit primary property selection during onboarding for downstream SEO workflows. + +--- + +## Value Additions for ALwrity End Users + +## A) Immediate UX + reliability wins (1-2 sprints) +1. **Full migration of Step 5 GSC flow to unified OAuth endpoints**. + - Route all GSC connect/status/disconnect calls via `/oauth/gsc/*`. + - Keep legacy routes as wrappers only until removal. + +2. **Harden popup callback security**. + - Enforce `event.origin` checks against backend-provided `trusted_origins`. + - Include nonce/correlation id in message payload and verify before accepting success. + +3. **Safer connect behavior**. + - Remove forced disconnect before OAuth start. + - Use staged connect: keep existing connection active until new callback success. + +4. **Improve Step 5 diagnostics panel**. + - Show status cards: Connected, token health, last sync, connected property count. + - Add one-click "Test connection" and "Fetch properties". + +## B) Content-writer value features (2-4 sprints) +1. **Keyword-to-content brief suggestions** from query-level GSC data. + - Flag high-impression/low-CTR queries and auto-propose title/meta/outline improvements. + +2. **Intent-aware refresh queue**. + - Weekly surfacing of opportunities: declining pages, rising queries, missing intent coverage. + +3. **Property-aware publishing recommendations**. + - If WordPress/Wix connected + GSC connected, inject SEO guardrails at publish time (target query, title length, schema hints, internal links). + +## C) Digital-marketing pro features (4-8 sprints) +1. **Multi-property portfolio dashboard** (agency mode). + - Compare GSC signals across properties, segments, countries, devices. + +2. **Automated content decay alerts**. + - Detect rank/CTR drops and trigger rewrite workflows. + +3. **Attribution bridge**. + - Combine GSC query/page trends with publishing calendar to show content impact windows. + +4. **Competitor-overlap expansion**. + - Feed GSC winners/losers into research/strategy modules to generate gap campaigns. + +--- + +## Recommended Technical Plan (Prioritized) + +### Priority 0: Correctness and security +- Replace invalid backend imports/syntax in `backend/routers/gsc_auth.py` with backend-native service calls only. +- Align provider contracts in `services/integrations/registry.py` for GSC: + - Ensure state semantics are consistent and mapped to persisted OAuth state. + - Ensure callback persists credentials through one canonical path. + - Fix disconnect method to call existing service method. +- Add popup message origin + nonce verification in `useGSCConnection.ts`. + +### Priority 1: Unified path completion +- Switch onboarding GSC API client to unified OAuth client. +- Keep `/gsc/*` only for backward compatibility and add explicit deprecation timeline. +- Add integration tests for auth URL generation, callback success/failure, status, disconnect. + +### Priority 2: User-facing outcomes +- Add primary-site selection during onboarding. +- Add immediate post-connect data quality checks (property permissions, data availability window, indexing health). +- Add guided opportunities panel from cached query analytics. + +--- + +## Suggested KPIs to track after improvements +- Step 5 completion rate (overall and GSC-specific). +- OAuth callback success rate (first attempt). +- % users with healthy token status after 7/30 days. +- Time-to-first-insight after connecting GSC. +- Content optimization adoption rate from GSC recommendations. +- Lift in CTR / clicks for pages acted upon through recommendations. + +--- + +## Bottom line +ALwrity has the right architecture direction (unified OAuth, token validation, onboarding step modeling), but the current GSC path still has **critical robustness gaps at the legacy bridge layer**. Stabilizing callback/state semantics and fully converging Step 5 onto unified OAuth will improve reliability quickly. From there, the largest end-user value comes from converting GSC data into **actionable writing and campaign decisions**, not just connection status. diff --git a/frontend/src/api/gsc.ts b/frontend/src/api/gsc.ts index 7a8795dc..da7d1261 100644 --- a/frontend/src/api/gsc.ts +++ b/frontend/src/api/gsc.ts @@ -43,6 +43,34 @@ export interface GSCStatusResponse { last_sync?: string; } +export interface GSCDataQualityResponse { + site_url: string; + permission_level?: string; + has_sufficient_permission: boolean; + data_days_available: number; + data_window_start?: string; + data_window_end?: string; + indexing_health: { + submitted_urls: number; + indexed_urls: number; + indexing_ratio?: number; + sitemaps_count: number; + }; +} + +export interface GSCCachedOpportunitiesResponse { + site_url: string; + opportunities: Array<{ + query: string; + clicks: number; + impressions: number; + ctr: number; + position: number; + recommended_action: string; + }>; + generated_from_cache: boolean; +} + class GSCAPI { private baseUrl = '/gsc'; private getAuthToken: (() => Promise) | null = null; @@ -185,6 +213,22 @@ class GSCAPI { } } + async getDataQuality(siteUrl: string): Promise { + const client = await this.getAuthenticatedClient(); + const response = await client.get(`${this.baseUrl}/data-quality`, { + params: { site_url: siteUrl } + }); + return response.data; + } + + async getOpportunities(siteUrl: string): Promise { + const client = await this.getAuthenticatedClient(); + const response = await client.get(`${this.baseUrl}/opportunities`, { + params: { site_url: siteUrl } + }); + return response.data; + } + /** * Health check */ @@ -199,4 +243,4 @@ class GSCAPI { } } -export const gscAPI = new GSCAPI(); \ No newline at end of file +export const gscAPI = new GSCAPI(); diff --git a/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx b/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx index a686a850..832cb374 100644 --- a/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx +++ b/frontend/src/components/OnboardingWizard/IntegrationsStep.tsx @@ -33,6 +33,7 @@ import { useGSCConnection } from './common/useGSCConnection'; import { usePlatformConnections } from './common/usePlatformConnections'; import PlatformAnalytics from '../shared/PlatformAnalytics'; import { cachedAnalyticsAPI } from '../../api/cachedAnalytics'; +import { gscAPI, type GSCDataQualityResponse, type GSCCachedOpportunitiesResponse } from '../../api/gsc'; interface IntegrationsStepProps { onContinue: () => void; @@ -56,7 +57,10 @@ const IntegrationsStep: React.FC = ({ onContinue, updateH const [email, setEmail] = useState(''); // Use custom hooks - const { gscSites, connectedPlatforms, setConnectedPlatforms, handleGSCConnect } = useGSCConnection(); + const { gscSites, connectedPlatforms, setConnectedPlatforms, handleGSCConnect, refreshGSCStatus } = useGSCConnection(); + const [primaryGscSite, setPrimaryGscSite] = useState(() => localStorage.getItem('onboarding_primary_gsc_site') || ''); + const [gscDataQuality, setGscDataQuality] = useState(null); + const [gscOpportunities, setGscOpportunities] = useState(null); // Invalidate analytics cache when platform connections change const invalidateAnalyticsCache = useCallback(() => { @@ -211,6 +215,44 @@ const IntegrationsStep: React.FC = ({ onContinue, updateH }); }, [updateHeaderContent]); + + useEffect(() => { + if (!gscSites || gscSites.length === 0) return; + if (primaryGscSite && gscSites.some((s) => s.siteUrl === primaryGscSite)) return; + + const defaultSite = gscSites[0].siteUrl; + setPrimaryGscSite(defaultSite); + localStorage.setItem('onboarding_primary_gsc_site', defaultSite); + }, [gscSites, primaryGscSite]); + + useEffect(() => { + if (!connectedPlatforms.includes('gsc') || !primaryGscSite) { + setGscDataQuality(null); + setGscOpportunities(null); + return; + } + + (async () => { + try { + const [quality, opportunities] = await Promise.all([ + gscAPI.getDataQuality(primaryGscSite), + gscAPI.getOpportunities(primaryGscSite) + ]); + setGscDataQuality(quality); + setGscOpportunities(opportunities); + } catch (error) { + console.warn('Failed to load GSC onboarding insights', error); + setGscDataQuality(null); + setGscOpportunities(null); + } + })(); + }, [connectedPlatforms, primaryGscSite]); + + const handlePrimaryGscSiteChange = useCallback((siteUrl: string) => { + setPrimaryGscSite(siteUrl); + localStorage.setItem('onboarding_primary_gsc_site', siteUrl); + }, []); + // Handle WordPress connection status changes useEffect(() => { console.log('IntegrationsStep: WordPress status changed:', { @@ -392,6 +434,10 @@ const IntegrationsStep: React.FC = ({ onContinue, updateH setConnectedPlatforms(connectedPlatforms.filter(p => p !== platformId)); }} setConnectedPlatforms={setConnectedPlatforms} + primarySite={primaryGscSite} + onPrimarySiteChange={handlePrimaryGscSiteChange} + dataQuality={gscDataQuality} + opportunities={gscOpportunities} /> @@ -405,9 +451,13 @@ const IntegrationsStep: React.FC = ({ onContinue, updateH platforms={analyticsPlatforms} connectedPlatforms={connectedPlatforms} gscSites={gscSites} - isLoading={isLoading} + isLoading={isLoading} onConnect={handlePlatformConnect} - /> + primarySite={primaryGscSite} + onPrimarySiteChange={handlePrimaryGscSiteChange} + dataQuality={gscDataQuality} + opportunities={gscOpportunities} + /> @@ -463,6 +513,10 @@ const IntegrationsStep: React.FC = ({ onContinue, updateH gscSites={null} isLoading={isLoading} onConnect={handlePlatformConnect} + primarySite={primaryGscSite} + onPrimarySiteChange={handlePrimaryGscSiteChange} + dataQuality={gscDataQuality} + opportunities={gscOpportunities} /> diff --git a/frontend/src/components/OnboardingWizard/common/GSCPlatformCard.tsx b/frontend/src/components/OnboardingWizard/common/GSCPlatformCard.tsx index da7b573f..2d8f83ab 100644 --- a/frontend/src/components/OnboardingWizard/common/GSCPlatformCard.tsx +++ b/frontend/src/components/OnboardingWizard/common/GSCPlatformCard.tsx @@ -7,12 +7,22 @@ import { CardContent, Chip, IconButton, - Tooltip + Tooltip, + FormControl, + InputLabel, + Select, + MenuItem, + Alert, + List, + ListItem, + ListItemText, + Divider } from '@mui/material'; import { Refresh as RefreshIcon } from '@mui/icons-material'; import { type GSCSite } from '../../../api/gsc'; +import type { GSCDataQualityResponse, GSCCachedOpportunitiesResponse } from '../../../api/gsc'; interface GSCPlatformCardProps { platform: { @@ -29,6 +39,10 @@ interface GSCPlatformCardProps { getStatusText: (status: string) => string; getStatusColor: (status: string) => string; onRefresh?: () => void; + primarySite: string; + onPrimarySiteChange: (siteUrl: string) => void; + dataQuality: GSCDataQualityResponse | null; + opportunities: GSCCachedOpportunitiesResponse | null; } const GSCPlatformCard: React.FC = ({ @@ -39,7 +53,11 @@ const GSCPlatformCard: React.FC = ({ getStatusIcon, getStatusText, getStatusColor, - onRefresh + onRefresh, + primarySite, + onPrimarySiteChange, + dataQuality, + opportunities }) => { const handleRefresh = () => { if (onRefresh) { @@ -105,6 +123,58 @@ const GSCPlatformCard: React.FC = ({ {site.siteUrl} ))} + + + Primary Site + + + + )} + + {dataQuality && ( + + + Permission: {dataQuality.permission_level || 'unknown'} + + + Data window: {dataQuality.data_window_start || 'n/a'} → {dataQuality.data_window_end || 'n/a'} ({dataQuality.data_days_available} days) + + + Indexing health: {dataQuality.indexing_health.indexed_urls}/{dataQuality.indexing_health.submitted_urls} indexed + {typeof dataQuality.indexing_health.indexing_ratio === 'number' ? ` (${dataQuality.indexing_health.indexing_ratio}%)` : ''} + + + )} + + {opportunities && opportunities.opportunities.length > 0 && ( + + + Guided opportunities + + + {opportunities.opportunities.slice(0, 3).map((op, idx) => ( + + + + + {idx < Math.min(opportunities.opportunities.length, 3) - 1 && } + + ))} + )} @@ -200,4 +270,4 @@ const GSCPlatformCard: React.FC = ({ ); }; -export default GSCPlatformCard; \ No newline at end of file +export default GSCPlatformCard; diff --git a/frontend/src/components/OnboardingWizard/common/PlatformSection.tsx b/frontend/src/components/OnboardingWizard/common/PlatformSection.tsx index d9c7d7ba..6f6d96f7 100644 --- a/frontend/src/components/OnboardingWizard/common/PlatformSection.tsx +++ b/frontend/src/components/OnboardingWizard/common/PlatformSection.tsx @@ -21,6 +21,7 @@ import GSCPlatformCard from './GSCPlatformCard'; import WordPressOAuthPlatformCard from './WordPressOAuthPlatformCard'; import WixPlatformCard from './WixPlatformCard'; import { type GSCSite } from '../../../api/gsc'; +import type { GSCDataQualityResponse, GSCCachedOpportunitiesResponse } from '../../../api/gsc'; interface Platform { id: string; @@ -46,6 +47,10 @@ interface PlatformSectionProps { onDisconnect?: (platformId: string) => void; setConnectedPlatforms?: (platforms: string[]) => void; fadeTimeout?: number; + primarySite: string; + onPrimarySiteChange: (siteUrl: string) => void; + dataQuality: GSCDataQualityResponse | null; + opportunities: GSCCachedOpportunitiesResponse | null; } const PlatformSection: React.FC = ({ @@ -58,7 +63,11 @@ const PlatformSection: React.FC = ({ onConnect, onDisconnect, setConnectedPlatforms, - fadeTimeout = 800 + fadeTimeout = 800, + primarySite, + onPrimarySiteChange, + dataQuality, + opportunities }) => { const getStatusColor = (status: string) => { switch (status) { @@ -116,6 +125,10 @@ const PlatformSection: React.FC = ({ getStatusIcon={getStatusIcon} getStatusText={getStatusText} getStatusColor={getStatusColor} + primarySite={primarySite} + onPrimarySiteChange={onPrimarySiteChange} + dataQuality={dataQuality} + opportunities={opportunities} onRefresh={() => { // Trigger a refresh of GSC status console.log('Refreshing GSC status...'); diff --git a/frontend/src/components/OnboardingWizard/common/useGSCConnection.ts b/frontend/src/components/OnboardingWizard/common/useGSCConnection.ts index 17cf5948..cb23cece 100644 --- a/frontend/src/components/OnboardingWizard/common/useGSCConnection.ts +++ b/frontend/src/components/OnboardingWizard/common/useGSCConnection.ts @@ -1,6 +1,9 @@ -import { useState, useEffect } from 'react'; +import { useState, useEffect, useCallback } from 'react'; import { useAuth } from '@clerk/clerk-react'; import { gscAPI, type GSCSite } from '../../../api/gsc'; +import { unifiedOAuthClient } from '../../../api/unifiedOAuthClient'; + +const DEPRECATED_GSC_ROUTES_REMOVAL_DATE = '2026-06-30'; export const useGSCConnection = () => { const { getToken } = useAuth(); @@ -8,7 +11,7 @@ export const useGSCConnection = () => { const [connectedPlatforms, setConnectedPlatforms] = useState([]); useEffect(() => { - // Ensure GSC API uses authenticated client + // Ensure GSC API uses authenticated client for GSC analytics/sitemaps endpoints. try { gscAPI.setAuthTokenGetter(async () => { try { @@ -17,114 +20,101 @@ export const useGSCConnection = () => { return null; } }); - } catch {} + } catch { + // no-op + } }, [getToken]); + const refreshGSCStatus = useCallback(async () => { + const status = await unifiedOAuthClient.getConnectionStatus('gsc'); + if (!status.connected) { + setConnectedPlatforms(prev => prev.filter(p => p !== 'gsc')); + setGscSites(null); + return; + } + + setConnectedPlatforms(prev => Array.from(new Set([...prev, 'gsc']))); + + try { + const sitesResponse = await gscAPI.getSites(); + setGscSites(sitesResponse.sites ?? []); + } catch { + setGscSites([]); + } + }, []); + useEffect(() => { - // Check current GSC connection status on load (async () => { try { - const status = await gscAPI.getStatus(); - if (status.connected) { - setConnectedPlatforms(prev => Array.from(new Set([...prev, 'gsc']))); - if (status.sites && status.sites.length) setGscSites(status.sites); - } else { - setConnectedPlatforms(prev => prev.filter(p => p !== 'gsc')); - setGscSites(null); - } - } catch (error) { - console.log('GSC status check failed'); - try { - await gscAPI.clearIncomplete(); - } catch {} + await refreshGSCStatus(); + } catch { setConnectedPlatforms(prev => prev.filter(p => p !== 'gsc')); setGscSites(null); } })(); - }, []); + }, [refreshGSCStatus]); const handleGSCConnect = async () => { try { - // Clear any incomplete credentials and connection state before starting OAuth - try { - await gscAPI.clearIncomplete(); - } catch (e) { - console.log('Clear incomplete failed:', e); - } - - // Also try to disconnect completely - try { - await gscAPI.disconnect(); - } catch (e) { - console.log('Disconnect failed:', e); - } - - // Clear local connection state - setConnectedPlatforms(prev => prev.filter(p => p !== 'gsc')); - setGscSites(null); - - const { auth_url } = await gscAPI.getAuthUrl(); - + // Keep /gsc/* callback endpoint for backward compatibility only. + console.warn(`GSC legacy callback endpoint is deprecated and scheduled for removal after ${DEPRECATED_GSC_ROUTES_REMOVAL_DATE}`); + const authResponse = await unifiedOAuthClient.getAuthUrl('gsc'); + const { auth_url, trusted_origins = [] } = authResponse; + + const oauthNonce = crypto.randomUUID(); const popup = window.open( auth_url, - 'gsc-auth', + `gsc-auth-${oauthNonce}`, 'width=600,height=700,scrollbars=yes,resizable=yes' ); - if (!popup) { - // Fallback: navigate directly to OAuth URL if popup is blocked - console.log('Popup blocked, navigating directly to OAuth URL'); window.location.href = auth_url; return; } - // Check if popup was redirected immediately (OAuth consent screen issue) - setTimeout(() => { - try { - if (popup.closed) { - console.log('GSC popup closed immediately - possible OAuth consent screen issue'); - } - } catch (e) { - // Ignore cross-origin errors - } - }, 2000); - - // Prefer message-based completion from callback window to avoid COOP issues let messageHandled = false; const messageHandler = (event: MessageEvent) => { - if (messageHandled) return; // Prevent duplicate handling - if (!event?.data || typeof event.data !== 'object') return; - const { type } = event.data as { type?: string }; + if (messageHandled || !event?.data || typeof event.data !== 'object') return; + + const allowedOrigins = new Set([ + ...trusted_origins, + window.location.origin + ]); + + if (!allowedOrigins.has(event.origin)) return; + + const { type, nonce } = event.data as { type?: string; nonce?: string }; + if (nonce !== oauthNonce) return; + if (type === 'GSC_AUTH_SUCCESS' || type === 'GSC_AUTH_ERROR') { messageHandled = true; - try { popup.close(); } catch {} + try { popup.close(); } catch { + // no-op + } window.removeEventListener('message', messageHandler); + if (type === 'GSC_AUTH_SUCCESS') { - // Optimistically mark as connected; a later status refresh will confirm - setConnectedPlatforms(prev => Array.from(new Set([...prev, 'gsc']))); - // Refresh sites - (async () => { - try { - const status = await gscAPI.getStatus(); - if (status.connected && status.sites) setGscSites(status.sites); - } catch {} - })(); + void refreshGSCStatus(); } + setTimeout(() => { window.location.href = '/onboarding?step=5'; }, 250); } }; + window.addEventListener('message', messageHandler); - // Fallback: safety timeout in case message doesn't arrive setTimeout(() => { - try { if (!popup.closed) popup.close(); } catch {} + try { + if (!popup.closed) popup.close(); + } catch { + // no-op + } window.removeEventListener('message', messageHandler); }, 3 * 60 * 1000); - } catch (error) { console.error('GSC OAuth error:', error); throw error; @@ -136,6 +126,7 @@ export const useGSCConnection = () => { connectedPlatforms, setConnectedPlatforms, setGscSites, - handleGSCConnect + handleGSCConnect, + refreshGSCStatus }; };