Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
311 changes: 199 additions & 112 deletions backend/routers/gsc_auth.py

Large diffs are not rendered by default.

37 changes: 22 additions & 15 deletions backend/services/integrations/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

from typing import Callable, Dict, Optional
from datetime import datetime
from urllib.parse import parse_qs, urlparse
from loguru import logger
from sqlalchemy import text

from services.gsc_service import GSCService
from services.integrations.bing_oauth import BingOAuthService
Expand Down Expand Up @@ -179,38 +182,43 @@ def __init__(self, service: Optional[GSCService] = None) -> None:

def get_auth_url(self, user_id: str, redirect_uri: Optional[str] = None) -> AuthUrlPayload:
auth_url = self._service.get_oauth_url(user_id)
parsed_query = parse_qs(urlparse(auth_url).query)
oauth_state = parsed_query.get("state", [""])[0]
return AuthUrlPayload(
auth_url=auth_url,
state=user_id,
state=oauth_state,
provider_id=self.key
)

def handle_callback(self, code: str, state: str) -> ConnectionResult:
try:
# GSC handles callback internally through the service
credentials = self._service.exchange_code_for_tokens(code)
if not credentials:
callback_result = self._service.handle_oauth_callback(code, state)
if not callback_result.get("success"):
return ConnectionResult(
success=False,
user_id=state,
user_id=callback_result.get("user_id", state),
provider_id=self.key,
error="Failed to exchange code for tokens"
error=callback_result.get("error") or "Failed to handle OAuth callback"
)

user_id = callback_result.get("user_id", state)
credentials = self._service.load_user_credentials(user_id)

return ConnectionResult(
success=True,
user_id=state,
user_id=user_id,
provider_id=self.key,
access_token=getattr(credentials, 'token', None),
refresh_token=getattr(credentials, 'refresh_token', None),
expires_at=getattr(credentials, 'expiry', None),
scope=getattr(credentials, 'scopes', None),
access_token=getattr(credentials, 'token', None) if credentials else None,
refresh_token=getattr(credentials, 'refresh_token', None) if credentials else None,
expires_at=getattr(credentials, 'expiry', None) if credentials else None,
scope=getattr(credentials, 'scopes', None) if credentials else None,
account_info={
'email': getattr(credentials, 'email', None),
'verified': getattr(credentials, 'verified', False)
'email': getattr(credentials, 'email', None) if credentials else None,
'verified': getattr(credentials, 'verified', False) if credentials else False
}
)
except Exception as e:
logger.error(f"GSC callback failed: {e}")
return ConnectionResult(
success=False,
user_id=state,
Expand Down Expand Up @@ -290,9 +298,8 @@ def refresh_token(self, user_id: str) -> Optional[RefreshResult]:
def disconnect(self, user_id: str) -> bool:
try:
# GSC doesn't have explicit disconnect, remove credentials from storage
return self._service.revoke_user_credentials(user_id)
return self._service.revoke_user_access(user_id)
except Exception as e:
from loguru import logger
logger.error(f"GSC disconnect failed for user {user_id}: {e}")
return False

Expand Down
147 changes: 147 additions & 0 deletions backend/services/test_gsc_auth_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import sys
import types
from types import SimpleNamespace

from fastapi import FastAPI
from fastapi.testclient import TestClient


# Stub heavy modules before importing router under test.
fake_registry_module = types.ModuleType("services.integrations.registry")
fake_registry_module.get_provider = lambda _key: None
sys.modules["services.integrations.registry"] = fake_registry_module

fake_gsc_service_module = types.ModuleType("services.gsc_service")


class _FakeGSCService:
def handle_oauth_callback(self, code, state):
return {"success": True, "user_id": "user-123"}

def get_site_list(self, user_id):
return []

def clear_incomplete_credentials(self, user_id):
return True

def get_search_analytics(self, *args, **kwargs):
return {}

def get_sitemaps(self, *args, **kwargs):
return []

def get_latest_cached_analytics(self, *args, **kwargs):
return None


fake_gsc_service_module.GSCService = _FakeGSCService
sys.modules["services.gsc_service"] = fake_gsc_service_module

from routers import gsc_auth # noqa: E402
from middleware.auth_middleware import get_current_user # noqa: E402


class MockProvider:
def __init__(self, *, connected=True, disconnect_success=True):
self._connected = connected
self._disconnect_success = disconnect_success

def get_auth_url(self, user_id: str):
return SimpleNamespace(
auth_url="https://accounts.google.com/o/oauth2/auth?state=test-state-123",
state="test-state-123",
)

def get_connection_status(self, user_id: str):
return SimpleNamespace(connected=self._connected)

def disconnect(self, user_id: str):
return self._disconnect_success


def _build_client(monkeypatch, provider: MockProvider, gsc_service_mock: SimpleNamespace):
app = FastAPI()
app.include_router(gsc_auth.router)

app.dependency_overrides[get_current_user] = lambda: {"id": "user-123"}

monkeypatch.setattr(gsc_auth, "get_provider", lambda _key: provider)
monkeypatch.setattr(gsc_auth, "gsc_service", gsc_service_mock)
monkeypatch.setattr(
gsc_auth,
"get_trusted_origins_for_redirect",
lambda *_args, **_kwargs: ["http://testserver"],
)

return TestClient(app)


def test_auth_url_generation(monkeypatch):
provider = MockProvider()
gsc_service_mock = SimpleNamespace()
client = _build_client(monkeypatch, provider, gsc_service_mock)

response = client.get("/gsc/auth/url")

assert response.status_code == 200
body = response.json()
assert body["auth_url"].startswith("https://accounts.google.com")
assert body["state"] == "test-state-123"
assert "http://testserver" in body["trusted_origins"]


def test_callback_success(monkeypatch):
provider = MockProvider()
gsc_service_mock = SimpleNamespace(
handle_oauth_callback=lambda code, state: {"success": True, "user_id": "user-123"}
)
client = _build_client(monkeypatch, provider, gsc_service_mock)

response = client.get("/gsc/callback", params={"code": "abc", "state": "state"})

assert response.status_code == 200
assert "Connection Successful" in response.text
assert "GSC_AUTH_SUCCESS" in response.text


def test_callback_failure(monkeypatch):
provider = MockProvider()
gsc_service_mock = SimpleNamespace(
handle_oauth_callback=lambda code, state: {"success": False, "error": "invalid_state"}
)
client = _build_client(monkeypatch, provider, gsc_service_mock)

response = client.get("/gsc/callback", params={"code": "abc", "state": "state"})

assert response.status_code == 400
assert "Connection Failed" in response.text
assert "GSC_AUTH_ERROR" in response.text


def test_status_connected(monkeypatch):
provider = MockProvider(connected=True)
gsc_service_mock = SimpleNamespace(
get_site_list=lambda _user_id: [{"siteUrl": "https://example.com/", "permissionLevel": "siteOwner"}]
)
client = _build_client(monkeypatch, provider, gsc_service_mock)

response = client.get("/gsc/status")

assert response.status_code == 200
body = response.json()
assert body["connected"] is True
assert body["total_sites"] == 1
assert body["sites"][0]["siteUrl"] == "https://example.com/"


def test_disconnect(monkeypatch):
provider = MockProvider(disconnect_success=True)
gsc_service_mock = SimpleNamespace()
client = _build_client(monkeypatch, provider, gsc_service_mock)

response = client.delete("/gsc/disconnect")

assert response.status_code == 200
body = response.json()
assert body["success"] is True
assert "Successfully disconnected" in body["message"]
136 changes: 136 additions & 0 deletions docs/reviews/gsc-onboarding-step5-review.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Step 5 Onboarding Integrations Review: GSC (Google Search Console)

## Scope
This review focuses on **Onboarding Step 5 (Integrations)** with emphasis on:
- GSC integration behavior in current code.
- Robustness of the OAuth + token + callback flow.
- Product/value additions for ALwrity end users (content writers and digital marketers).

---

## Current Implementation Snapshot

### What works well
1. **Step 5 is clearly modeled as Integrations in onboarding** and is optional in the wizard, reducing initial friction for first-time users.
2. **Unified OAuth routes exist** (`/oauth/{provider}`), with common response models and sanitization hooks for status payloads.
3. **GSC service has state TTL and delete-on-consume behavior** in callback verification, which is a good anti-replay baseline.
4. **Credential refresh on load** is handled in `load_user_credentials`, improving resiliency for long-lived usage.
5. **Analytics calls are cached** in DB with expiration, reducing repeated external API usage.

### Key robustness concerns (high-priority)
1. **Legacy GSC router has Python/JS mixing and backend↔frontend boundary breaks**.
- Uses `console.warn(...)` in Python router handlers.
- Attempts to import frontend module (`from frontend.src.api.unifiedOAuth import unifiedOAuthClient`) inside backend.
- Includes TypeScript-style constructs (`details = status_response.details as any`) in Python.
These patterns are brittle and can fail at runtime or mask real fallback behavior.

2. **Provider callback contract mismatch in unified flow**.
- Unified route calls provider callback with `(code, state)`.
- GSC provider callback currently exchanges code for tokens but does not persist by user unless state maps reliably to user semantics in provider internals.
- GSC provider currently returns `state=user_id` in auth payload even though service-generated OAuth state is different, creating potential confusion and validation risk.

3. **Disconnect method mismatch risk**.
- GSC integration provider calls `revoke_user_credentials(...)`, while GSC service exposes `revoke_user_access(...)`.
- This can silently break disconnects depending on call path.

4. **Weak popup message-origin verification** in frontend GSC onboarding hook.
- The message handler accepts any `postMessage` object with expected `type` and does not validate `event.origin` against trusted origins.
- This is a cross-window trust gap.

5. **State and callback UX split between legacy and unified endpoints**.
- Frontend GSC flow still uses `/gsc/*` endpoints.
- Unified OAuth client exists and is stronger architecturally, but onboarding GSC path is not fully migrated.
- This increases maintenance complexity and can create divergent behaviors.

### Medium-priority concerns
1. **Over-aggressive pre-connect cleanup**.
- On connect, hook calls clear-incomplete and then disconnect before opening OAuth.
- If the OAuth popup fails or is blocked, user can end up disconnected unexpectedly.

2. **Limited actionable error typing surfaced to UI**.
- Many errors are logged and rethrown generically.
- Marketers/writers need practical remediation (e.g., "add property in GSC", "grant full user", "scope missing").

3. **Site selection and intent capture is underpowered**.
- Connected sites are listed, but there is no explicit primary property selection during onboarding for downstream SEO workflows.

---

## Value Additions for ALwrity End Users

## A) Immediate UX + reliability wins (1-2 sprints)
1. **Full migration of Step 5 GSC flow to unified OAuth endpoints**.
- Route all GSC connect/status/disconnect calls via `/oauth/gsc/*`.
- Keep legacy routes as wrappers only until removal.

2. **Harden popup callback security**.
- Enforce `event.origin` checks against backend-provided `trusted_origins`.
- Include nonce/correlation id in message payload and verify before accepting success.

3. **Safer connect behavior**.
- Remove forced disconnect before OAuth start.
- Use staged connect: keep existing connection active until new callback success.

4. **Improve Step 5 diagnostics panel**.
- Show status cards: Connected, token health, last sync, connected property count.
- Add one-click "Test connection" and "Fetch properties".

## B) Content-writer value features (2-4 sprints)
1. **Keyword-to-content brief suggestions** from query-level GSC data.
- Flag high-impression/low-CTR queries and auto-propose title/meta/outline improvements.

2. **Intent-aware refresh queue**.
- Weekly surfacing of opportunities: declining pages, rising queries, missing intent coverage.

3. **Property-aware publishing recommendations**.
- If WordPress/Wix connected + GSC connected, inject SEO guardrails at publish time (target query, title length, schema hints, internal links).

## C) Digital-marketing pro features (4-8 sprints)
1. **Multi-property portfolio dashboard** (agency mode).
- Compare GSC signals across properties, segments, countries, devices.

2. **Automated content decay alerts**.
- Detect rank/CTR drops and trigger rewrite workflows.

3. **Attribution bridge**.
- Combine GSC query/page trends with publishing calendar to show content impact windows.

4. **Competitor-overlap expansion**.
- Feed GSC winners/losers into research/strategy modules to generate gap campaigns.

---

## Recommended Technical Plan (Prioritized)

### Priority 0: Correctness and security
- Replace invalid backend imports/syntax in `backend/routers/gsc_auth.py` with backend-native service calls only.
- Align provider contracts in `services/integrations/registry.py` for GSC:
- Ensure state semantics are consistent and mapped to persisted OAuth state.
- Ensure callback persists credentials through one canonical path.
- Fix disconnect method to call existing service method.
- Add popup message origin + nonce verification in `useGSCConnection.ts`.

### Priority 1: Unified path completion
- Switch onboarding GSC API client to unified OAuth client.
- Keep `/gsc/*` only for backward compatibility and add explicit deprecation timeline.
- Add integration tests for auth URL generation, callback success/failure, status, disconnect.

### Priority 2: User-facing outcomes
- Add primary-site selection during onboarding.
- Add immediate post-connect data quality checks (property permissions, data availability window, indexing health).
- Add guided opportunities panel from cached query analytics.

---

## Suggested KPIs to track after improvements
- Step 5 completion rate (overall and GSC-specific).
- OAuth callback success rate (first attempt).
- % users with healthy token status after 7/30 days.
- Time-to-first-insight after connecting GSC.
- Content optimization adoption rate from GSC recommendations.
- Lift in CTR / clicks for pages acted upon through recommendations.

---

## Bottom line
ALwrity has the right architecture direction (unified OAuth, token validation, onboarding step modeling), but the current GSC path still has **critical robustness gaps at the legacy bridge layer**. Stabilizing callback/state semantics and fully converging Step 5 onto unified OAuth will improve reliability quickly. From there, the largest end-user value comes from converting GSC data into **actionable writing and campaign decisions**, not just connection status.
Loading