Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ async def normalize_gsc_analytics(gsc_data: Dict[str, Any]) -> Dict[str, Any]:
# Extract metrics from GSC data
metrics = gsc_data.get('metrics', {})
data = gsc_data.get('data', {})
query_page_opportunities = data.get('query_page_opportunities', []) or metrics.get('query_page_opportunities', [])

normalized = {
'traffic_metrics': {
Expand All @@ -31,6 +32,7 @@ async def normalize_gsc_analytics(gsc_data: Dict[str, Any]) -> Dict[str, Any]:
},
'top_queries': data.get('top_queries', []) or metrics.get('top_queries', []),
'top_pages': data.get('top_pages', []) or metrics.get('top_pages', []),
'query_page_opportunities': query_page_opportunities,
'traffic_sources': {
'organic_search': {
'clicks': metrics.get('total_clicks', 0) or data.get('clicks', 0),
Expand Down
36 changes: 36 additions & 0 deletions backend/services/analytics/handlers/gsc_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,40 @@ def _process_gsc_metrics(self, search_analytics: Dict[str, Any]) -> Dict[str, An
except Exception as e:
logger.warning(f"Failed processing top_pages: {e}")

# Prepare query-page opportunities for refresh-vs-new decisions.
# Builds a compact, ranked list of (query, page) combinations so downstream
# logic can decide whether to refresh an existing page or create a new one.
query_page_opportunities = []
try:
    # Rows originate from a GSC Search Analytics request with
    # dimensions=['query', 'page']; each row's 'keys' holds [query, page].
    qp_rows = search_analytics.get('query_page_data', {}).get('rows', [])
    if qp_rows:
        # Keep only the 100 highest-visibility combinations, ordered by
        # impressions first, then clicks (None coerced to 0 for sorting).
        sorted_qp_rows = sorted(
            qp_rows,
            key=lambda x: (x.get('impressions', 0) or 0, x.get('clicks', 0) or 0),
            reverse=True,
        )[:100]
        for row in sorted_qp_rows:
            keys = row.get('keys', [])
            # Both dimensions (query and page) are required; skip malformed rows.
            if not keys or len(keys) < 2:
                continue
            # Defensive: handles keys that arrive as nested dicts
            # ({'keys': [...]}) as well as plain strings — assumed payload
            # variation, TODO confirm against the actual GSC response shape.
            query_key = keys[0]['keys'][0] if isinstance(keys[0], dict) else str(keys[0])
            page_key = keys[1]['keys'][0] if isinstance(keys[1], dict) else str(keys[1])
            clicks_val = row.get('clicks', 0) or 0
            impr_val = row.get('impressions', 0) or 0
            # Prefer the API-reported CTR (converted from a fraction to a
            # percentage); otherwise derive it from clicks/impressions,
            # guarding against division by zero.
            raw_ctr = row.get('ctr', None)
            if raw_ctr is not None:
                ctr_percent = round(float(raw_ctr) * 100, 2)
            else:
                ctr_percent = round(((clicks_val / impr_val) * 100), 2) if impr_val > 0 else 0.0
            query_page_opportunities.append({
                'query': query_key,
                'page': page_key,
                'clicks': clicks_val,
                'impressions': impr_val,
                'ctr': ctr_percent,  # percentage, rounded to 2 decimals
                'position': round(row.get('position', 0) or 0, 2),
            })
except Exception as e:
    # Best-effort enrichment: log and fall through with whatever was built
    # (possibly an empty list) rather than failing the whole metrics pass.
    logger.warning(f"Failed processing query_page_opportunities: {e}")

# Detect Cannibalization (query mapping to multiple pages)
cannibalization = []
try:
Expand Down Expand Up @@ -382,6 +416,7 @@ def _process_gsc_metrics(self, search_analytics: Dict[str, Any]) -> Dict[str, An
'total_queries': len(top_queries_source) if top_queries_source else 0,
'top_queries': top_queries,
'top_pages': top_pages,
'query_page_opportunities': query_page_opportunities,
'cannibalization': cannibalization
}

Expand All @@ -397,6 +432,7 @@ def _process_gsc_metrics(self, search_analytics: Dict[str, Any]) -> Dict[str, An
'total_queries': 0,
'top_queries': [],
'top_pages': [],
'query_page_opportunities': [],
'error': str(e)
}

Expand Down
35 changes: 30 additions & 5 deletions backend/services/gsc_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

from dotenv import load_dotenv

QUERY_PAGE_OPPORTUNITIES_ROW_LIMIT = 2500
QUERY_PAGE_OPPORTUNITIES_MAX_WINDOW_DAYS = 90

class GSCService:
"""Service for Google Search Console integration."""

Expand Down Expand Up @@ -514,15 +517,18 @@ def get_search_analytics(self, user_id: str, site_url: str,
page_rows = []
page_row_count = 0

# Step 5: Get query+page combined data for mapping queries to pages
# Step 5: Get query+page combined data for mapping queries to pages.
# Keep this request bounded because query-page combinations can grow quickly
# for larger date windows/sites.
qp_rows = []
qp_row_count = 0
try:
qp_start_date, qp_end_date = self._get_query_page_opportunity_window(start_date, end_date)
qp_request = {
'startDate': start_date,
'endDate': end_date,
'startDate': qp_start_date,
'endDate': qp_end_date,
'dimensions': ['query', 'page'],
'rowLimit': 1000
'rowLimit': QUERY_PAGE_OPPORTUNITIES_ROW_LIMIT
}
logger.info(f"GSC Query+Page request for user {user_id}: {qp_request}")
qp_response = service.searchanalytics().query(
Expand Down Expand Up @@ -553,7 +559,12 @@ def get_search_analytics(self, user_id: str, site_url: str,
},
'query_page_data': {
'rows': qp_rows,
'rowCount': qp_row_count
'rowCount': qp_row_count,
'requested_window': {
'startDate': qp_start_date,
'endDate': qp_end_date,
'rowLimit': QUERY_PAGE_OPPORTUNITIES_ROW_LIMIT,
},
},
'verification_data': {
'rows': verification_rows,
Expand Down Expand Up @@ -596,6 +607,20 @@ def get_search_analytics(self, user_id: str, site_url: str,
except Exception as e:
logger.error(f"Error getting search analytics for user {user_id}: {e}")
raise

def _get_query_page_opportunity_window(self, start_date: str, end_date: str) -> tuple[str, str]:
    """Build a bounded query-page window to prevent oversized opportunity payloads.

    Args:
        start_date: Requested window start, ``YYYY-MM-DD``.
        end_date: Requested window end, ``YYYY-MM-DD``.

    Returns:
        ``(start, end)`` as ``YYYY-MM-DD`` strings. The start is pulled
        forward so the window spans at most
        ``QUERY_PAGE_OPPORTUNITIES_MAX_WINDOW_DAYS`` days (inclusive of both
        endpoints) and is never later than the end date.
    """
    try:
        parsed_end = datetime.strptime(end_date, '%Y-%m-%d')
        parsed_start = datetime.strptime(start_date, '%Y-%m-%d')
    except Exception:
        # If either date is malformed, reset BOTH bounds to a sane 30-day
        # window ending now, so the pair always stays consistent.
        parsed_end = datetime.now()
        parsed_start = parsed_end - timedelta(days=30)

    # Inclusive window: a cap of N days means (end - (N - 1) days) is the
    # earliest allowed start.
    max_window_start = parsed_end - timedelta(days=QUERY_PAGE_OPPORTUNITIES_MAX_WINDOW_DAYS - 1)
    bounded_start = max(parsed_start, max_window_start)

    # Fix: guard against inverted input (start_date after end_date), which
    # previously produced start > end — an invalid range for the GSC API.
    if bounded_start > parsed_end:
        bounded_start = parsed_end

    return bounded_start.strftime('%Y-%m-%d'), parsed_end.strftime('%Y-%m-%d')

def get_sitemaps(self, user_id: str, site_url: str) -> List[Dict[str, Any]]:
"""Get sitemaps from GSC."""
Expand Down