-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathshared.py
More file actions
396 lines (311 loc) · 14.5 KB
/
shared.py
File metadata and controls
396 lines (311 loc) · 14.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
"""
Shared Utilities Module for LinuxReport
This module contains core shared utilities, constants, and configuration management
for the LinuxReport project. It provides centralized access to common functionality
across the application including caching, mode management, RSS feed data structures,
and configuration handling.
Key Features:
- Configuration management and mode handling
- Distributed caching with diskcache and memory caches
- RSS feed data structures and utilities
- Lock management for distributed operations
- Application-wide constants and settings
- Dynamic configuration loading based on report modes
Note: Rate limiting, web bot detection, and request utilities have been moved to
request_utils.py. Application initialization and Flask setup have been moved to
app.py. Database models and cache wrappers have been moved to models.py.
Author: LinuxReport System
License: See LICENSE file
"""
# Standard library imports
import datetime
import os
import sys
from enum import Enum
from pathlib import Path
# Third-party imports
import diskcache
from cacheout import Cache
import ipaddress
# Flask-related imports
from flask_limiter import Limiter
from flask_restful import Api
# Local application imports
import FeedHistory
from SqliteLock import DiskcacheSqliteLock
from models import LockBase, DiskCacheWrapper, RssFeed, g_logger
from app_config import get_settings_config, get_allowed_domains, get_allowed_requester_domains, get_cdn_config, get_object_store_config, get_welcome_html, get_reports_config, get_storage_config, get_proxy_server, get_proxy_username, get_proxy_password
from request_utils import get_rate_limit_key, dynamic_rate_limit, get_ip_prefix, format_last_updated
# =============================================================================
# FLASK MONITORING DASHBOARD CONFIGURATION
# =============================================================================
# Flask-MonitoringDashboard configuration
# Hard-coded off here; the code that consumes this flag is outside this module.
FLASK_DASHBOARD = False
# =============================================================================
# BROWSER ENGINE CONFIGURATION
# =============================================================================
# Global flag to choose between Selenium and Playwright for web scraping
# Set to True to use Playwright, False to use Selenium
USE_PLAYWRIGHT = False
# =============================================================================
# CONFIGURATION LOADING AND SETTINGS
# =============================================================================
# Load configuration from centralized config manager
# (runs at import time, so every importer of this module triggers the load)
settings = get_settings_config()
# Export user-configurable settings
ALLOWED_DOMAINS = get_allowed_domains()
ALLOWED_REQUESTER_DOMAINS = get_allowed_requester_domains()
# Unlike the two values above, this flag is a literal and is not read from config.
ENABLE_CORS = True
# =============================================================================
# MODE ENUMERATION AND CONFIGURATION
# =============================================================================
class Mode(str, Enum):
    """
    String-valued enumeration of the report modes known to the application.

    The members declared here are the built-in modes. ``from_config`` builds a
    separate enum that additionally contains modes declared in the
    configuration file.
    """

    # Base modes that are always available
    LINUX_REPORT = "linux"
    COVID_REPORT = "covid"
    TECHNO_REPORT = "techno"
    AI_REPORT = "ai"
    PYTHON_REPORT = "python"
    TRUMP_REPORT = "trump"
    SPACE_REPORT = "space"
    PV_REPORT = "pv"
    ROBOT_REPORT = "robot"

    @classmethod
    def from_config(cls, config_modes):
        """
        Build a new ``Mode`` enum with the base modes plus configured modes.

        Each configured entry contributes a member whose name is the
        upper-cased ``'name'`` value and whose value is the ``'name'`` value
        as written. An entry whose upper-cased name already exists is ignored.

        Args:
            config_modes (list | None): Mode entries from the config file, each
                a mapping with a ``'name'`` key. ``None`` or an empty list
                yields an enum containing only the base modes.

        Returns:
            Enum: A new str-mixin enum class named ``Mode``. It is created with
            the functional Enum API, so it does not carry this classmethod.

        Raises:
            KeyError: If an entry has no ``'name'`` key.
        """
        # Start with base modes
        members = {member.name: member.value for member in cls}
        # An empty "modes:" key in the config is loaded as None rather than [],
        # so treat None the same as "no extra modes" instead of crashing.
        for entry in config_modes or ():
            # setdefault keeps the first definition: base modes always win
            members.setdefault(entry['name'].upper(), entry['name'])
        # Create new enum class with all modes
        return Enum('Mode', members, type=str)
# Create Mode enum with config modes
reports_config = get_reports_config()
# This rebinds the module-level name `Mode` to the enum returned by
# from_config(). That enum is built with the functional Enum API, so from this
# point on `Mode` no longer exposes the `from_config` classmethod.
Mode = Mode.from_config(reports_config.get('modes', []))
# Simple map from Mode enum to URL identifiers - identical to enum values
# (iterating an Enum yields canonical members only; a member whose value
# duplicates an earlier member's value is an alias and is not included)
MODE_MAP = {mode: mode.value for mode in Mode}
# Config modules derived from mode values,
# e.g. the "linux" mode maps to the module name "linux_report_settings"
CONFIG_MODULES = {mode: f"{mode.value}_report_settings" for mode in Mode}
# =============================================================================
# PATH AND STORAGE CONFIGURATION
# =============================================================================
# Directory that contains this module; used for code and the private cache
PATH = Path(__file__).parent
# Storage settings, including the shared path used for weather, etc.
storage_config = get_storage_config()
# Shared path selection: a local "test_cache" directory when running tests,
# otherwise the configured shared path.
# NOTE(review): the second check is a substring match on the whole of
# sys.argv[0], so any launch path that merely contains "test" also selects
# the test cache - confirm this breadth is intended.
SPATH = (
    str(PATH / 'test_cache')
    if 'pytest' in sys.modules or 'test' in sys.argv[0]
    else storage_config['shared_path']
)
TZ = FeedHistory.FeedConfig.TZ
# =============================================================================
# CACHE EXPIRATION CONSTANTS
# =============================================================================
# Cache expiration time constants (in seconds)
# NOTE(review): two names do not match their values - EXPIRE_MINUTES is a fixed
# 5 minutes and EXPIRE_DAY is 12 hours, not 24. Check callers before changing.
EXPIRE_MINUTES = 60 * 5  # 5 minutes
EXPIRE_HOUR = 3600  # 1 hour
EXPIRE_DAY = 3600 * 12  # 12 hours
EXPIRE_WEEK = 86400 * 7  # 7 days
EXPIRE_YEARS = 86400 * 365 * 2  # 2 years
# =============================================================================
# APPLICATION MODE AND VERSION SETTINGS
# =============================================================================
# Current application mode
# Hard-coded per deployment. Later in this module it selects the per-mode
# settings module (via CONFIG_MODULES) and is embedded in ABOVE_HTML_FILE and
# in the feed history file name.
MODE = Mode.ROBOT_REPORT
# URL cookie version for cache invalidation
URLS_COOKIE_VERSION = "2"
# Enable or disable URL customization functionality (both reordering and adding custom URLs)
ENABLE_URL_CUSTOMIZATION = True
# =============================================================================
# CDN AND IMAGE DELIVERY SETTINGS
# =============================================================================
# CDN and image settings from config
# Values are read by direct key lookup, so a missing key fails at import time
# (a KeyError, assuming the helper returns a plain dict - TODO confirm).
cdn_config = get_cdn_config()
CDN_IMAGE_URL = cdn_config['image_url']
ENABLE_URL_IMAGE_CDN_DELIVERY = cdn_config['enabled']
# =============================================================================
# OBJECT STORAGE CONFIGURATION
# =============================================================================
# Enable fetching non-custom feeds from object store instead of original URLs
# (same direct key lookup as the CDN settings above)
object_store_config = get_object_store_config()
ENABLE_OBJECT_STORE_FEEDS = object_store_config['enabled']
OBJECT_STORE_FEED_URL = object_store_config['feed_url']
# NOTE(review): unit is not shown here; presumably seconds - confirm in config.
OBJECT_STORE_FEED_TIMEOUT = object_store_config['feed_timeout']
# Enable publishing feeds to object store when fetched
ENABLE_OBJECT_STORE_FEED_PUBLISH = object_store_config['enable_publish']
# =============================================================================
# USER INTERFACE SETTINGS
# =============================================================================
# Enable infinite scroll view mode for mobile
INFINITE_SCROLL_MOBILE = True
# Debug mode for infinite scroll (enables on desktop)
# NOTE(review): a debug flag left at True - confirm this is intended for production.
INFINITE_SCROLL_DEBUG = True
# =============================================================================
# REDDIT INTEGRATION SETTINGS
# =============================================================================
# When True:
# - workers.py will fetch Reddit feeds via the Reddit API, using fetch_reddit_feed_as_feedparser()
# from Reddit.py, instead of legacy Tor / HTML RSS scraping.
# When False (default):
# - Existing behavior is preserved; Reddit URLs are handled by the current RedditFetcher
# using RSS / Tor / feedparser logic.
ENABLE_REDDIT_API_FETCH = False
# =============================================================================
# GEOLOCATION SETTINGS
# =============================================================================
# Disable IP-based geolocation when user provides browser geolocation
# When True, the system will use default coordinates (Detroit) instead of IP-based location
# when browser geolocation is not available or denied
# NOTE(review): the first line and the two lines after it describe different
# conditions; the consumer of this flag is outside this module - confirm which
# description is accurate.
DISABLE_IP_GEOLOCATION = False
# Client geolocation configuration
# When True: Client geolocation is disabled, server uses IP-based location or defaults
# When False: Client geolocation is enabled, server respects client-provided coordinates
# As set here, client geolocation is disabled while IP-based lookup stays enabled.
DISABLE_CLIENT_GEOLOCATION = True
# =============================================================================
# PROXYING SETTINGS
# =============================================================================
# Global flag to enable/disable worker proxying functionality
# When True, adds proxy headers to requests in workers.py, seleniumfetch.py, and Tor.py
# When False, requests are made directly without proxy headers
WORKER_PROXYING = False
# Proxy server configuration (loaded from config.yaml via app_config.py)
# These are read once at import time, regardless of the WORKER_PROXYING value.
# PROXY_PASSWORD holds a credential: keep it out of logs and error messages.
PROXY_SERVER = get_proxy_server()
PROXY_USERNAME = get_proxy_username()
PROXY_PASSWORD = get_proxy_password()
# =============================================================================
# RSS FEED CONFIGURATION
# =============================================================================
# Timeout value in seconds for RSS feed operations
RSS_TIMEOUT = 30
# Maximum number of items to process / remember in RSS feeds
MAX_ITEMS = 40
# Welcome message from config
WELCOME_HTML = get_welcome_html()
# =============================================================================
# DYNAMIC CONFIGURATION LOADING
# =============================================================================
# Load configuration module based on current mode
# CONFIG_MODULES maps each mode to "<mode value>_report_settings"; a mode that
# is missing from the map is rejected before any import is attempted.
config_module_name = CONFIG_MODULES.get(MODE)
if not config_module_name:
    raise ValueError("Invalid mode specified.")
# A non-empty fromlist makes __import__ return the named module itself.
config_settings = __import__(config_module_name, fromlist=["CONFIG"])

# Extract configuration values from the module's CONFIG object
ALL_URLS = config_settings.CONFIG.ALL_URLS
SITE_URLS = config_settings.CONFIG.SITE_URLS
USER_AGENT = config_settings.CONFIG.USER_AGENT
URL_IMAGES = config_settings.CONFIG.URL_IMAGES
# FAVICON and LOGO_URL are stored relative to the image base URL
FAVICON = URL_IMAGES + config_settings.CONFIG.FAVICON
LOGO_URL = URL_IMAGES + config_settings.CONFIG.LOGO_URL
WEB_DESCRIPTION = config_settings.CONFIG.WEB_DESCRIPTION
WEB_TITLE = config_settings.CONFIG.WEB_TITLE
ABOVE_HTML_FILE = f"{MODE.value}reportabove.html"
CUSTOM_FETCH_CONFIG = config_settings.CONFIG.CUSTOM_FETCH_CONFIG
SITE_PATH = config_settings.CONFIG.PATH
# DEFAULT_THEME is optional in the per-mode config; fall back to 'silver'
DEFAULT_THEME = getattr(config_settings.CONFIG, 'DEFAULT_THEME', 'silver')

# Override image URLs with CDN if enabled
if ENABLE_URL_IMAGE_CDN_DELIVERY:
    URL_IMAGES = CDN_IMAGE_URL
    FAVICON = CDN_IMAGE_URL + config_settings.CONFIG.FAVICON
    LOGO_URL = CDN_IMAGE_URL + config_settings.CONFIG.LOGO_URL

# String form of the default site URL order
STANDARD_ORDER_STR = str(SITE_URLS)
# =============================================================================
# RATE LIMITING CONFIGURATION
# =============================================================================
# Initialize Flask-Limiter with dynamic rate limiting
# The client key comes from get_rate_limit_key; the default applied here is a
# static "10 per minute" using the fixed-window strategy.
# No Flask app is passed, so the limiter must be attached to the app elsewhere
# (presumably via init_app in app.py - TODO confirm).
limiter = Limiter(
    key_func=get_rate_limit_key,
    default_limits=["10 per minute"],
    strategy="fixed-window"
)
# =============================================================================
# FLASK-RESTFUL API GLOBAL
# =============================================================================
# Module-wide Flask-RESTful API handle; stays None until app.py registers one
API = None


def set_flask_restful_api(api_instance):
    """
    Store ``api_instance`` in the module-level ``API`` global.

    Args:
        api_instance: Flask-RESTful API instance to publish to other modules
    """
    global API
    API = api_instance
# =============================================================================
# GLOBAL CACHE INSTANCES
# =============================================================================
# Initialize global cache instances
# The feed history file lives next to this module and is named after the current mode.
history = FeedHistory.FeedHistory(data_file=f"{PATH}/feed_history-{str(MODE.value)}")
g_c = DiskCacheWrapper(PATH)  # Private cache for each instance, stored in this module's directory
g_cs = DiskCacheWrapper(SPATH)  # Shared cache for all instances (weather, etc.), stored under SPATH
g_cm = Cache()  # In-memory cache with per-item TTL; created with cacheout's default arguments
# =============================================================================
# LOCK MANAGEMENT
# =============================================================================
# Key under which the global fetch-mode lock is taken
GLOBAL_FETCH_MODE_LOCK_KEY = "global_fetch_mode"
# Lock implementation selected for this deployment
LOCK_CLASS = DiskcacheSqliteLock


def get_lock(lock_name, owner_prefix=None):
    """
    Build a DiskcacheSqliteLock backed by the shared disk cache.

    Args:
        lock_name: Name identifying the lock
        owner_prefix: Optional prefix passed through for lock owner identification

    Returns:
        DiskcacheSqliteLock: Lock object constructed on ``g_cs.cache``
    """
    # The lock class is named directly here; LOCK_CLASS is not consulted.
    shared_backend = g_cs.cache
    return DiskcacheSqliteLock(lock_name, shared_backend, owner_prefix)
# Original factory implementation (commented out for reference):
# def get_lock(lock_name, owner_prefix=None):
# """Factory to get a lock instance using the selected lock class."""
# if issubclass(LOCK_CLASS, FileLockWrapper):
# return LOCK_CLASS(lock_name)
# elif issubclass(LOCK_CLASS, DiskcacheSqliteLock):
# return LOCK_CLASS(lock_name, g_cs.cache, owner_prefix)
# else:
# raise TypeError(f"Unsupported lock class: {LOCK_CLASS}")
# =============================================================================
# CHAT CACHE CONFIGURATION
# =============================================================================
# Chat storage selection for chat comments and banned IPs:
#   True  -> use the shared cache (g_cs)
#   False -> use the site-specific cache (g_c)
USE_SHARED_CACHE_FOR_CHAT = False


def get_chat_cache() -> DiskCacheWrapper:
    """
    Pick the cache that backs chat features.

    Returns:
        DiskCacheWrapper: ``g_cs`` when USE_SHARED_CACHE_FOR_CHAT is truthy,
        otherwise ``g_c``
    """
    if USE_SHARED_CACHE_FOR_CHAT:
        return g_cs
    return g_c
# =============================================================================
# UTILITY FUNCTIONS
# =============================================================================
def clear_page_caches():
    """
    Remove every cached page from the in-memory cache.

    Only entries whose key is a string starting with ``'page-cache:'`` are
    deleted; all other entries in ``g_cm`` are left untouched. Dropping these
    entries forces pages to be regenerated on the next request.
    """
    # Collect the matching keys before deleting so the cache is never mutated
    # while it is being scanned. g_cm is a general-purpose cache, so keys are
    # not guaranteed to be strings; non-string keys are skipped rather than
    # raising AttributeError on .startswith().
    page_keys = [
        key for key in g_cm.keys()
        if isinstance(key, str) and key.startswith('page-cache:')
    ]
    for key in page_keys:
        g_cm.delete(key)