-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgithub_repo_scanner.py
More file actions
392 lines (328 loc) Β· 14.6 KB
/
github_repo_scanner.py
File metadata and controls
392 lines (328 loc) Β· 14.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
#!/usr/bin/env python3
"""
BrainSAIT GitHub Repository Scanner
Automatically discovers and categorizes your GitHub repositories for store integration
"""
import requests
import json
import os
from datetime import datetime, timedelta
from typing import List, Dict, Any
class BrainSAITRepoScanner:
    """Scan a GitHub user's repositories and turn each into a store entry.

    Each qualifying repository is categorized, priced, and enriched with
    features extracted from its README, producing dicts ready for the
    BrainSAIT store front end.

    Requires a GitHub personal access token with public-repo read access.
    """

    def __init__(self, github_token: str, username: str = "fadil369"):
        self.github_token = github_token
        self.username = username
        # Classic token auth header for GitHub REST API v3.
        self.headers = {
            "Authorization": f"token {github_token}",
            "Accept": "application/vnd.github.v3+json"
        }
        self.base_url = "https://api.github.com"

    def get_repositories(self) -> List[Dict]:
        """Fetch all repositories owned by the user.

        Paginates 100 repos at a time until an empty page or an HTTP error.

        Returns:
            List of repository dicts as returned by the GitHub API.
        """
        repos: List[Dict] = []
        page = 1
        while True:
            url = f"{self.base_url}/users/{self.username}/repos"
            params = {
                "type": "owner",
                "sort": "updated",
                "per_page": 100,
                "page": page
            }
            # timeout= so a stalled connection cannot hang the scan forever
            response = requests.get(url, headers=self.headers, params=params,
                                    timeout=30)
            if response.status_code != 200:
                print(f"Error fetching repositories: {response.status_code}")
                break
            batch = response.json()
            if not batch:
                # Empty page means we have paged past the last repository.
                break
            repos.extend(batch)
            page += 1
        print(f"Found {len(repos)} repositories")
        return repos

    def get_repository_details(self, repo_name: str) -> Dict:
        """Fetch detailed information for one repository.

        Returns:
            The repository dict, or {} on any non-200 response.
        """
        url = f"{self.base_url}/repos/{self.username}/{repo_name}"
        response = requests.get(url, headers=self.headers, timeout=30)
        if response.status_code == 200:
            return response.json()
        return {}

    def get_repository_readme(self, repo_name: str) -> str:
        """Fetch and decode a repository's README.

        Returns:
            The decoded README text, or "" if missing or not base64-encoded.
        """
        url = f"{self.base_url}/repos/{self.username}/{repo_name}/readme"
        response = requests.get(url, headers=self.headers, timeout=30)
        if response.status_code == 200:
            readme_data = response.json()
            # GitHub delivers README content base64-encoded.
            if readme_data.get('encoding') == 'base64':
                import base64
                return base64.b64decode(readme_data['content']).decode('utf-8')
        return ""

    def categorize_repository(self, repo: Dict) -> str:
        """Categorize a repository by its language, description, and name.

        Checks categories in priority order (ai > websites > apps > ebooks >
        templates); the first keyword/language match wins.

        Returns:
            One of 'ai', 'websites', 'apps', 'ebooks', 'templates', 'tools'.
        """
        language = (repo.get('language') or '').lower()
        description = (repo.get('description') or '').lower()
        name = repo.get('name', '').lower()

        # AI/ML projects
        ai_keywords = ['ai', 'ml', 'machine learning', 'neural', 'tensorflow',
                       'pytorch', 'openai', 'llm', 'chatbot', 'nlp',
                       'computer vision']
        if any(keyword in description or keyword in name for keyword in ai_keywords):
            return 'ai'

        # Web applications
        web_keywords = ['website', 'web', 'frontend', 'backend', 'api', 'server']
        web_languages = ['javascript', 'typescript', 'html', 'css', 'php', 'ruby']
        if (language in web_languages or
                any(keyword in description or keyword in name for keyword in web_keywords)):
            return 'websites'

        # Mobile apps
        mobile_keywords = ['mobile', 'ios', 'android', 'app', 'flutter', 'react native']
        mobile_languages = ['swift', 'kotlin', 'dart', 'objective-c']
        if (language in mobile_languages or
                any(keyword in description or keyword in name for keyword in mobile_keywords)):
            return 'apps'

        # Educational content
        edu_keywords = ['tutorial', 'guide', 'course', 'learning', 'documentation',
                        'book', 'cookbook', 'examples']
        if any(keyword in description or keyword in name for keyword in edu_keywords):
            return 'ebooks'

        # Templates
        template_keywords = ['template', 'boilerplate', 'starter', 'scaffold']
        if any(keyword in description or keyword in name for keyword in template_keywords):
            return 'templates'

        # Default to tools
        return 'tools'

    def calculate_pricing(self, repo: Dict, category: str) -> int:
        """Compute a price from category base plus popularity/activity bonuses.

        Returns:
            Integer price (category base + star bonus + recency bonus).
        """
        # NOTE: 'courses' has a base price here although categorize_repository
        # never emits it — kept for callers that pass categories directly.
        base_prices = {
            'ai': 1499,
            'apps': 2999,
            'websites': 1999,
            'courses': 2499,
            'ebooks': 199,
            'templates': 899,
            'tools': 599
        }
        base_price = base_prices.get(category, 599)

        # Adjust based on stars (popularity); tiers are mutually exclusive.
        stars = repo.get('stargazers_count', 0)
        if stars > 100:
            base_price += 500
        elif stars > 50:
            base_price += 200
        elif stars > 10:
            base_price += 100

        # Adjust based on recent activity.
        updated_at = repo.get('updated_at', '')
        if updated_at:
            try:
                update_date = datetime.strptime(updated_at, "%Y-%m-%dT%H:%M:%SZ")
                days_since_update = (datetime.now() - update_date).days
                if days_since_update < 30:
                    base_price += 200  # Recently maintained
                elif days_since_update < 90:
                    base_price += 100
            except ValueError:
                # Unparseable timestamp: skip the recency bonus only.
                pass
        return base_price

    def generate_arabic_title(self, title: str, category: str) -> str:
        """Prefix the title with the category's Arabic label."""
        # NOTE(review): these labels appear mojibake-damaged in the source
        # scrape — confirm against the original UTF-8 file.
        category_translations = {
            'ai': 'Ψ°ΩΩ',
            'apps': 'ΨͺΨ·Ψ¨ΩΩ',
            'websites': 'ΩΩΩΨΉ',
            'courses': 'Ψ―ΩΨ±Ψ©',
            'ebooks': 'ΩΨͺΨ§Ψ¨',
            'templates': 'ΩΨ§ΩΨ¨',
            'tools': 'Ψ£Ψ―Ψ§Ψ©'
        }
        prefix = category_translations.get(category, 'ΩΩΨͺΨ¬')
        return f"{prefix} {title}"

    def get_features_from_readme(self, readme_content: str) -> List[str]:
        """Extract up to four feature bullet points from README text.

        Scans for a "features"-like heading, then collects bullet lines of
        reasonable length until a blank line or a new heading.

        Returns:
            Up to 4 feature strings; generic defaults if none were found.
        """
        features: List[str] = []
        if not readme_content:
            return ["π Open Source", "β Community Driven", "π§ Customizable", "π± Production Ready"]

        # Look for common feature indicators
        lines = readme_content.lower().split('\n')
        feature_indicators = ['features', 'capabilities', 'what it does', 'includes']
        in_features_section = False
        for line in lines:
            if any(indicator in line for indicator in feature_indicators):
                in_features_section = True
                continue
            if in_features_section:
                if line.strip().startswith(('- ', '* ', '+ ')):
                    feature = line.strip()[2:].strip()
                    # Keep only bullets of plausible feature length.
                    if len(feature) > 10 and len(feature) < 80:
                        features.append(feature.capitalize())
                    if len(features) >= 6:
                        break
                elif line.strip() == '' or line.startswith('#'):
                    # Blank line or next heading ends the features section.
                    break

        # Default features if none found
        if not features:
            features = [
                "π Well Documented",
                "β Production Ready",
                "π§ Easy to Setup",
                "π High Performance"
            ]
        return features[:4]  # Limit to 4 features

    def analyze_repository(self, repo: Dict) -> Dict[str, Any]:
        """Build the full store-entry dict for one repository.

        Combines categorization, pricing, README feature extraction, badge
        selection, and demo content into a single entry.
        """
        category = self.categorize_repository(repo)
        price = self.calculate_pricing(repo, category)

        # Get README for features
        readme = self.get_repository_readme(repo['name'])
        features = self.get_features_from_readme(readme)

        # Determine badge: popularity beats recency beats default.
        badge = "OPEN SOURCE"
        badge_type = "new"
        if repo.get('stargazers_count', 0) > 50:
            badge = "POPULAR"
            badge_type = "hot"
        elif self.is_recently_updated(repo.get('updated_at', '')):
            badge = "UPDATED"
            badge_type = "new"

        # Category icons
        icons = {
            'ai': 'π€',
            'apps': 'π±',
            'websites': 'π',
            'courses': 'π',
            'ebooks': 'π',
            'templates': 'π',
            'tools': 'π οΈ'
        }

        # "my-repo_name" -> "My Repo Name"
        title = repo['name'].replace('-', ' ').replace('_', ' ').title()
        return {
            'id': f"gh_{repo['id']}",
            'category': category,
            'title': title,
            'arabicTitle': self.generate_arabic_title(title, category),
            'description': repo.get('description', 'Open source solution from BrainSAIT'),
            'price': price,
            'badge': badge,
            'badgeType': badge_type,
            'icon': icons.get(category, 'π οΈ'),
            'features': features,
            'github_url': repo['html_url'],
            'clone_url': repo['clone_url'],
            'stars': repo.get('stargazers_count', 0),
            'forks': repo.get('forks_count', 0),
            'language': repo.get('language', 'Multiple'),
            'created_at': repo.get('created_at', ''),
            'updated_at': repo.get('updated_at', ''),
            'demo': self.generate_demo_content(repo, category)
        }

    def is_recently_updated(self, updated_at: str) -> bool:
        """Return True if the ISO-8601 timestamp is within the last 90 days."""
        if not updated_at:
            return False
        try:
            update_date = datetime.strptime(updated_at, "%Y-%m-%dT%H:%M:%SZ")
            three_months_ago = datetime.now() - timedelta(days=90)
            return update_date > three_months_ago
        except ValueError:
            # Unparseable timestamp counts as not recently updated.
            return False

    def generate_demo_content(self, repo: Dict, category: str) -> Dict:
        """Build the static demo/preview payload for a repository's entry."""
        title = repo['name'].replace('-', ' ').replace('_', ' ').title()
        category_descriptions = {
            'ai': 'AI-powered solution with machine learning capabilities',
            'apps': 'Mobile application with native performance',
            'websites': 'Modern web application with responsive design',
            'courses': 'Educational content with hands-on examples',
            'ebooks': 'Comprehensive guide with practical insights',
            'templates': 'Ready-to-use template for rapid development',
            'tools': 'Utility tool for enhanced productivity'
        }
        return {
            'title': f"{title} - Live Demo",
            'arabicTitle': f"ΨΉΨ±ΨΆ ΩΨ¨Ψ§Ψ΄Ψ± - {title}",
            'preview': f"π Explore {title} - {category_descriptions.get(category, 'Open source solution')}. Built with {repo.get('language', 'modern technologies')} and designed for production use.",
            'features': [
                {
                    'icon': 'π',
                    'title': 'Source Code',
                    'desc': 'Full access to well-documented, production-ready code'
                },
                {
                    'icon': 'π',
                    'title': 'Documentation',
                    'desc': 'Comprehensive setup guides and API documentation'
                },
                {
                    'icon': 'π§',
                    'title': 'Customizable',
                    'desc': 'Easy to modify and extend for your specific needs'
                },
                {
                    'icon': 'π',
                    'title': 'Deploy Ready',
                    'desc': 'Optimized for production deployment and scaling'
                }
            ]
        }

    def should_include_in_store(self, repo: Dict) -> bool:
        """Decide whether a repository qualifies for the store.

        Excludes forks, archived repos, repos without a description, and
        repos not updated within the last two years.
        """
        # Skip forks, archived repos, and repos without descriptions
        if (repo.get('fork', False) or
                repo.get('archived', False) or
                not repo.get('description')):
            return False

        # Skip very old repositories (over 2 years without updates)
        updated_at = repo.get('updated_at', '')
        if updated_at:
            try:
                update_date = datetime.strptime(updated_at, "%Y-%m-%dT%H:%M:%SZ")
                two_years_ago = datetime.now() - timedelta(days=730)
                if update_date < two_years_ago:
                    return False
            except ValueError:
                # Unparseable timestamp: give the repo the benefit of the doubt.
                pass
        return True

    def scan_and_generate_store_entries(self) -> List[Dict]:
        """Scan all repositories and build store entries for the qualifying ones."""
        print("π Scanning GitHub repositories...")
        repositories = self.get_repositories()
        store_entries = []
        for repo in repositories:
            if self.should_include_in_store(repo):
                print(f"β Processing: {repo['name']}")
                store_entry = self.analyze_repository(repo)
                store_entries.append(store_entry)
            else:
                print(f"βοΈ Skipping: {repo['name']} (fork/archived/no description)")
        print(f"\nπ Generated {len(store_entries)} store entries")
        return store_entries

    def save_to_file(self, store_entries: List[Dict], filename: str = "brainsait_github_products.json"):
        """Write store entries to a JSON file (UTF-8, non-ASCII preserved)."""
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(store_entries, f, indent=2, ensure_ascii=False)
        # Fix: interpolate the actual filename instead of a literal placeholder.
        print(f"πΎ Saved to {filename}")
def main():
    """Entry point: scan GitHub, build store entries, save, and summarize."""
    # A personal access token must be supplied via the environment.
    token = os.environ.get('GITHUB_TOKEN')
    if token is None or token == '':
        print("β Error: GITHUB_TOKEN environment variable not set")
        print("Please set your GitHub personal access token:")
        print("export GITHUB_TOKEN='your_token_here'")
        return

    scanner = BrainSAITRepoScanner(token)
    entries = scanner.scan_and_generate_store_entries()
    scanner.save_to_file(entries)

    # Tally how many entries landed in each category.
    print("\nπ Summary:")
    counts = {}
    for item in entries:
        counts.setdefault(item['category'], 0)
        counts[item['category']] += 1
    for name, total in counts.items():
        print(f" {name}: {total} products")
    print(f"\nπ Ready to integrate {len(entries)} products into BrainSAIT store!")


if __name__ == "__main__":
    main()