Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions core/admin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from django.contrib import admin

from django.contrib import admin
# Register your models here.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this comment supposed to be here? Also, whenever possible, it would be best to add an extra empty line at the end of each file (that will get rid of the red symbols you see here)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Those comments are there in the starter files when you install Django. I should have cleared them after I added my code. I will do that in my next commit.

I have always wondered why many files I see in repos online have an extra empty line at the bottom. Thanks for pointing this out to me😅

79 changes: 76 additions & 3 deletions core/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,76 @@
from django.db import models

# Create your models here.
from django.db import models
from typing import Optional


class Contributor(models.Model):
    """
    Database representation of a pyOpenSci contributor.

    The field set follows the PersonModel from pyosMeta so that contributor
    data can be migrated into the database later. For now, contributor data
    is still read directly from YAML files.
    """

    # --- Identity and profile ---
    name = models.CharField(max_length=255, null=True, blank=True)
    github_username = models.CharField(max_length=100, unique=True)
    github_image_id = models.IntegerField(null=True, blank=True)
    bio = models.TextField(null=True, blank=True)
    organization = models.CharField(max_length=255, null=True, blank=True)
    location = models.CharField(max_length=255, null=True, blank=True)
    email = models.EmailField(null=True, blank=True)

    # --- Dates ---
    date_added = models.DateField(null=True, blank=True)

    # --- Role flags (all default to False) ---
    deia_advisory = models.BooleanField(default=False)
    editorial_board = models.BooleanField(default=False)
    emeritus_editor = models.BooleanField(default=False)
    advisory = models.BooleanField(default=False)
    emeritus_advisory = models.BooleanField(default=False)
    board = models.BooleanField(default=False)

    # --- Social media and external links ---
    twitter = models.CharField(max_length=50, null=True, blank=True)
    mastodon = models.URLField(null=True, blank=True)
    orcidid = models.CharField(max_length=50, null=True, blank=True)
    website = models.URLField(null=True, blank=True)

    # --- List-valued data, stored as JSON (SQLite compatible) ---
    title = models.JSONField(default=list, blank=True)
    partners = models.JSONField(default=list, blank=True)
    contributor_type = models.JSONField(default=list, blank=True)
    packages_eic = models.JSONField(default=list, blank=True)
    packages_editor = models.JSONField(default=list, blank=True)
    packages_submitted = models.JSONField(default=list, blank=True)
    packages_reviewed = models.JSONField(default=list, blank=True)

    # --- Bookkeeping ---
    sort = models.IntegerField(null=True, blank=True)
    created_at = models.DateTimeField(auto_now_add=True)
    updated_at = models.DateTimeField(auto_now=True)

    class Meta:
        # Newest contributors first, then by explicit sort value, then by name.
        ordering = ["-date_added", "sort", "name"]
        verbose_name = "Contributor"
        verbose_name_plural = "Contributors"

    def __str__(self) -> str:
        # The string form is the same label exposed as ``display_name``.
        return self.display_name

    @property
    def display_name(self) -> str:
        """Human-readable label: the name, or ``@<github_username>`` without one."""
        if self.name:
            return self.name
        return f"@{self.github_username}"

    @property
    def github_avatar_url(self) -> Optional[str]:
        """Avatar URL built from ``github_image_id``; None when no ID is set."""
        image_id = self.github_image_id
        if not image_id:
            return None
        return f"https://avatars.githubusercontent.com/u/{image_id}?s=400&v=4"

    @property
    def github_profile_url(self) -> str:
        """URL of the contributor's GitHub profile page."""
        username = self.github_username
        return f"https://github.com/{username}"
247 changes: 247 additions & 0 deletions core/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
"""
Utility functions for working with contributor data.
This module provides functions to fetch and parse contributor data from YAML files,
following the same format used by the Jekyll site and pyosMeta package.
"""

import yaml
import logging
from datetime import datetime
from typing import List, Dict, Any, Optional
from urllib.request import urlopen
from urllib.error import URLError

logger = logging.getLogger(__name__)


class ContributorDataError(Exception):
    """Raised when contributor data cannot be fetched, parsed, or validated."""


def fetch_contributors_yaml(
    url: Optional[str] = None,
    timeout: float = 10.0,
) -> List[Dict[str, Any]]:
    """
    Fetch contributor data from a YAML source.

    Args:
        url: URL to fetch YAML from. If None, uses the default pyOpenSci
            GitHub URL.
        timeout: Seconds to wait on the network before giving up, so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        List of contributor dictionaries.

    Raises:
        ContributorDataError: If the data cannot be fetched or parsed, or if
            the parsed document is not a list.
    """
    if url is None:
        url = "https://raw.githubusercontent.com/pyOpenSci/pyopensci.github.io/main/_data/contributors.yml"

    # Only the operations that can fail for external reasons live in the
    # ``try``; the shape check below raises its own error and must not be
    # re-wrapped as an "unexpected" one by the catch-all handler.
    try:
        with urlopen(url, timeout=timeout) as response:
            yaml_content = response.read().decode('utf-8')
        contributors = yaml.safe_load(yaml_content)
    except (URLError, TimeoutError) as e:
        # A read timeout surfaces as TimeoutError rather than URLError.
        logger.error(f"Failed to fetch contributors from {url}: {e}")
        raise ContributorDataError(f"Network error: {e}") from e
    except yaml.YAMLError as e:
        logger.error(f"Failed to parse YAML: {e}")
        raise ContributorDataError(f"YAML parsing error: {e}") from e
    except Exception as e:
        logger.error(f"Unexpected error fetching contributors: {e}")
        raise ContributorDataError(f"Unexpected error: {e}") from e

    if not isinstance(contributors, list):
        raise ContributorDataError("YAML data should be a list of contributors")

    logger.info(f"Successfully fetched {len(contributors)} contributors from {url}")
    return contributors


def parse_contributor_date(date_str: Any) -> Optional[datetime]:
"""
Parse a date value into a datetime object.
Args:
date_str: Date value; it is converted with str() and stripped before parsing.
Returns:
Datetime from the first format that matches, or None when the value is
falsy or matches none of the formats (a warning is logged in the latter case).
"""
# Falsy input (None, empty string, 0) is treated as "no date" without logging.
if not date_str:
return None

# Convert to string if it's not already
# NOTE(review): str() of a datetime instance includes a time part, which none
# of the date-only formats below accept, so such a value would end up as None.
date_str = str(date_str).strip()

# Try common date formats
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious about this - are we parsing dates rom the yaml file? If we are, then pyosmeta has already cleaned up the date in that file and it should be consistent (but perhaps you saw something that I missed?)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@Phinart98 did you have any insight into this?

# Formats are tried in list order and the first match wins.
# NOTE(review): '%m/%d/%Y' precedes '%d/%m/%Y', so an ambiguous value such as
# 03/04/2024 is always read as month/day; the day-first format only applies
# when the first number cannot be a month.
date_formats = [
'%Y-%m-%d',
'%Y/%m/%d',
'%m/%d/%Y',
'%d/%m/%Y'
]

for fmt in date_formats:
try:
return datetime.strptime(date_str, fmt)
except ValueError:
# This format did not match; fall through to the next one.
continue

# No format matched: log the offending value and report failure as None.
logger.warning(f"Could not parse date: {date_str}")
return None


def clean_contributor_data(contributor: Dict[str, Any]) -> Dict[str, Any]:
"""
Clean and normalize contributor data.
Args:
contributor: Raw contributor dictionary from YAML.
Returns:
Cleaned contributor dictionary. It always contains 'github_username' and
every list field; string, integer, date and boolean fields are included
only when the input holds a usable value for them.
Raises:
ContributorDataError: If 'github_username' is missing from the input.
"""
cleaned: Dict[str, Any] = {}

# Required field
if 'github_username' not in contributor:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@banesullivan I'm curious what you think about this here. I worry about trying to clean data in multiple places vs handling cleaning when we parse the data over in pyosMeta.

@Phinart98, we have a workflow that generates that YAML file, and it uses helpers in pyosMeta to clean the data. I just think we should do that once - rather than twice UNLESS there is a good reason to do it here.

raise ContributorDataError("Contributor missing required field: github_username")

# The username is copied through unchanged (unlike the optional string
# fields below, it is neither converted with str() nor stripped).
cleaned['github_username'] = contributor['github_username']

# String fields - strip whitespace
string_fields = [
'name', 'bio', 'organization', 'location', 'email',
'twitter', 'mastodon', 'orcidid', 'website'
]

# Values that are falsy or whitespace-only are left out of the result entirely.
for field in string_fields:
value = contributor.get(field)
if value and str(value).strip():
cleaned[field] = str(value).strip()

# Integer fields
# A value that int() rejects is logged and the field is omitted.
for field in ['github_image_id', 'sort']:
value = contributor.get(field)
if value is not None:
try:
cleaned[field] = int(value)
except (ValueError, TypeError):
logger.warning(f"Invalid {field} for {contributor.get('github_username')}: {value}")

# Date field
# Stored as a date (time part dropped); omitted when missing or unparseable.
date_added = contributor.get('date_added')
if date_added:
parsed_date = parse_contributor_date(date_added)
if parsed_date:
cleaned['date_added'] = parsed_date.date()

# Boolean fields
boolean_fields = [
'deia_advisory', 'editorial_board', 'emeritus_editor',
'advisory', 'emeritus_advisory', 'board'
]

# Only explicit (non-None) values are copied, coerced by truthiness.
# NOTE(review): bool() on a non-empty string such as "false" yields True --
# confirm the YAML always carries real booleans for these flags.
for field in boolean_fields:
value = contributor.get(field)
if value is not None:
cleaned[field] = bool(value)

# List fields - ensure they're lists
list_fields = [
'title', 'partners', 'contributor_type', 'packages_eic',
'packages_editor', 'packages_submitted', 'packages_reviewed'
]

# Every list field ends up in the result: a missing key falls back to the
# [] default, and None is mapped to [] as well.
for field in list_fields:
value = contributor.get(field, [])
if value is None:
cleaned[field] = []
elif isinstance(value, list):
# Filter out empty/None values and strip strings
cleaned[field] = [
str(item).strip() for item in value
if item is not None and str(item).strip()
]
else:
# Single value, convert to list
cleaned[field] = [str(value).strip()] if str(value).strip() else []

return cleaned


def _recent_contributor_sort_key(contributor: Dict[str, Any]):
    """Build an ascending sort key: newest date first, then lowest ``sort``."""
    # Contributors without a date use the minimum date so they land at the end.
    date_added = contributor.get('date_added') or datetime.min.date()
    sort_value = contributor.get('sort', 999999)  # Default high sort value
    # Negating the ordinal makes newer dates sort first while the secondary
    # ``sort`` value stays ascending (a plain ``reverse=True`` would flip both).
    return (-date_added.toordinal(), sort_value)


def get_recent_contributors(count: int = 4) -> List[Dict[str, Any]]:
    """
    Get the most recent contributors.

    Contributors are fetched from the default YAML source and cleaned;
    entries that fail cleaning are skipped with a warning.

    Args:
        count: Number of recent contributors to return. Values of zero or
            less yield an empty list without fetching anything.

    Returns:
        List of cleaned contributor dictionaries, ordered by date_added
        descending and then by sort ascending (entries without a sort value
        come last within a date, entries without a date come last overall).
        Returns an empty list if the data cannot be fetched or processed.
    """
    if count <= 0:
        # A negative count would otherwise slice from the end of the list.
        return []

    try:
        contributors = fetch_contributors_yaml()

        # Clean all contributor data, dropping entries that are invalid.
        cleaned_contributors = []
        for contributor in contributors:
            try:
                cleaned_contributors.append(clean_contributor_data(contributor))
            except ContributorDataError as e:
                logger.warning(f"Skipping invalid contributor: {e}")

        sorted_contributors = sorted(
            cleaned_contributors,
            key=_recent_contributor_sort_key,
        )
        return sorted_contributors[:count]

    except ContributorDataError as e:
        logger.error(f"Failed to get recent contributors: {e}")
        return []
    except Exception as e:
        # Best effort by design: callers get an empty list rather than an
        # error, but the traceback is kept in the log for diagnosis.
        logger.exception(f"Unexpected error getting recent contributors: {e}")
        return []


def generate_github_avatar_url(github_image_id: int, size: int = 400) -> str:
    """
    Generate GitHub avatar URL from image ID.

    Args:
        github_image_id: GitHub user's image ID.
        size: Value for the ``s`` query parameter of the avatar URL.
            Defaults to 400, the value that was previously hard-coded.

    Returns:
        GitHub avatar URL.
    """
    return f"https://avatars.githubusercontent.com/u/{github_image_id}?s={size}&v=4"


def generate_github_profile_url(github_username: str) -> str:
    """
    Generate GitHub profile URL from username.

    The username is stripped of surrounding whitespace and percent-encoded
    before being placed in the URL path, so a malformed value from the data
    source cannot inject extra path segments or a query string.

    Args:
        github_username: GitHub username.

    Returns:
        GitHub profile URL.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # safe="" also encodes "/", which would otherwise start a new path segment.
    username = quote(str(github_username).strip(), safe="")
    return f"https://github.com/{username}"
47 changes: 37 additions & 10 deletions core/views.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,38 @@
from django.shortcuts import render


def home(request):
"""Homepage view for PyOpenSci."""
context = {
'page_title': 'Welcome to pyOpenSci',
'hero_title': 'We make it easier for scientists to create, find, maintain, and contribute to reusable code and software.',
'hero_subtitle': 'pyOpenSci broadens participation in scientific open source by breaking down social and technical barriers. Join our global community.',
}
from django.shortcuts import render
import logging

from .utils import get_recent_contributors, generate_github_avatar_url, generate_github_profile_url

logger = logging.getLogger(__name__)


def home(request):
    """Render the pyOpenSci homepage, including the most recent contributors."""
    # Recent contributors come from the YAML data source.
    featured = get_recent_contributors(count=4)

    # Attach the derived values the template needs to each contributor dict.
    for person in featured:
        image_id = person.get('github_image_id')
        if image_id:
            # An avatar URL is only available when an image ID is known.
            person['github_avatar_url'] = generate_github_avatar_url(image_id)

        username = person['github_username']
        person['github_profile_url'] = generate_github_profile_url(username)

        # Fall back to the @-prefixed username when no name is set.
        person['display_name'] = person.get('name') or f"@{username}"

    return render(
        request,
        'core/home.html',
        {
            'page_title': 'Welcome to pyOpenSci',
            'hero_title': 'We make it easier for scientists to create, find, maintain, and contribute to reusable code and software.',
            'hero_subtitle': 'pyOpenSci broadens participation in scientific open source by breaking down social and technical barriers. Join our global community.',
            'recent_contributors': featured,
        },
    )
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ requires-python = ">=3.12"
dependencies = [
"Django>=5.1.6",
"wagtail>=7.0.2",
"PyYAML>=6.0",
]
Loading