Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion augur/application/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ def redact_setting_value(section_name, setting_name, value):
"rebuild_caches": 1,
"run_analysis": 1,
"run_facade_contributors": 1,
"facade_contributor_full_recollect": 0,
"commit_messages": 1,
},
"Server": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def __init__(self,logger: Logger):
self.rebuild_caches = worker_options["rebuild_caches"]
self.multithreaded = worker_options["multithreaded"]
self.create_xlsx_summary_files = worker_options["create_xlsx_summary_files"]
self.facade_contributor_full_recollect = worker_options["facade_contributor_full_recollect"]
self.commit_messages = worker_options["commit_messages"]

self.tool_source = "Facade"
Expand Down
120 changes: 48 additions & 72 deletions augur/tasks/github/facade_github/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,54 +196,25 @@ def insert_facade_contributors(self, repo_git):
repo_id = repo.repo_id
facade_helper = FacadeHelper(logger)

with get_session() as session:
query = session.query(CollectionStatus).filter(CollectionStatus.repo_id == repo.repo_id)
collection_status = execute_session_query(query,'one')
last_collected_date = collection_status.facade_data_last_collected if not facade_helper.facade_contributor_full_recollect else None

# Get all of the commit data's emails and names from the commit table that do not appear
# in the contributors table or the contributors_aliases table.
# Find commits not yet linked to a contributor (cmt_ght_author_id IS NULL),
# skipping emails already marked unresolvable.

logger.info(
"Beginning process to insert contributors from facade commits for repo w entry info: {}\n".format(repo_id))
new_contrib_sql = s.sql.text("""
SELECT DISTINCT
commits.cmt_author_name AS NAME,
commits.cmt_commit_hash AS hash,
commits.cmt_author_raw_email AS email_raw,
'not_unresolved' as resolution_status
FROM
commits
WHERE
commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email )
or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email)
AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ))
GROUP BY
commits.cmt_author_name,
commits.cmt_commit_hash,
commits.cmt_author_raw_email
UNION
SELECT DISTINCT
commits.cmt_author_name AS NAME,--commits.cmt_id AS id,
commits.cmt_commit_hash AS hash,
commits.cmt_author_raw_email AS email_raw,
'unresolved' as resolution_status
FROM
commits
WHERE
commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email )
AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )
GROUP BY
commits.cmt_author_name,
commits.cmt_commit_hash,
commits.cmt_author_raw_email
ORDER BY
hash
""").bindparams(repo_id=repo_id,since_date=last_collected_date)
SELECT DISTINCT
commits.cmt_author_name AS NAME,
commits.cmt_commit_hash AS hash,
commits.cmt_author_raw_email AS email_raw
FROM
augur_data.commits
WHERE
commits.repo_id = :repo_id AND
commits.cmt_ght_author_id IS NULL AND
commits.cmt_author_raw_email NOT IN (
SELECT email FROM augur_data.unresolved_commit_emails
)
""").bindparams(repo_id=repo_id)

#Execute statement with session.
result = execute_sql(new_contrib_sql)
Expand Down Expand Up @@ -278,37 +249,42 @@ def insert_facade_contributors(self, repo_git):

logger.debug("DEBUG: Got through the new_contribs")

# sql query used to find corresponding cntrb_id's of emails found in the contributor's table
# i.e., if a contributor already exists, we use it!
# Match unlinked commits to contributors via email from any source (cntrb_email, canonical email, or alias).
resolve_email_to_cntrb_id_sql = s.sql.text("""
SELECT DISTINCT
cntrb_id,
contributors.cntrb_login AS login,
contributors.cntrb_canonical AS email,
commits.cmt_author_raw_email
FROM
contributors,
commits
WHERE
contributors.cntrb_canonical = commits.cmt_author_raw_email
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND commits.repo_id = :repo_id
UNION
SELECT DISTINCT
contributors_aliases.cntrb_id,
contributors.cntrb_login as login,
contributors_aliases.alias_email AS email,
commits.cmt_author_raw_email
WITH email_to_contributor AS (
SELECT cntrb_email AS email, cntrb_id
FROM augur_data.contributors
WHERE cntrb_email IS NOT NULL

UNION ALL

SELECT cntrb_canonical AS email, cntrb_id
FROM augur_data.contributors
WHERE cntrb_canonical IS NOT NULL

UNION ALL

SELECT alias_email AS email, cntrb_id
FROM augur_data.contributors_aliases
WHERE alias_email IS NOT NULL
),
deduplicated AS (
SELECT DISTINCT ON (email) email, cntrb_id
FROM email_to_contributor
ORDER BY email
)
SELECT
d.cntrb_id,
c.cmt_author_email AS email
FROM
contributors,
contributors_aliases,
commits
augur_data.commits c
INNER JOIN
deduplicated d
ON c.cmt_author_email = d.email
WHERE
contributors_aliases.alias_email = commits.cmt_author_raw_email
AND contributors.cntrb_id = contributors_aliases.cntrb_id
AND commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
""").bindparams(repo_id=repo_id,since_date=last_collected_date)
c.cmt_ght_author_id IS NULL AND
c.repo_id = :repo_id
""").bindparams(repo_id=repo_id)


result = execute_sql(resolve_email_to_cntrb_id_sql)
Expand Down
Loading