Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions ynr/apps/candidatebot/helpers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import contextlib
import re

import pypandoc
Expand Down Expand Up @@ -196,3 +197,10 @@ def add_theyworkforyou_id(self, twfy_id):
value = f"https://www.theyworkforyou.com/mp/{twfy_id}/"
internal_id = f"uk.org.publicwhip/person/{twfy_id}"
self.edit_field("theyworkforyou", value, internal_id=internal_id)

def remove_person_identifier(self, identifier):
    """
    Remove ``identifier`` from this bot's person and delete it.

    The identifier is dropped from ``self.person.get_all_identifiers``,
    the person is saved, and the identifier itself is deleted.

    :param identifier: the person identifier object to remove.
    :return: ``self.person``, whether or not anything had to be removed.
    """
    # NOTE(review): ``get_all_identifiers`` is used like a list elsewhere
    # (len(), indexing), and ``list.remove`` reports a missing item with
    # ValueError rather than DoesNotExist - confirm against the model.
    # Guarded separately so that a stale in-memory collection cannot
    # prevent the identifier itself from being deleted below.
    with contextlib.suppress(ValueError, PersonIdentifier.DoesNotExist):
        self.person.get_all_identifiers.remove(identifier)

    # Best effort: an identifier that has already gone is not an error.
    with contextlib.suppress(PersonIdentifier.DoesNotExist):
        self.person.save()
        identifier.delete()

    return self.person
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from typing import List
from urllib.parse import urlparse

import requests
from candidatebot.helpers import CandidateBot
from django.core.management.base import BaseCommand
from people.models import Person
from popolo.models import Membership


def get_domain(url):
    """Return the network-location (``netloc``) component of ``url``.

    The value is whatever ``urlparse`` reports, so it may include a port
    or user information and is empty when ``url`` has no ``//`` authority
    part.
    """
    return urlparse(url).netloc


def is_facebook_url(url: str) -> bool:
    """Return True when ``url`` points at facebook.com or fb.com.

    The host must be exactly one of those domains or a subdomain of one
    (``www.facebook.com``, ``m.fb.com``). A plain substring test would
    also accept hosts such as ``notfacebook.com`` or
    ``facebook.com.evil.example``, so the match is anchored on a label
    boundary instead.

    :param url: the URL to classify.
    :return: True for Facebook hosts, False otherwise (including strings
        that have no host at all).
    """
    # ``hostname`` is lower-cased and excludes any port or user info.
    hostname = urlparse(url).hostname
    if not hostname:
        return False

    # A fully-qualified host may carry a trailing dot.
    hostname = hostname.rstrip(".")
    return any(
        hostname == domain or hostname.endswith("." + domain)
        for domain in ("facebook.com", "fb.com")
    )

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization

The string [facebook.com](1) may be at an arbitrary position in the sanitized URL.

Check failure

Code scanning / CodeQL

Incomplete URL substring sanitization

The string [fb.com](1) may be at an arbitrary position in the sanitized URL.


class Command(BaseCommand):
    """
    Test and remove inactive or dead links from Person objects.

    Only people with a membership on a ballot of the ``parl.2024-07-04``
    election are checked. Pass ``--person-id`` to restrict the run to a
    single one of those people.
    """

    def add_arguments(self, parser):
        """Register the optional ``--person-id`` filter."""
        parser.add_argument(
            "--person-id",
            help="Person ID to test",
        )

    def handle(self, *args, **options):
        """
        Request every http(s) person identifier and delete the dead ones.

        An identifier is removed (through CandidateBot) when its URL
        answers with a 404 and it is not a Facebook URL. Requests that
        fail outright are logged and the identifier is left in place.
        """
        inactive_links: List[List] = []
        memberships = Membership.objects.filter(
            ballot__election__slug="parl.2024-07-04"
        )

        # The join on memberships yields one row per matching membership,
        # so distinct() stops a person being processed more than once.
        people = Person.objects.filter(memberships__in=memberships).distinct()

        person_id = options.get("person_id")
        if person_id:
            people = people.filter(pk=person_id)

        for person in people:
            # Only identifiers whose value is a URL can be requested.
            person_identifiers = [
                identifier
                for identifier in person.get_all_identifiers
                if identifier.value.startswith("http")
            ]

            for identifier in person_identifiers:
                try:
                    resp = requests.get(identifier.value, timeout=2).status_code
                except requests.exceptions.RequestException as e:
                    # No status code was obtained, so nothing is removed.
                    self.stdout.write(
                        f"Request exception: {e} for {person.name}"
                    )
                    continue

                # Facebook URLs (as decided by is_facebook_url) are never
                # removed, even when they answer with a 404.
                if resp != 404 or is_facebook_url(identifier.value):
                    continue

                self.stdout.write(
                    f"Status code: {resp} for {person.name} {identifier.value}"
                )
                inactive_links.append(
                    [
                        str(person.pk),
                        person.name,
                        identifier.value,
                        str(resp),
                    ]
                )
                # delete the identifier from the person identifiers
                bot = CandidateBot(person.pk, ignore_errors=True)
                bot.remove_person_identifier(identifier)
                self.stdout.write(
                    f"Candidatebot deleted {identifier.value_type}:{identifier.value} from {person.name}"
                )

        self.stdout.write(f"Removed {len(inactive_links)} inactive link(s)")
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
from unittest import TestCase

import pytest
from candidates.models.popolo_extra import Ballot
from candidates.tests.factories import (
ElectionFactory,
MembershipFactory,
PostFactory,
)
from django.core.management import call_command
from parties.tests.factories import PartyFactory
from people.tests.factories import PersonFactory, PersonIdentifierFactory


class TestPersonIdentifiers(TestCase):
    """
    Check that the ``candidatebot_remove_inactive_person_links`` command
    deletes identifiers whose URL answers 404 and keeps the others.

    HTTP is stubbed out, so the outcome does not depend on the network or
    on the current state of any external website.
    """

    LIVE_URL = "https://en.wikipedia.org/wiki/Rishi_Sunak"
    DEAD_URL = "http://www.conservatives.com/about/our-team/example.com"

    def setUp(self):
        """Create one candidate in the 2024 election with two URL identifiers."""
        self.person = PersonFactory.create()
        # 200 example
        PersonIdentifierFactory.create(
            person=self.person,
            value=self.LIVE_URL,
            value_type="https://en.wikipedia_url",
        )
        # 404 example
        PersonIdentifierFactory.create(
            person=self.person,
            value=self.DEAD_URL,
            value_type="party_ppc_page_url",
        )
        post = PostFactory.create(slug="parl.2024-07-04")

        election = ElectionFactory.create(
            slug="parl.2024-07-04",
            election_date="2024-07-04",
            name="2024 General Election",
        )
        ballot = Ballot.objects.create(
            election=election, post=post, ballot_paper_id="parl.2024-07-04"
        )
        party = PartyFactory.create()
        MembershipFactory.create(
            person=self.person,
            post=post,
            party=party,
            ballot=ballot,
        )

    @pytest.mark.django_db
    def test_remove_inactive_person_identifiers(self):
        """Only the identifier whose URL returns 404 is removed."""
        # Imported locally so this test needs no change to module imports.
        from unittest import mock

        def fake_get(url, **kwargs):
            # Stand-in for requests.get: DEAD_URL is a 404, the rest are 200.
            status = 404 if url == self.DEAD_URL else 200
            return mock.Mock(status_code=status)

        # Compared without regard to order, which is not the point here.
        self.assertCountEqual(
            [
                identifier.value
                for identifier in self.person.get_all_identifiers
            ],
            [self.LIVE_URL, self.DEAD_URL],
        )

        # The command calls ``requests.get`` through the module attribute,
        # so patching it on the ``requests`` module intercepts the call.
        with mock.patch("requests.get", side_effect=fake_get):
            call_command(
                "candidatebot_remove_inactive_person_links",
                "--person-id",
                self.person.id,
            )

        # Re-fetch instead of refresh_from_db(), so that any per-instance
        # caching of the identifiers cannot hide the deletion.
        person = type(self.person).objects.get(pk=self.person.pk)
        self.assertEqual(
            [identifier.value for identifier in person.get_all_identifiers],
            [self.LIVE_URL],
        )