diff --git a/apps/blogs/migrations/0004_auto_20260222_0834.py b/apps/blogs/migrations/0004_auto_20260222_0834.py
new file mode 100644
index 000000000..3fed07340
--- /dev/null
+++ b/apps/blogs/migrations/0004_auto_20260222_0834.py
@@ -0,0 +1,34 @@
+# Generated by Django 5.2.11 on 2026-02-22 08:34
+
+from django.db import migrations
+from django.db.models import Value
+from django.db.models.functions import Replace
+
+# Same "//host" patterns the feed parser (apps/blogs/parser.py) applies to new
+# entries, so stored rows and freshly imported rows are rewritten identically.
+OLD_HOST = "//pythoninsider.blogspot.com"
+NEW_HOST = "//blog.python.org"
+
+
+def rewrite_blog_urls(apps, schema_editor):
+    """Rewrite stored BlogEntry URLs from the Blogger host to blog.python.org.
+
+    The replacement runs as one UPDATE in the database instead of loading
+    every matching row into Python. The migration's reverse is a no-op, so
+    rewritten URLs are left as they are when migrating backwards.
+    """
+    BlogEntry = apps.get_model("blogs", "BlogEntry")
+    BlogEntry.objects.filter(url__contains=OLD_HOST).update(
+        url=Replace("url", Value(OLD_HOST), Value(NEW_HOST)),
+    )
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("blogs", "0003_alter_relatedblog_creator_and_more"),
+    ]
+
+    operations = [
+        migrations.RunPython(rewrite_blog_urls, migrations.RunPython.noop),
+    ]
diff --git a/apps/blogs/parser.py b/apps/blogs/parser.py
index 8f557dd9a..e918de9c5 100644
--- a/apps/blogs/parser.py
+++ b/apps/blogs/parser.py
@@ -18,11 +18,16 @@ def get_all_entries(feed_url):
     for e in d["entries"]:
         published = datetime.datetime(*e["published_parsed"][:7], tzinfo=datetime.UTC)
 
+        # Rewrite Blogger domains to canonical python.org domain (Issue #2685)
+        url = e["link"].replace(
+            "//pythoninsider.blogspot.com",
+            "//blog.python.org"
+        )
         entry = {
             "title": e["title"],
             "summary": e.get("summary", ""),
             "pub_date": published,
-            "url": e["link"],
+            "url": url,
         }
         entries.append(entry)
 
diff --git a/apps/blogs/tests/test_parser.py b/apps/blogs/tests/test_parser.py
index 1b1a83ae8..1857ce66c 100644
--- a/apps/blogs/tests/test_parser.py
+++ b/apps/blogs/tests/test_parser.py
@@ -1,5 +1,6 @@
 import datetime
 import unittest
+from unittest.mock import patch
 
 from apps.blogs.parser import get_all_entries
 from apps.blogs.tests.utils import get_test_rss_path
@@ -24,3 +25,26 @@ def test_entries(self):
             self.entries[0]["url"],
             "http://feedproxy.google.com/~r/PythonInsider/~3/tGNCqyOiun4/introducing-electronic-contributor.html",
         )
+
+    @patch("apps.blogs.parser.feedparser.parse")
+    def test_rewrites_blogspot_url(self, mock_parse):
+        mock_parse.return_value = {
+            "entries": [
+                {
+                    "title": "Test Title HTTPS",
+                    "summary": "Summary",
+                    "published_parsed": (2024, 1, 15, 12, 0, 0, 0, 0, 0),
+                    "link": "https://pythoninsider.blogspot.com/2024/01/test.html",
+                },
+                {
+                    "title": "Test Title HTTP",
+                    "summary": "Summary",
+                    "published_parsed": (2024, 1, 15, 12, 0, 0, 0, 0, 0),
+                    "link": "http://pythoninsider.blogspot.com/2024/01/test2.html",
+                }
+            ]
+        }
+        entries = get_all_entries("http://fake.url")
+        self.assertEqual(len(entries), 2)
+        self.assertEqual(entries[0]["url"], "https://blog.python.org/2024/01/test.html")
+        self.assertEqual(entries[1]["url"], "http://blog.python.org/2024/01/test2.html")