Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MC-1539] Provide backup stories for "Need to Know" feed #671

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions merino/curated_recommendations/provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ def rank_recommendations(

return recommendations[: request.count]

def rank_need_to_know_recommendations(
async def rank_need_to_know_recommendations(
self,
recommendations: list[CuratedRecommendation],
surface_id: ScheduledSurfaceId,
Expand All @@ -266,6 +266,11 @@ def rank_need_to_know_recommendations(
# Filter out all time-sensitive recommendations into the need_to_know feed
need_to_know_feed = [r for r in recommendations if r.isTimeSensitive]

# If fewer than five stories have been curated for this feed, use yesterday's data
if len(need_to_know_feed) < 5:
yesterdays_recs = await self.fetch_backup_recommendations(surface_id)
Copy link
Collaborator Author

@nina-py nina-py Oct 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I initially called the fetch function here directly, but split it out to be able to unit-test the parent method.

need_to_know_feed = [r for r in yesterdays_recs if r.isTimeSensitive]

# Update received_rank for need_to_know recommendations
for rank, rec in enumerate(need_to_know_feed):
rec.receivedRank = rank
Expand Down Expand Up @@ -317,7 +322,7 @@ async def fetch(
# two different feeds: the "general" feed and the "need to know" feed.
if self.is_need_to_know_experiment(curated_recommendations_request, surface_id):
# this applies ranking to the general_feed!
general_feed, need_to_know_recs, title = self.rank_need_to_know_recommendations(
general_feed, need_to_know_recs, title = await self.rank_need_to_know_recommendations(
recommendations, surface_id, curated_recommendations_request
)

Expand Down Expand Up @@ -345,6 +350,28 @@ async def fetch(

return response

async def fetch_backup_recommendations(
self, surface_id: ScheduledSurfaceId
) -> list[CuratedRecommendation]:
"""Return recommended stories for yesterday's date
for a given New Tab surface

@param: surface_id: a ScheduledSurfaceId
@return: A re-ranked list of curated recommendations
"""
corpus_items = await self.corpus_backend.fetch(surface_id, -1)

# Convert the CorpusItem list to a CuratedRecommendation list.
recommendations = [
CuratedRecommendation(
**item.model_dump(),
receivedRank=rank,
)
for rank, item in enumerate(corpus_items)
]

return recommendations

@staticmethod
def time_ms() -> int:
"""Return the time in milliseconds since the epoch as an integer."""
Expand Down
67 changes: 59 additions & 8 deletions tests/unit/curated_recommendations/test_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,10 +257,13 @@ class TestCuratedRecommendationsProviderRankNeedToKnowRecommendations:
"""Unit tests for rank_need_to_know_recommendations method"""

@staticmethod
def generate_recommendations(length: int) -> list[CuratedRecommendation]:
def generate_recommendations(
length: int, include_time_sensitive: bool = True
) -> list[CuratedRecommendation]:
"""Create dummy recommendations for the tests below.

@param length: how many recommendations are needed for a test
@param include_time_sensitive: whether the entries have the isTimeSensitive flag on randomly or not at all
@return: A list of curated recommendations
"""
recs = []
Expand All @@ -274,7 +277,7 @@ def generate_recommendations(length: int) -> list[CuratedRecommendation]:
excerpt="is failing english",
topic=random.choice(list(Topic)),
publisher="cohens",
isTimeSensitive=random.choice([True, False]),
isTimeSensitive=random.choice([True, False]) if include_time_sensitive else False,
imageUrl=HttpUrl("https://placehold.co/600x400/"),
)

Expand All @@ -284,19 +287,29 @@ def generate_recommendations(length: int) -> list[CuratedRecommendation]:

@staticmethod
def mock_curated_recommendations_provider(
mocker: MockerFixture,
mocker: MockerFixture, backup_recommendations: list[CuratedRecommendation] | None = None
) -> CuratedRecommendationsProvider:
"""Mock the necessary components of CuratedRecommendationsProvider.

@param mocker: MockerFixture
@param backup_recommendations: a list of curated recommendations to use as backup if there are not enough
time-sensitive stories in the main feed
@return: A mocked CuratedRecommendationsProvider
"""
# Mock the __init__ methods to prevent actual initialization
# Mock the __init__ method to prevent actual initialization
mocker.patch.object(CuratedRecommendationsProvider, "__init__", return_value=None)

# Mock the rank_recommendations method
mocker.patch.object(CuratedRecommendationsProvider, "rank_recommendations")

# Mock backup recommendations with the data that was provided to this method
if backup_recommendations:
mocker.patch.object(
CuratedRecommendationsProvider,
"fetch_backup_recommendations",
return_value=backup_recommendations,
)

# Create and return the mocked provider instance
provider = CuratedRecommendationsProvider(
mocker.patch.object(CorpusBackend, "__init__", return_value=None),
Expand All @@ -323,7 +336,8 @@ def mock_curated_recommendations_request(

return request

def test_rank_need_to_know_recommendations(self, mocker: MockerFixture):
@pytest.mark.asyncio
async def test_rank_need_to_know_recommendations(self, mocker: MockerFixture):
"""Test the main flow of logic in the function

@param mocker: MockerFixture
Expand All @@ -347,7 +361,7 @@ def test_rank_need_to_know_recommendations(self, mocker: MockerFixture):
)

# Call the method
general_feed, need_to_know_feed, title = provider.rank_need_to_know_recommendations(
general_feed, need_to_know_feed, title = await provider.rank_need_to_know_recommendations(
recommendations, surface_id, request
)

Expand All @@ -369,7 +383,10 @@ def test_rank_need_to_know_recommendations(self, mocker: MockerFixture):
# Verify that the localized title is correct
assert title == "In the news"

def test_rank_need_to_know_recommendations_different_surface(self, mocker: MockerFixture):
@pytest.mark.asyncio
async def test_rank_need_to_know_recommendations_different_surface(
self, mocker: MockerFixture
):
"""Test localization with a non-English New Tab surface

@param mocker: MockerFixture
Expand All @@ -385,9 +402,43 @@ def test_rank_need_to_know_recommendations_different_surface(self, mocker: Mocke
request = self.mock_curated_recommendations_request(mocker)

# Call the method
general_feed, need_to_know_feed, title = provider.rank_need_to_know_recommendations(
general_feed, need_to_know_feed, title = await provider.rank_need_to_know_recommendations(
recommendations, surface_id, request
)

# Verify that the title is correct for the German New Tab surface
assert title == "In den News"

@pytest.mark.asyncio
async def test_rank_need_to_know_recommendations_backup_stories(self, mocker: MockerFixture):
"""Test an edge case when today's stories for the 'Need to know' feed
have not yet been curated and the feed specifically for these stories
needs to fall back to yesterday's data.

@param mocker: MockerFixture
"""
# Create mock recommendations for today - this batch won't have any
# time-sensitive stories available
recs = self.generate_recommendations(20, False)

# Create backup recommendations - this batch WILL have a mix of normal
# and time-sensitive stories
backup_recs = self.generate_recommendations(100, True)

# Define the surface ID
surface_id = ScheduledSurfaceId.NEW_TAB_EN_US

# Instantiate the mocked classes
provider = self.mock_curated_recommendations_provider(mocker, backup_recs)
request = self.mock_curated_recommendations_request(mocker)

# Call the method
general_feed, need_to_know_feed, title = await provider.rank_need_to_know_recommendations(
recs, surface_id, request
)

# Double-check the initial recommendations don't have any time-sensitive items
assert len([r for r in recs if r.isTimeSensitive]) == 0

# Make sure the items in the need_to_know feed came from the backup recommendations
assert len(need_to_know_feed) > 5