Skip to content

Commit

Permalink
Adding holiday support to prophet detectors
Browse files Browse the repository at this point in the history
Summary:
We are starting to add the ability to pass holiday data to Prophet.
At the moment we are not exposing it to one detection users, but we are preparing the backend.
RFC.
In this case we are just using Prophet's ability to receive a country code list.
https://docs.google.com/document/d/1u2xuTq0_AtQeqMGbpV4JAngQdcxOBgKkVT0h_Ds1eYQ/edit#heading=h.cu5uys7l64vv

Reviewed By: akindyakov

Differential Revision: D54201010

fbshipit-source-id: 020b1abeaffe2aa2e46cb46b5efc72491a5bd7e7
  • Loading branch information
irumata authored and facebook-github-bot committed Mar 11, 2024
1 parent 281a522 commit 2e4a7c8
Show file tree
Hide file tree
Showing 2 changed files with 96 additions and 9 deletions.
30 changes: 30 additions & 0 deletions kats/detectors/prophet_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
PROPHET_YHAT_COLUMN = "yhat"
PROPHET_YHAT_LOWER_COLUMN = "yhat_lower"
PROPHET_YHAT_UPPER_COLUMN = "yhat_upper"
HOLIDAY_NAMES_COLUMN_NAME = "holiday"
HOLIDAY_DATES_COLUMN_NAME = "ds"
import os
import sys

Expand Down Expand Up @@ -177,6 +179,9 @@ class SeasonalityTypes(Enum):
WEEKEND = 3


USER_HOLIDAY_NAME = "user_provided_holiday"


def to_seasonality(seasonality: Union[str, SeasonalityTypes]) -> SeasonalityTypes:
if isinstance(seasonality, str):
try:
Expand Down Expand Up @@ -316,6 +321,7 @@ def __init__(
]
] = None,
countries_holidays: Optional[List[str]] = None,
holidays_list: Optional[Union[List[str], Dict[str, List[str]]]] = None,
) -> None:
"""
Initializartion of Prophet
Expand All @@ -333,6 +339,8 @@ def __init__(
If argument SeasonalityTypes, List[SeasonalityTypes], than mentioned seasonilities will be used in Prophet. If argument Dict[SeasonalityTypes, bool] - each seasonality can be setted directly (True - means used it, False - not to use, 'auto' according to Prophet.).
SeasonalityTypes enum values: DAY, WEEK , YEAR, WEEKEND
Daily, Weekly, Yearly seasonlities used as "auto" by default.
countries_holidays: Optional[List[str]]: List of countries for which holidays should be added to the model.
holidays_list: Optional[Union[List[str], Dict[str, List[str]]]]: Holiday dates to be added to the model. Either a list of dates, like ["2022-01-01","2022-03-31"], or a dict of lists when there are different holiday patterns, for example {"ds":["2022-01-01","2022-03-31"], "holiday":["playoff","superbowl"]}
"""

if serialized_model:
Expand Down Expand Up @@ -368,6 +376,7 @@ def __init__(
if countries_holidays is None:
countries_holidays = []
self.countries_holidays: List[str] = countries_holidays
self.holidays_list = holidays_list

def serialize(self) -> bytes:
"""Serialize the model into a json.
Expand Down Expand Up @@ -449,13 +458,34 @@ def fit(
additional_seasonalities = []
if self.seasonalities_to_fit[SeasonalityTypes.WEEKEND]:
additional_seasonalities = prophet_weekend_masks(data_df)
holidays = self.holidays_list
if holidays is not None and len(holidays) > 0:
if isinstance(holidays, List):
if isinstance(holidays[0], str):
holidays = {
HOLIDAY_DATES_COLUMN_NAME: self.holidays_list,
HOLIDAY_NAMES_COLUMN_NAME: ["holiday"] * len(holidays),
}
else:
raise ValueError(
"holidays_list should be a list of str or dict of list of str"
)
if not isinstance(holidays, Dict):
raise ValueError(
"holidays_list should be a list of str or dict of list of str"
)
# we use default lower and upper bound for holidays

holidays = pd.DataFrame(holidays)

# No incremental training. Create a model and train from scratch
model = Prophet(
interval_width=self.scoring_confidence_interval,
uncertainty_samples=self.uncertainty_samples,
daily_seasonality=self.seasonalities_to_fit[SeasonalityTypes.DAY],
yearly_seasonality=self.seasonalities_to_fit[SeasonalityTypes.YEAR],
weekly_seasonality=self.seasonalities_to_fit[SeasonalityTypes.WEEK],
holidays=holidays,
)
for country in self.countries_holidays:
model.add_country_holidays(country)
Expand Down
75 changes: 66 additions & 9 deletions kats/tests/detectors/test_prophet_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@
from kats.utils.simulator import Simulator
from parameterized.parameterized import parameterized

START_DATE_TEST_DATA = "2018-01-01"


class TestProphetDetector(TestCase):
def create_random_ts(
self, seed: int, length: int, magnitude: float, slope_factor: float
) -> TimeSeriesData:
np.random.seed(seed)
sim = Simulator(n=length, freq="1D", start=pd.to_datetime("2020-01-01"))
sim = Simulator(n=length, freq="1D", start=pd.to_datetime(START_DATE_TEST_DATA))

sim.add_trend(magnitude=magnitude * np.random.rand() * slope_factor)
sim.add_seasonality(
Expand All @@ -51,7 +53,7 @@ def create_ts(
freq: str = "1D",
) -> TimeSeriesData:
np.random.seed(seed)
sim = Simulator(n=length, freq=freq, start=pd.to_datetime("2020-01-01"))
sim = Simulator(n=length, freq=freq, start=pd.to_datetime(START_DATE_TEST_DATA))

sim.add_seasonality(magnitude, period=timedelta(days=7))
sim.add_noise(magnitude=signal_to_noise_ratio * magnitude)
Expand All @@ -68,7 +70,7 @@ def create_multi_seasonality_ts(
) -> TimeSeriesData:
np.random.seed(seed)

sim = Simulator(n=length, freq=freq, start=pd.to_datetime("2020-01-01"))
sim = Simulator(n=length, freq=freq, start=pd.to_datetime(START_DATE_TEST_DATA))
magnitude = (max_val - min_val) / 2

sim.add_trend(-0.2 * magnitude)
Expand Down Expand Up @@ -153,7 +155,9 @@ def add_smooth_anomaly(
# start time and freq don't matter, since we only care about the values
np.random.seed(seed)

anomaly_sim = Simulator(n=length, freq="1D", start=pd.to_datetime("2020-01-01"))
anomaly_sim = Simulator(
n=length, freq="1D", start=pd.to_datetime(START_DATE_TEST_DATA)
)
anomaly_sim.add_seasonality(magnitude, period=timedelta(days=2 * length))
# anomaly_sim.add_noise(magnitude=0.3 * magnitude * np.random.rand())

Expand All @@ -170,7 +174,7 @@ def add_trend_shift(
self, ts: TimeSeriesData, length: int, freq: str, magnitude: float
) -> None:
ts_df = ts.to_dataframe()
sim = Simulator(n=length, freq=freq, start=pd.to_datetime("2020-01-01"))
sim = Simulator(n=length, freq=freq, start=pd.to_datetime(START_DATE_TEST_DATA))
elevation = sim.trend_shift_sim(
cp_arr=[0, 1],
trend_arr=[0, 0, 0],
Expand Down Expand Up @@ -215,7 +219,7 @@ def add_multi_event(
event_relative_magnitude: float,
) -> TimeSeriesData:
np.random.seed(seed)
sim = Simulator(n=length, freq=freq, start=pd.to_datetime("2020-01-01"))
sim = Simulator(n=length, freq=freq, start=pd.to_datetime(START_DATE_TEST_DATA))

event_start = int(length * event_start_ratio)
event_end = int(length * event_end_ratio)
Expand Down Expand Up @@ -653,14 +657,14 @@ def test_heteroskedastic_noise_signal(self) -> None:
verifies that anomalies in low-noise segments have higher z-scores than those
in high-noise segments. This occurs because low noise segments will have lower
standard deviations, which result in higher z-scores.
With call ProphetDetectorMopdel without weekend seasonaluty this taest fails
With call ProphetDetectorModel without weekend seasonaluty this taest fails
"""
ts = self.create_ts(length=100 * 24, signal_to_noise_ratio=0.05, freq="1h")

# add heteroskedastic noise to the data

ts.value *= (
(ts.time - pd.to_datetime("2020-01-01")) % timedelta(days=7)
(ts.time - pd.to_datetime(START_DATE_TEST_DATA)) % timedelta(days=7)
> timedelta(days=3.5)
) * np.random.rand(100 * 24) * 2.5 + 0.5

Expand All @@ -684,14 +688,15 @@ def test_heteroskedastic_noise_signal_with_holidays(self) -> None:
verifies that anomalies in low-noise segments have higher z-scores than those
in high-noise segments. This occurs because low noise segments will have lower
standard deviations, which result in higher z-scores.
We are addingh holiday, to check the param works
With call ProphetDetectorMopdel without weekend seasonaluty this taest fails
"""
ts = self.create_ts(length=100 * 24, signal_to_noise_ratio=0.05, freq="1h")

# add heteroskedastic noise to the data

ts.value *= (
(ts.time - pd.to_datetime("2020-01-01")) % timedelta(days=7)
(ts.time - pd.to_datetime(START_DATE_TEST_DATA)) % timedelta(days=7)
> timedelta(days=3.5)
) * np.random.rand(100 * 24) * 2.5 + 0.5

Expand All @@ -709,6 +714,58 @@ def test_heteroskedastic_noise_signal_with_holidays(self) -> None:
response.scores.value[13 * 24], response.scores.value[16 * 24]
)

def test_heteroskedastic_noise_signal_with_specific_holidays(self) -> None:
    """Check z-score ordering under heteroskedastic noise with custom holidays.

    Hourly synthetic data is rescaled so that the first 3.5 days of every
    7-day cycle are multiplied by a constant 0.5 while the remainder is
    multiplied by a random factor, which gives the two parts different noise
    levels. Two spikes of identical magnitude are then added, one in each
    kind of segment, and the test asserts that the spike in the constant-scale
    segment gets the higher z-score.

    A large spike is also placed on the very first point, whose date is one
    of the user-provided holidays; the intent is that it does not change the
    outcome. The check is run twice: once with ``holidays_list`` given as a
    plain list of dates and once as a dict with ``ds``/``holiday`` lists.

    When ProphetDetectorModel is called without weekend seasonality this
    test fails.
    """
    n_points = 100 * 24
    split = 80 * 24
    series = self.create_ts(length=n_points, signal_to_noise_ratio=0.05, freq="1h")

    # User-provided holidays: the first day of the data and four days later.
    first_day = pd.to_datetime(START_DATE_TEST_DATA)
    fifth_day = first_day + pd.Timedelta(days=4)
    playoffs = [START_DATE_TEST_DATA, fifth_day.strftime("%Y-%m-%d")]

    # add heteroskedastic noise to the data
    late_in_cycle = (
        (series.time - first_day) % timedelta(days=7) > timedelta(days=3.5)
    )
    series.value *= late_in_cycle * np.random.rand(n_points) * 2.5 + 0.5

    # Abnormal value on the first (holiday) point, then two equal anomalies.
    series.value[0] += 1000
    series.value[93 * 24] += 100
    series.value[96 * 24] += 100

    # The same expectation must hold for both accepted holidays_list shapes.
    holiday_specs = (
        playoffs,
        {"ds": playoffs, "holiday": ["playoff"] * len(playoffs)},
    )
    for spec in holiday_specs:
        detector = ProphetDetectorModel(
            score_func="z_score",
            seasonalities={SeasonalityTypes.WEEKEND: True},
            countries_holidays=["US", "UK"],
            holidays_list=spec,
        )
        result = detector.fit_predict(series[split:], series[:split])

        # Offsets 13*24 and 16*24 in the scored slice are the two anomalies
        # added at 93*24 and 96*24 in the full series.
        self.assertGreater(
            result.scores.value[13 * 24], result.scores.value[16 * 24]
        )

def test_weekend_seasonality_noise_signal(self) -> None:
"""Tests the accuracy with heteroskedastic series and noise
Expand Down

0 comments on commit 2e4a7c8

Please sign in to comment.