# SPDX-License-Identifier: AGPL-3.0-or-later
"""Schedule for automatic backups.

Every day automatic backups are triggered. Daily, weekly and monthly backups
are taken. Cleanup of old backups is triggered and the specified number of
backups are kept back in each category.

"""

import calendar
import json
import logging
from datetime import datetime, timedelta

logger = logging.getLogger(__name__)


class Schedule:
    """Description of a schedule for backups."""

    # Names of the backup categories handled by the schedule.
    _PERIODS = ('daily', 'weekly', 'monthly')

    # No new backup is started while any scheduled backup is younger than
    # this.
    _MINIMUM_INTERVAL = timedelta(hours=2)

    # Allowance past the nominal period before a backup counts as overdue.
    _OVERDUE_GRACE = timedelta(hours=1)

    def __init__(self, repository_uuid, enabled=None, daily_to_keep=None,
                 weekly_to_keep=None, monthly_to_keep=None, run_at_hour=None,
                 unselected_apps=None):
        """Initialize the schedule object instance.

        'repository_uuid' is the unique ID of the repository that this
        schedule is applied to.

        'enabled' is a boolean indicating whether scheduled backups are enabled
        or disabled. Defaults to disabled.

        'daily_to_keep' is a whole number indicating the number of daily
        backups to keep. Older backups are removed after creating a new backup
        to keep these many backups. A value of 0 means no such backups are
        scheduled. Defaults to 5.

        'weekly_to_keep' is a whole number indicating the number of weekly
        backups to keep. Older backups are removed after creating a new backup
        to keep these many backups. A value of 0 means no such backups are
        scheduled. Defaults to 3.

        'monthly_to_keep' is a whole number indicating the number of monthly
        backups to keep. Older backups are removed after creating a new backup
        to keep these many backups. A value of 0 means no such backups are
        scheduled. Defaults to 3.

        'run_at_hour' is a whole number indicating the hour of the day when the
        backups must be scheduled. Defaults to 2 (02:00).

        'unselected_apps' is a list of app IDs that should not be included when
        scheduling backups. A negative list is maintained because when a new
        app is installed, it is included into the schedule by default unless
        explicitly removed. This is the safer option.

        """
        self.repository_uuid = repository_uuid
        self.enabled = enabled or False
        # 0 is a meaningful value for the settings below, so only None selects
        # the default.
        self.daily_to_keep = daily_to_keep if daily_to_keep is not None else 5
        self.weekly_to_keep = weekly_to_keep if weekly_to_keep is not None \
            else 3
        self.monthly_to_keep = monthly_to_keep if monthly_to_keep is not None \
            else 3
        # Run at 02:00 by default everyday
        self.run_at_hour = run_at_hour if run_at_hour is not None else 2
        self.unselected_apps = unselected_apps or []

    def get_storage_format(self):
        """Return the object serialized as dict suitable for instantiation.

        The repository UUID is not part of the returned dictionary.

        """
        return {
            'enabled': self.enabled,
            'daily_to_keep': self.daily_to_keep,
            'weekly_to_keep': self.weekly_to_keep,
            'monthly_to_keep': self.monthly_to_keep,
            'run_at_hour': self.run_at_hour,
            'unselected_apps': self.unselected_apps,
        }

    @staticmethod
    def _is_backup_too_soon(recent_backup_times):
        """Return whether a backup was already taken recently.

        'recent_backup_times' maps each period name to the time of the most
        recent backup for that period.

        """
        now = datetime.now()
        return any(
            now - recent_backup_times[period] < Schedule._MINIMUM_INTERVAL
            for period in Schedule._PERIODS)

    @staticmethod
    def _next_monthly_due(last_monthly):
        """Return the moment one calendar month after 'last_monthly'.

        When the following month is shorter than the day of 'last_monthly',
        the day is clamped to the last day of that month. For example, a backup
        on January 31st makes the next one due on the last day of February.

        """
        if last_monthly.month == 12:
            year, month = last_monthly.year + 1, 1
        else:
            year, month = last_monthly.year, last_monthly.month + 1

        days_in_month = calendar.monthrange(year, month)[1]
        return last_monthly.replace(year=year, month=month,
                                    day=min(last_monthly.day, days_in_month))

    @staticmethod
    def _too_long_since_last_backup(recent_backup_times):
        """Return periods for which it has been too long since last backup.

        'recent_backup_times' maps each period name to the time of the most
        recent backup for that period. The returned list is ordered daily,
        weekly, monthly.

        """
        periods = []
        local_time = datetime.now()
        grace = Schedule._OVERDUE_GRACE

        if local_time - recent_backup_times['daily'] > \
           timedelta(days=1) + grace:
            periods.append('daily')

        if local_time - recent_backup_times['weekly'] > \
           timedelta(days=7) + grace:
            periods.append('weekly')

        next_monthly = Schedule._next_monthly_due(
            recent_backup_times['monthly'])
        if local_time > next_monthly + grace:
            periods.append('monthly')

        return periods

    def _time_for_periods(self):
        """Return periods for which it is scheduled time for backup."""
        local_time = datetime.now()
        if local_time.hour != self.run_at_hour:
            return []

        periods = ['daily']  # At specified hour
        if local_time.isoweekday() == 7:
            periods.append('weekly')  # At specified hour on Sunday

        if local_time.day == 1:
            periods.append('monthly')  # At specified hour on 1st of the month

        return periods

    def _get_disabled_periods(self):
        """Return the list of periods for which backups are disabled."""
        to_keep = {
            'daily': self.daily_to_keep,
            'weekly': self.weekly_to_keep,
            'monthly': self.monthly_to_keep,
        }
        return [period for period in self._PERIODS if to_keep[period] == 0]

    def run_schedule(self):
        """Run a scheduled backup if one is due, raise exception on failure.

        Return True if a backup was taken and old backups were cleaned up,
        False if nothing had to be done.

        Frequent triggering: If the method is triggered too frequently for any
        reason, later triggers will not result in more backups as the previous
        backup has to be more than 2 hours old at least to trigger a new
        backup.

        Daemon offline: When the daemon is offline, no backups will be made.
        However, an hour after it is back online, backup check will be done and
        if it is determined that too much time has passed since the last
        backup, a new backup will be taken.

        Errors: When an error occurs during backup process, the method raises
        an exception. An hour later, it is triggered again. This time it will
        determine that too much time has passed since the last backup and will
        attempt to backup again. During a day about 24 attempts to backup will
        be made and reported. A backup may be made at an unscheduled time due
        to this. This won't prevent the next backup from happening at the
        scheduled time (unless it is too close to the previous successful one).

        Clock changes: When the clock changes and is set forward, within an
        hour of the change, the schedule check will determine that it has been
        too long since the last backup and the backup will be triggered. This
        will result in a backup at an unscheduled time. When the clock changes
        and is set backward, all previous backups that are more recent than
        the current time are ignored for scheduling. Backups will be scheduled
        on time and error handling works.

        Day light saving time: When gaining an hour, it is possible that
        schedule qualifies a second time during a day for backup. This is
        avoided by checking if the backup is too soon since the earlier one.
        When losing an hour, the schedule may not qualify for backup on that
        day at all. However, an hour after scheduled time, it will be deemed
        that too much time has passed since the previous backup and a backup
        will be scheduled.

        """
        if not self.enabled:
            return False

        repository = self._get_repository()
        repository.prepare()

        recent_backup_times = self._get_recent_backup_times(repository)
        if self._is_backup_too_soon(recent_backup_times):
            return False

        too_long_periods = self._too_long_since_last_backup(
            recent_backup_times)
        time_for_periods = self._time_for_periods()
        disabled_periods = self._get_disabled_periods()
        periods = set(too_long_periods).union(time_for_periods)
        periods = periods.difference(disabled_periods)
        if not periods:
            return False

        self._run_backup(periods)
        self._run_cleanup(repository)
        return True

    def _get_repository(self):
        """Return the repository to which this schedule is assigned."""
        from . import repository as repository_module
        return repository_module.get_instance(self.repository_uuid)

    @staticmethod
    def _serialize_comment(data):
        """Represent dictionary data as comment.

        Borg substitutes python like placeholders with {}. Braces are doubled
        so that the JSON text survives that substitution.

        """
        comment = json.dumps(data)
        return comment.replace('{', '{{').replace('}', '}}')

    @staticmethod
    def _list_scheduled_archives(repository):
        """Return a list of archives due to scheduled backups.

        Each returned item is a copy of the archive as listed by the
        repository with 'comment' replaced by the decoded dictionary and
        'start' replaced by a datetime object. The dictionaries returned by
        the repository are left untouched so that listing them again keeps
        working even if the repository hands out the same objects.

        """
        now = datetime.now()

        scheduled_archives = []
        for archive in repository.list_archives():
            try:
                comment = json.loads(archive['comment'])
            except (KeyError, TypeError, ValueError):
                # No comment, or not JSON: not created by the scheduler.
                continue

            if not isinstance(comment, dict) or \
               comment.get('type') != 'scheduled' or \
               not isinstance(comment.get('periods'), list):
                continue

            start_time = datetime.strptime(archive['start'],
                                           '%Y-%m-%dT%H:%M:%S.%f')
            if start_time > now:
                # This backup was taken when clock was set in future. Ignore it
                # to ensure backups continue to be taken.
                continue

            scheduled_archive = dict(archive)
            scheduled_archive['comment'] = comment
            scheduled_archive['start'] = start_time
            scheduled_archives.append(scheduled_archive)

        return scheduled_archives

    def _get_recent_backup_times(self, repository):
        """Return the time of most recent daily, weekly and monthly backups.

        Periods without any backup are reported as datetime.min.

        """
        times = {
            'daily': datetime.min,
            'weekly': datetime.min,
            'monthly': datetime.min
        }

        for archive in self._list_scheduled_archives(repository):
            periods = set(times).intersection(archive['comment']['periods'])
            for period in periods:
                times[period] = max(times[period], archive['start'])

        return times

    def _run_backup(self, periods):
        """Run a backup and mark it for given periods."""
        logger.info('Running backup for repository %s, periods %s',
                    self.repository_uuid, periods)

        from . import api
        periods = sorted(periods)
        name = 'scheduled: {periods}: {datetime}'.format(
            periods=', '.join(periods),
            datetime=datetime.now().strftime('%Y-%m-%d:%H:%M'))
        comment = self._serialize_comment({
            'type': 'scheduled',
            'periods': periods
        })
        app_ids = [
            component.app_id
            for component in api.get_all_components_for_backup()
            if component.app_id not in self.unselected_apps
        ]

        repository = self._get_repository()
        repository.create_archive(name, app_ids, archive_comment=comment)

    def _run_cleanup(self, repository):
        """Cleanup old backups.

        An archive is kept if it is among the most recent archives to keep for
        at least one of the periods it was taken for. All other scheduled
        archives are deleted from the repository.

        """
        # Retention is decided by age, so don't depend on the order in which
        # the repository lists archives. The sort is stable: a listing that is
        # already newest first is processed in the same order.
        archives = sorted(self._list_scheduled_archives(repository),
                          key=lambda archive: archive['start'], reverse=True)
        to_keep = {
            'daily': self.daily_to_keep,
            'weekly': self.weekly_to_keep,
            'monthly': self.monthly_to_keep,
        }
        counts = {'daily': 0, 'weekly': 0, 'monthly': 0}
        for archive in archives:
            keep = False
            archive_periods = archive['comment']['periods']
            for period in set(counts).intersection(archive_periods):
                # Count the archive for every period it belongs to, even when
                # an earlier period already decided to keep it.
                counts[period] += 1
                if counts[period] <= to_keep[period]:
                    keep = True

            if not keep:
                logger.info('Cleaning up in repository %s backup archive %s',
                            self.repository_uuid, archive['name'])
                repository.delete_archive(archive['name'])
# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Tests for the scheduling of automatic backups.
"""

import json
from datetime import datetime, timedelta
from unittest.mock import MagicMock, call, patch

import pytest

import plinth.modules.backups.repository  # noqa, pylint: disable=unused-import
from plinth.app import App

from ..components import BackupRestore
from ..schedule import Schedule

setup_helper = MagicMock()


class AppTest(App):
    """App subclass used as a stand-in for real apps in these tests."""
    app_id = 'test-app'


def _get_backup_component(name):
    """Create a BackupRestore component with the given name."""
    return BackupRestore(name)


def _get_test_app(name):
    """Create a test app registered under 'name' with a backup component."""
    test_app = AppTest()
    test_app.app_id = name
    test_app._all_apps[name] = test_app
    test_app.add(BackupRestore(f'{name}-component'))
    return test_app


def test_init_default_values():
    """A schedule created with only a UUID gets the documented defaults."""
    schedule = Schedule('test-uuid')
    assert schedule.repository_uuid == 'test-uuid'
    assert not schedule.enabled
    kept = (schedule.daily_to_keep, schedule.weekly_to_keep,
            schedule.monthly_to_keep)
    assert kept == (5, 3, 3)
    assert schedule.run_at_hour == 2
    assert schedule.unselected_apps == []


def test_init():
    """Explicitly passed values are stored on the schedule as given."""
    schedule = Schedule('test-uuid', enabled=True, daily_to_keep=1,
                        weekly_to_keep=2, monthly_to_keep=5, run_at_hour=0,
                        unselected_apps=['test-app1', 'test-app2'])
    assert schedule.repository_uuid == 'test-uuid'
    assert schedule.enabled
    kept = (schedule.daily_to_keep, schedule.weekly_to_keep,
            schedule.monthly_to_keep)
    assert kept == (1, 2, 5)
    assert schedule.run_at_hour == 0
    assert schedule.unselected_apps == ['test-app1', 'test-app2']


def test_get_storage_format():
    """The storage format carries every setting except the UUID."""
    settings = {
        'enabled': True,
        'daily_to_keep': 1,
        'weekly_to_keep': 2,
        'monthly_to_keep': 5,
        'run_at_hour': 23,
        'unselected_apps': ['test-app1', 'test-app2'],
    }
    schedule = Schedule('test-uuid', **settings)
    assert schedule.get_storage_format() == settings
def _get_archives_from_test_data(data):
    """Turn compact test data into archives as listed by a repository.

    Each item of 'data' is a dictionary with 'periods' and 'time'. 'time' may
    be a timedelta (relative to the current time), a string of the form
    'YYYY-MM-DD HH:MM:SS+0000' or a datetime object. Archives are named
    'archive-<index>' after their position in 'data'.

    """
    def _as_datetime(moment):
        """Return the moment described by a 'time' entry as datetime."""
        if isinstance(moment, timedelta):
            return datetime.now() + moment

        if isinstance(moment, str):
            return datetime.strptime(moment, '%Y-%m-%d %H:%M:%S+0000')

        return moment

    archives = []
    for position, entry in enumerate(data):
        started = _as_datetime(entry['time'])
        comment = json.dumps({'type': 'scheduled', 'periods': entry['periods']})
        archives.append({
            'comment': comment,
            'start': started.strftime('%Y-%m-%dT%H:%M:%S.%f'),
            'name': f'archive-{position}'
        })

    return archives


# - First item is the arguments used to construct Schedule().
# - Second item is the list of previous backups in the system.
# - Third item is the return value of datetime.datetime.now().
# - Fourth item is the list of periods for which backups must be triggered.
# - Fifth item is the list of expected archives to be deleted after backup.
cases = [
    # Schedule is disabled
    [
        [False, 10, 10, 10, 0],
        [],
        datetime.now(),
        [],
        [],
    ],
    # No past backups
    [
        [True, 10, 10, 10, 0],
        [],
        datetime.now(),
        ['daily', 'weekly', 'monthly'],
        [],
    ],
    # Daily backup taken recently
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['daily'],
            'time': timedelta(seconds=-600)
        }],
        datetime.now(),
        [],
        [],
    ],
    # Weekly backup taken recently
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['weekly'],
            'time': timedelta(seconds=-600)
        }],
        datetime.now(),
        [],
        [],
    ],
    # Monthly backup taken recently
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['monthly'],
            'time': timedelta(seconds=-600)
        }],
        datetime.now(),
        [],
        [],
    ],
    # Backup taken not so recently
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['daily'],
            'time': datetime(2021, 1, 1) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 1),
        ['daily', 'weekly', 'monthly'],
        [],
    ],
    # Too long since a daily backup, not scheduled time
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['daily'],
            'time': datetime(2021, 1, 1) - timedelta(days=1, seconds=3601)
        }],
        datetime(2021, 1, 1),
        ['daily', 'weekly', 'monthly'],
        [],
    ],
    # Not too long since a daily backup, not scheduled time
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['daily'],
            'time': datetime(2021, 1, 1) - timedelta(days=1, seconds=3600)
        }],
        datetime(2021, 1, 1),
        ['weekly', 'monthly'],
        [],
    ],
    # Too long since a weekly backup, not scheduled time
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['weekly'],
            'time': datetime(2021, 1, 1) - timedelta(days=7, seconds=3601)
        }],
        datetime(2021, 1, 1),
        ['daily', 'weekly', 'monthly'],
        [],
    ],
    # Not too long since a weekly backup, not scheduled time
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['weekly'],
            'time': datetime(2021, 1, 1) - timedelta(days=7, seconds=3600)
        }],
        datetime(2021, 1, 1),
        ['daily', 'monthly'],
        [],
    ],
    # Too long since a monthly backup, not scheduled time, year rounding
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['monthly'],
            'time': datetime(2020, 12, 1)
        }],
        datetime(2021, 1, 1, 1, 0, 1),
        ['daily', 'weekly', 'monthly'],
        [],
    ],
    # Not too long since a monthly backup, not scheduled time, year rounding
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['monthly'],
            'time': datetime(2020, 12, 1)
        }],
        datetime(2021, 1, 1, 1),
        ['daily', 'weekly'],
        [],
    ],
    # Too long since a monthly backup, not scheduled time, no year rounding
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['monthly'],
            'time': datetime(2020, 11, 1)
        }],
        datetime(2020, 12, 1, 1, 0, 1),
        ['daily', 'weekly', 'monthly'],
        [],
    ],
    # Not too long since a monthly backup, not scheduled time, no year
    # rounding
    [
        [True, 10, 10, 10, 2],
        [{
            'periods': ['monthly'],
            'time': datetime(2020, 11, 1)
        }],
        datetime(2020, 12, 1, 1),
        ['daily', 'weekly'],
        [],
    ],
    # Time for daily backup
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['daily', 'weekly', 'monthly'],
            'time': datetime(2021, 1, 2) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 2),
        ['daily'],
        [],
    ],
    # Time for daily backup, different scheduled time
    [
        [True, 10, 10, 10, 11],
        [{
            'periods': ['daily', 'weekly', 'monthly'],
            'time': datetime(2021, 1, 2, 11) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 2, 11),
        ['daily'],
        [],
    ],
    # Time for daily/weekly backup, 2021-01-03 is a Sunday
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['daily', 'weekly', 'monthly'],
            'time': datetime(2021, 1, 3) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 3),
        ['daily', 'weekly'],
        [],
    ],
    # Time for daily/monthly backup
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['daily', 'weekly', 'monthly'],
            'time': datetime(2021, 1, 1) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 1),
        ['daily', 'monthly'],
        [],
    ],
    # Daily backups disabled by setting the no. of backups to keep to 0
    [
        [True, 0, 10, 10, 0],
        [],
        datetime(2021, 1, 1),
        ['weekly', 'monthly'],
        [],
    ],
    # Weekly backups disabled by setting the no. of backups to keep to 0
    [
        [True, 10, 0, 10, 0],
        [],
        datetime(2021, 1, 1),
        ['daily', 'monthly'],
        [],
    ],
    # Monthly backups disabled by setting the no. of backups to keep to 0
    [
        [True, 10, 10, 0, 0],
        [],
        datetime(2021, 1, 1),
        ['daily', 'weekly'],
        [],
    ],
    # Not scheduled, not too long since last, no backup necessary
    [
        [True, 10, 10, 10, 0],
        [{
            'periods': ['daily', 'weekly', 'monthly'],
            'time': datetime(2021, 1, 2, 1) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 2, 1),
        [],
        [],
    ],
    # Cleanup daily backups
    [
        [True, 2, 10, 10, 0],
        [{
            'periods': ['daily'],
            'time': datetime(2021, 1, 3, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily'],
            'time': datetime(2021, 1, 2, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily'],
            'time': datetime(2021, 1, 1, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily'],
            'time': datetime(2021, 1, 1, 0) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 4, 1),
        ['daily', 'weekly', 'monthly'],
        ['archive-2', 'archive-3'],
    ],
    # Cleanup weekly backups
    [
        [True, 10, 2, 10, 0],
        [{
            'periods': ['weekly'],
            'time': datetime(2021, 1, 3, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['weekly'],
            'time': datetime(2021, 1, 2, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['weekly'],
            'time': datetime(2021, 1, 1, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['weekly'],
            'time': datetime(2021, 1, 1, 0) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 4, 1),
        ['daily', 'monthly'],
        ['archive-2', 'archive-3'],
    ],
    # Cleanup monthly backups
    [
        [True, 10, 10, 2, 0],
        [{
            'periods': ['monthly'],
            'time': datetime(2021, 1, 3, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['monthly'],
            'time': datetime(2021, 1, 2, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['monthly'],
            'time': datetime(2021, 1, 1, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['monthly'],
            'time': datetime(2021, 1, 1, 0) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 4, 1),
        ['daily', 'weekly'],
        ['archive-2', 'archive-3'],
    ],
    # Cleanup daily backups, but keep due to them being weekly/monthly too
    [
        [True, 2, 1, 10, 0],
        [{
            'periods': ['daily'],
            'time': datetime(2021, 1, 6, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily'],
            'time': datetime(2021, 1, 5, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily', 'weekly'],
            'time': datetime(2021, 1, 4, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily', 'weekly'],
            'time': datetime(2021, 1, 3, 1) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily', 'monthly'],
            'time': datetime(2021, 1, 2, 0) - timedelta(seconds=3 * 3600)
        }, {
            'periods': ['daily'],
            'time': datetime(2021, 1, 1, 0) - timedelta(seconds=3 * 3600)
        }],
        datetime(2021, 1, 7, 1),
        ['daily'],
        ['archive-3', 'archive-5'],
    ],
]


@pytest.mark.parametrize(
    'schedule_params,archives_data,test_now,run_periods,cleanups', cases)
@patch('plinth.modules.backups.repository.get_instance')
def test_run_schedule(get_instance, schedule_params, archives_data, test_now,
                      run_periods, cleanups):
    """Test that backups are run at expected time.

    'schedule_params' are the positional arguments for Schedule() following
    the repository UUID. 'archives_data' describes the archives present in the
    repository. 'test_now' is the time reported by datetime.now() inside the
    schedule module. 'run_periods' are the periods a backup must be created
    for; empty if no backup is expected. 'cleanups' are the names of the
    archives that must be deleted, in order.

    """
    setup_helper.get_state.return_value = 'up-to-date'

    repository = MagicMock()
    # Build fresh archive dictionaries on every listing.
    repository.list_archives.side_effect = \
        lambda: _get_archives_from_test_data(archives_data)
    get_instance.return_value = repository

    with patch('plinth.modules.backups.schedule.datetime') as mock_datetime, \
         patch('plinth.app.App.list') as app_list:
        app_list.return_value = [
            _get_test_app('test-app1'),
            _get_test_app('test-app2'),
            _get_test_app('test-app3')
        ]

        # Only now() is faked. Everything else the schedule module uses from
        # the datetime class is forwarded to the real implementation.
        mock_datetime.now.return_value = test_now
        mock_datetime.strptime = datetime.strptime
        mock_datetime.min = datetime.min
        mock_datetime.side_effect = lambda *args, **kwargs: datetime(
            *args, **kwargs)

        schedule = Schedule('test_uuid', schedule_params[0],
                            schedule_params[1], schedule_params[2],
                            schedule_params[3], schedule_params[4],
                            ['test-app2'])
        schedule.run_schedule()

    if not run_periods:
        repository.create_archive.assert_not_called()
    else:
        # Work on a sorted copy, the case data is shared between test runs.
        expected_periods = sorted(run_periods)
        name = 'scheduled: {periods}: {datetime}'.format(
            periods=', '.join(expected_periods),
            datetime=test_now.strftime('%Y-%m-%d:%H:%M'))
        app_ids = ['test-app1', 'test-app3']
        archive_comment = json.dumps({
            'type': 'scheduled',
            'periods': expected_periods
        }).replace('{', '{{').replace('}', '}}')
        # Exactly one archive must be created per run.
        repository.create_archive.assert_called_once_with(
            name, app_ids, archive_comment=archive_comment)

    # Exactly the expected archives must be deleted, nothing more.
    expected_deletions = [call(name) for name in cleanups]
    assert repository.delete_archive.call_args_list == expected_deletions