mirror of
https://github.com/freedombox/FreedomBox.git
synced 2026-01-21 07:55:00 +00:00
Tests: - Run functional tests on miniflux app. - Run backup and restore on ttrss app manually and notice that data is restored and there are no errors. Signed-off-by: Sunil Mohan Adapa <sunil@medhas.org> Reviewed-by: Joseph Nuthalapati <njoseph@riseup.net>
136 lines
5.1 KiB
Python
136 lines
5.1 KiB
Python
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
"""
|
|
Common utilities to help with handling a database.
|
|
"""
|
|
|
|
import pathlib
|
|
import subprocess
|
|
import threading
|
|
from typing import ClassVar
|
|
|
|
|
|
class DBLock:
    """A re-entrant lock with a fixed timeout (not -1) by default."""

    # Default number of seconds to wait for the lock before giving up.
    TIMEOUT: ClassVar[float] = 30

    def __init__(self, *args, **kwargs):
        """Wrap a freshly created RLock; all arguments are forwarded to it."""
        self._lock = threading.RLock(*args, **kwargs)
        self.timeout = DBLock.TIMEOUT

    def __getattr__(self, attribute):
        """Delegate unknown attribute lookups to the wrapped RLock."""
        return getattr(self._lock, attribute)

    def __enter__(self):
        """Acquire the lock, waiting at most ``self.timeout`` seconds.

        Return True when acquired, False when the wait timed out. The
        ``with`` body runs either way: callers deliberately accept
        best-effort locking rather than failing on contention.
        """
        return self._lock.acquire(timeout=self.timeout)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Release the lock, tolerating the never-acquired case."""
        try:
            self._lock.release()
        except RuntimeError:
            # __enter__ timed out without acquiring; nothing to release.
            pass
|
|
|
|
|
|
# The Problem
|
|
# -----------
|
|
# Since this is a small application (in terms of data stored and requests per
|
|
# second handled), we use sqlite3 as the backend database. This gives us more
|
|
# reliability. However, sqlite3 has limited capability to handle parallel
|
|
# requests. When we try to use multiple Model.update_or_create() calls
|
|
# simultaneously, only the first one succeeds and the remaining fail with
|
|
# 'Database is locked' errors.
|
|
#
|
|
# The 'timeout' value passed during connection creation means that queries will
|
|
# wait for a maximum of given timeout period when trying to acquire locks.
|
|
# However, in some cases, to prevent deadlocks caused by threads waiting on
|
|
# each other, sqlite3 will not wait and immediately throw an exception.
|
|
#
|
|
# If we set the isolation level to 'EXCLUSIVE' or 'IMMEDIATE' instead of the
|
|
# default 'DEFERRED', each transaction should acquire a write lock immediately
|
|
# after transaction is started. This reduces the situations in which threads
|
|
# deadlock on each other. This improves the situation somewhat. However, this
|
|
# is still causing immediate timeout errors due to the way Django does its
|
|
# update_or_create() queries.
|
|
#
|
|
# This is true even in 'WAL' (write-ahead-log) journaling mode. While WAL mode
|
|
# makes it easier to have many read transactions while one write query is
|
|
# happening, it does not prevent this situation.
|
|
#
|
|
# Django should ideally provide a way to serialize these queries specifically
|
|
# to work with sqlite3 locking behavior. There seems to be no such way
|
|
# currently.
|
|
#
|
|
# Workaround
|
|
# ----------
|
|
# To workaround the problem, use a simple lock to serialize all database
|
|
# queries. Like this:
|
|
#
|
|
# with db.lock:
|
|
# # do some short database operation
|
|
#
|
|
# Queries usually have short execution time (unless stuck on disk I/O). A lot
|
|
# of parallel request processing is not expected from this service. We want
|
|
# more reliability (not failing on DB locks) over the ability to run parallel
|
|
# requests. Typically, requests waiting to perform DB queries will only have to
|
|
# wait < 1 second to get their turn. In the worst case, the database lock will
|
|
# not be acquired and the code continues anyway.
|
|
#
|
|
# This locking can't be done in all situations. For example, queries made
|
|
# within Django framework are not locked. Still, this approach should prevent
|
|
# most of the significant cases where we have seen database lock issues.
|
|
|
|
lock = DBLock()
|
|
|
|
|
|
#
|
|
# PostgreSQL utilities
|
|
#
|
|
def _run_as_postgres(command, stdin=None, stdout=None):
    """Execute a command as the 'postgres' system user via sudo.

    ``stdin`` and ``stdout`` are passed straight through to
    ``subprocess.run``. Raises ``subprocess.CalledProcessError`` on a
    non-zero exit status (check=True).
    """
    full_command = ['sudo', '--user', 'postgres', *command]
    return subprocess.run(full_command, stdin=stdin, stdout=stdout,
                          check=True)
|
|
|
|
|
|
def postgres_dump_database(backup_file: str, database_name: str,
                           database_user: str):
    """Dump PostgreSQL database to a file.

    Overwrites file if it exists. Uses pg_dump utility from postgres package
    (needs to be installed).
    """
    backup_path = pathlib.Path(backup_file)
    backup_path.parent.mkdir(parents=True, exist_ok=True)

    # First pass (truncating write): a role reset statement followed by
    # every line of a cluster-wide roles-only dump that mentions the user.
    with open(backup_path, 'w', encoding='utf-8') as file_handle:
        roles_dump = _run_as_postgres(['pg_dumpall', '--roles-only'],
                                      stdout=subprocess.PIPE)
        file_handle.write(f'DROP ROLE IF EXISTS {database_user};\n')
        matching_lines = [line + '\n'
                          for line in roles_dump.stdout.decode().splitlines()
                          if database_user in line]
        file_handle.writelines(matching_lines)

    # Second pass (append): let pg_dump write the database dump directly to
    # the re-opened backup file.
    with open(backup_path, 'a', encoding='utf-8') as file_handle:
        _run_as_postgres(
            ['pg_dump', '--create', '--clean', '--if-exists', database_name],
            stdout=file_handle)
|
|
|
|
|
|
def postgres_restore_database(backup_file: str, database_name: str):
    """Restore PostgreSQL database from a file.

    Drops the database and recreates it before feeding the backup to psql.
    Uses dropdb/createdb/psql utilities from the postgres package (needs to
    be installed). Raises subprocess.CalledProcessError if any of the
    commands fail.
    """
    # This is needed for old backups only. New backups include 'DROP DATABASE
    # IF EXISTS' and 'CREATE DATABASE' statements. '--if-exists' prevents a
    # spurious failure (check=True) on a fresh system where the database has
    # never been created.
    _run_as_postgres(['dropdb', '--if-exists', database_name])
    _run_as_postgres(['createdb', database_name])

    with open(backup_file, 'r', encoding='utf-8') as file_handle:
        _run_as_postgres(['psql', '--dbname', database_name],
                         stdin=file_handle)
|