From 655c2be214acef65a14358217573e9049b07fbfb Mon Sep 17 00:00:00 2001
From: Sunil Mohan Adapa <sunil@medhas.org>
Date: Wed, 30 Jan 2019 14:55:51 -0800
Subject: [PATCH] backups: Improve performance of backup download

Signed-off-by: Sunil Mohan Adapa <sunil@medhas.org>
Reviewed-by: James Valleroy <jvalleroy@mailbox.org>
---
 plinth/modules/backups/repository.py | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/plinth/modules/backups/repository.py b/plinth/modules/backups/repository.py
index adba67262..cdf39dc19 100644
--- a/plinth/modules/backups/repository.py
+++ b/plinth/modules/backups/repository.py
@@ -18,6 +18,7 @@
 Remote and local Borg backup repositories
 """
 
+import io
 import json
 import logging
 import os
@@ -141,10 +142,32 @@ class BorgRepository():
         self.run(['init', '--path', self.repo_path, '--encryption', 'none'])
 
     def get_download_stream(self, archive_name):
+        """Return an stream of .tar.gz binary data for a backup archive."""
+        class BufferedReader(io.BufferedReader):
+            """Improve performance of buffered binary streaming.
+
+            Django simply returns the iterator as a response for the WSGI app.
+            CherryPy then iterates over this iterator and writes to HTTP
+            response. This calls __next__ over the BufferedReader that is
+            process.stdout. However, this seems to call readline() which looks
+            for \n in binary data which leads to short unpredictably sized
+            chunks which in turn lead to severe performance degradation. So,
+            overwrite this and call read() which is better geared for handling
+            binary data.
+
+            """
+            def __next__(self):
+                """Override to call read() instead of readline()."""
+                chunk = self.read(io.DEFAULT_BUFFER_SIZE)
+                if not chunk:
+                    raise StopIteration
+
+                return chunk
+
         args = ['export-tar', '--path', self._get_archive_path(archive_name)]
         args += self._get_encryption_arguments(self.credentials)
         proc = self._run('backups', args, run_in_background=True)
-        return proc.stdout
+        return BufferedReader(proc.stdout)
 
     def get_archive(self, name):
         for archive in self.list_archives():