From 77a10c62c9a44a27e8030eff6e5b3fb182be55ae Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 29 May 2023 00:41:12 +0300 Subject: Patch GitPython to not use leaky persistent processes --- modules/gitpython_hack.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 modules/gitpython_hack.py (limited to 'modules/gitpython_hack.py') diff --git a/modules/gitpython_hack.py b/modules/gitpython_hack.py new file mode 100644 index 00000000..e537c1df --- /dev/null +++ b/modules/gitpython_hack.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +import io +import subprocess + +import git + + +class Git(git.Git): + """ + Git subclassed to never use persistent processes. + """ + + def _get_persistent_cmd(self, attr_name, cmd_name, *args, **kwargs): + raise NotImplementedError(f"Refusing to use persistent process: {attr_name} ({cmd_name} {args} {kwargs})") + + def get_object_header(self, ref: str | bytes) -> tuple[str, str, int]: + ret = subprocess.check_output( + [self.GIT_PYTHON_GIT_EXECUTABLE, "cat-file", "--batch-check"], + input=self._prepare_ref(ref), + cwd=self._working_dir, + timeout=2, + ) + return self._parse_object_header(ret) + + def stream_object_data(self, ref: str) -> tuple[str, str, int, "Git.CatFileContentStream"]: + # Not really streaming, per se; this buffers the entire object in memory. + # Shouldn't be a problem for our use case, since we're only using this for + # object headers (commit objects). + ret = subprocess.check_output( + [self.GIT_PYTHON_GIT_EXECUTABLE, "cat-file", "--batch"], + input=self._prepare_ref(ref), + cwd=self._working_dir, + timeout=30, + ) + bio = io.BytesIO(ret) + hexsha, typename, size = self._parse_object_header(bio.readline()) + return (hexsha, typename, size, self.CatFileContentStream(size, bio)) + + +class Repo(git.Repo): + GitCommandWrapperType = Git -- cgit v1.2.1