From 86f682269bf07b78ee2619938ae5901111069f91 Mon Sep 17 00:00:00 2001 From: Nicolas Mowen Date: Sat, 9 Aug 2025 21:47:39 -0600 Subject: [PATCH] Add inference speed and keep alive --- frigate/data_processing/post/review_descriptions.py | 11 +++++++++++ frigate/data_processing/types.py | 4 ++++ frigate/genai/ollama.py | 1 + frigate/stats/util.py | 8 ++++++++ 4 files changed, 24 insertions(+) diff --git a/frigate/data_processing/post/review_descriptions.py b/frigate/data_processing/post/review_descriptions.py index a249de802..72ea0f9cb 100644 --- a/frigate/data_processing/post/review_descriptions.py +++ b/frigate/data_processing/post/review_descriptions.py @@ -15,6 +15,7 @@ from frigate.config import FrigateConfig from frigate.const import CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION from frigate.data_processing.types import PostProcessDataEnum from frigate.genai import GenAIClient +from frigate.util.builtin import EventsPerSecond, InferenceSpeed from ..post.api import PostProcessorApi from ..types import DataProcessorMetrics @@ -35,8 +36,13 @@ class ReviewDescriptionProcessor(PostProcessorApi): self.metrics = metrics self.tracked_review_items: dict[str, list[tuple[int, bytes]]] = {} self.genai_client = client + self.review_desc_speed = InferenceSpeed(self.metrics.review_desc_speed) + self.review_descs_dps = EventsPerSecond() + self.review_descs_dps.start() def process_data(self, data, data_type): + self.metrics.review_desc_dps.value = self.review_descs_dps.eps() + if data_type != PostProcessDataEnum.review: return @@ -101,11 +107,13 @@ class ReviewDescriptionProcessor(PostProcessorApi): return # kickoff analysis + self.review_descs_dps.update() threading.Thread( target=run_analysis, args=( self.requestor, self.genai_client, + self.review_desc_speed, camera, final_data, copy.copy([r[1] for r in self.tracked_review_items[id]]), @@ -121,10 +129,12 @@ class ReviewDescriptionProcessor(PostProcessorApi): def run_analysis( requestor: InterProcessRequestor, genai_client: GenAIClient, + review_inference_speed: InferenceSpeed, camera: str, final_data: dict[str, str], thumbs: list[bytes], ) -> None: + start = datetime.datetime.now().timestamp() metadata = genai_client.generate_review_description( { "camera": camera, @@ -135,6 +145,7 @@ def run_analysis( }, thumbs, ) + review_inference_speed.update(datetime.datetime.now().timestamp() - start) if not metadata: return None diff --git a/frigate/data_processing/types.py b/frigate/data_processing/types.py index d18a1175a..c77880535 100644 --- a/frigate/data_processing/types.py +++ b/frigate/data_processing/types.py @@ -20,6 +20,8 @@ class DataProcessorMetrics: alpr_pps: Synchronized yolov9_lpr_speed: Synchronized yolov9_lpr_pps: Synchronized + review_desc_speed: Synchronized + review_desc_dps: Synchronized classification_speeds: dict[str, Synchronized] classification_cps: dict[str, Synchronized] @@ -34,6 +36,8 @@ class DataProcessorMetrics: self.alpr_pps = manager.Value("d", 0.0) self.yolov9_lpr_speed = manager.Value("d", 0.0) self.yolov9_lpr_pps = manager.Value("d", 0.0) + self.review_desc_speed = manager.Value("d", 0.0) + self.review_desc_dps = manager.Value("d", 0.0) self.classification_speeds = manager.dict() self.classification_cps = manager.dict() diff --git a/frigate/genai/ollama.py b/frigate/genai/ollama.py index e67d532f0..ea88579cb 100644 --- a/frigate/genai/ollama.py +++ b/frigate/genai/ollama.py @@ -48,6 +48,7 @@ class OllamaClient(GenAIClient): self.genai_config.model, prompt, images=images, + options={"keep_alive": "1h"}, ) return result["response"].strip() except (TimeoutException, ResponseError) as e: diff --git a/frigate/stats/util.py b/frigate/stats/util.py index 3c41ca3b1..7d7a27653 100644 --- a/frigate/stats/util.py +++ b/frigate/stats/util.py @@ -356,6 +356,14 @@ def stats_snapshot( embeddings_metrics.yolov9_lpr_pps.value, 2 ) + if embeddings_metrics.review_desc_dps.value > 0.0: + stats["embeddings"]["review_description_speed"] = round( + embeddings_metrics.review_desc_speed.value * 1000, 2 + ) + stats["embeddings"]["review_descriptions"] = round( + embeddings_metrics.review_desc_dps.value, 2 + ) + for key in embeddings_metrics.classification_speeds.keys(): stats["embeddings"][f"{key}_classification_speed"] = round( embeddings_metrics.classification_speeds[key].value * 1000, 2