Add inference speed and keep alive

This commit is contained in:
Nicolas Mowen 2025-08-09 21:47:39 -06:00
parent 1bea9aec99
commit 86f682269b
4 changed files with 24 additions and 0 deletions

View File

@@ -15,6 +15,7 @@ from frigate.config import FrigateConfig
from frigate.const import CLIPS_DIR, UPDATE_REVIEW_DESCRIPTION
from frigate.data_processing.types import PostProcessDataEnum
from frigate.genai import GenAIClient
from frigate.util.builtin import EventsPerSecond, InferenceSpeed
from ..post.api import PostProcessorApi
from ..types import DataProcessorMetrics
@@ -35,8 +36,13 @@ class ReviewDescriptionProcessor(PostProcessorApi):
self.metrics = metrics
self.tracked_review_items: dict[str, list[tuple[int, bytes]]] = {}
self.genai_client = client
self.review_desc_speed = InferenceSpeed(self.metrics.review_desc_speed)
self.review_descs_dps = EventsPerSecond()
self.review_descs_dps.start()
def process_data(self, data, data_type):
self.metrics.review_desc_dps.value = self.review_descs_dps.eps()
if data_type != PostProcessDataEnum.review:
return
@@ -101,11 +107,13 @@ class ReviewDescriptionProcessor(PostProcessorApi):
return
# kickoff analysis
self.review_descs_dps.update()
threading.Thread(
target=run_analysis,
args=(
self.requestor,
self.genai_client,
self.review_desc_speed,
camera,
final_data,
copy.copy([r[1] for r in self.tracked_review_items[id]]),
@@ -121,10 +129,12 @@ class ReviewDescriptionProcessor(PostProcessorApi):
def run_analysis(
requestor: InterProcessRequestor,
genai_client: GenAIClient,
review_inference_speed: InferenceSpeed,
camera: str,
final_data: dict[str, str],
thumbs: list[bytes],
) -> None:
start = datetime.datetime.now().timestamp()
metadata = genai_client.generate_review_description(
{
"camera": camera,
@@ -135,6 +145,7 @@ def run_analysis(
},
thumbs,
)
review_inference_speed.update(datetime.datetime.now().timestamp() - start)
if not metadata:
return None

View File

@@ -20,6 +20,8 @@ class DataProcessorMetrics:
alpr_pps: Synchronized
yolov9_lpr_speed: Synchronized
yolov9_lpr_pps: Synchronized
review_desc_speed: Synchronized
review_desc_dps: Synchronized
classification_speeds: dict[str, Synchronized]
classification_cps: dict[str, Synchronized]
@@ -34,6 +36,8 @@ class DataProcessorMetrics:
self.alpr_pps = manager.Value("d", 0.0)
self.yolov9_lpr_speed = manager.Value("d", 0.0)
self.yolov9_lpr_pps = manager.Value("d", 0.0)
self.review_desc_speed = manager.Value("d", 0.0)
self.review_desc_dps = manager.Value("d", 0.0)
self.classification_speeds = manager.dict()
self.classification_cps = manager.dict()

View File

@@ -48,6 +48,7 @@ class OllamaClient(GenAIClient):
self.genai_config.model,
prompt,
images=images,
options={"keep_alive": "1h"},
)
return result["response"].strip()
except (TimeoutException, ResponseError) as e:

View File

@@ -356,6 +356,14 @@ def stats_snapshot(
embeddings_metrics.yolov9_lpr_pps.value, 2
)
if embeddings_metrics.review_desc_dps.value > 0.0:
stats["embeddings"]["review_description_speed"] = round(
embeddings_metrics.review_desc_speed.value * 1000, 2
)
stats["embeddings"]["review_descriptions"] = round(
embeddings_metrics.review_desc_dps.value, 2
)
for key in embeddings_metrics.classification_speeds.keys():
stats["embeddings"][f"{key}_classification_speed"] = round(
embeddings_metrics.classification_speeds[key].value * 1000, 2