From 625b3d5176f2c274e91fcf28bda8e45cc0477722 Mon Sep 17 00:00:00 2001 From: Ken D'Ambrosio Date: Mon, 8 Jun 2026 18:36:07 +0000 Subject: Separate face detection into standalone daemon - Strip all face code from update.rb; add shared log helper writing to /opt/albumen/log/albumen.log with [update] prefix. update.rb now owns only album.json; face_daemon.rb owns faces.json. - New scripts/face_daemon.rb: polls MEDIA_ROOT for unprocessed images, calls faces.py in batches, writes per-directory faces.json sidecars atomically. Graceful SIGTERM/SIGINT shutdown between directories. - New config/face_daemon.service: systemd unit running as albumen user, Restart=on-failure, logs via SyslogIdentifier=albumen-faces. - app.rb: add FACES_ENABLED constant; load_faces() helper reads faces.json; album_files() merges face data into each entry as :faces field. - Update README.md and DESIGN.md to document the new daemon architecture, faces.json schema, and service management commands. Co-Authored-By: Claude Sonnet 4.6 --- scripts/face_daemon.rb | 148 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 scripts/face_daemon.rb (limited to 'scripts/face_daemon.rb') diff --git a/scripts/face_daemon.rb b/scripts/face_daemon.rb new file mode 100644 index 0000000..5e817cd --- /dev/null +++ b/scripts/face_daemon.rb @@ -0,0 +1,148 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true +# +# Face detection daemon for Albumen. +# +# Polls MEDIA_ROOT for images not yet in a per-directory faces.json sidecar +# and runs faces.py (dlib CNN model) on them. Never touches album.json — +# zero write contention with update.rb. 
#
# faces.json schema (per directory):
#   filename → null               error during detection; will retry next pass
#   filename → []                 processed, no faces found
#   filename → [{box,encoding}]   face data
#   (key absent)                  not yet processed
#
# Configuration:
#   * Paths (MEDIA_ROOT, CONFIG_PATH, LOG_PATH, VENV_PYTHON, FACES_SCRIPT) come
#     from ENV, falling back to the defaults below.
#   * Tuning values come from the `faces:` key of the YAML file at CONFIG_PATH:
#       workers: 20         # ThreadPoolExecutor workers passed to faces.py
#       poll_interval: 300  # seconds between full-tree sweeps
#       batch_size: 200     # max images handed to one faces.py invocation
#
# Signal handling: SIGTERM / SIGINT sets a flag that is checked between
# directories and between batches; the daemon stops at the next such boundary.

require 'json'
require 'yaml'
require 'fileutils'
require 'open3'

MEDIA_ROOT   = (ENV['MEDIA_ROOT']   || '/var/albumen').freeze
CONFIG_PATH  = (ENV['CONFIG_PATH']  || '/opt/albumen/config.yml').freeze
LOG_PATH     = (ENV['LOG_PATH']     || '/opt/albumen/log/albumen.log').freeze
VENV_PYTHON  = (ENV['VENV_PYTHON']  || '/opt/albumen/venv/bin/python3').freeze
FACES_SCRIPT = (ENV['FACES_SCRIPT'] || '/opt/albumen/scripts/faces.py').freeze

IMAGE_EXTS = %w[jpg jpeg png gif webp heic heif tiff bmp].freeze

# ── Configuration ─────────────────────────────────────────────────────────────

# Returns the `faces:` section of the YAML file at +path+ as a Hash.
# Best-effort: a missing, unparseable or empty file, or a `faces:` value that
# is not a mapping, all yield {} so the daemon starts with its defaults.
def load_faces_config(path)
  return {} unless File.exist?(path)

  cfg     = YAML.load_file(path, symbolize_names: true)
  section = cfg.is_a?(Hash) ? cfg[:faces] : nil
  section.is_a?(Hash) ? section : {}
rescue StandardError
  {}
end

# Coerces a config value to an Integer >= 1.
# Returns +default+ when the value is absent or cannot be converted.
def config_int(value, default)
  return default if value.nil? || !value.respond_to?(:to_i)

  # Clamp: 0 or a negative poll interval would turn the main loop into a busy
  # loop, and 0 workers / 0 batch size are meaningless.
  [value.to_i, 1].max
end

faces_cfg     = load_faces_config(CONFIG_PATH)
FACES_WORKERS = config_int(faces_cfg[:workers], 20)
POLL_INTERVAL = config_int(faces_cfg[:poll_interval], 300)
# Upper bound on image paths per faces.py invocation, so a very large
# directory cannot overflow the OS argument-size limit (Errno::E2BIG).
BATCH_SIZE    = config_int(faces_cfg[:batch_size], 200)

# Set from the signal handlers; polled by the processing and sleep loops.
$shutdown = false
Signal.trap('TERM') { $shutdown = true }
Signal.trap('INT')  { $shutdown = true }

# ── Logging ───────────────────────────────────────────────────────────────────

# Writes +msg+ to stdout and appends a timestamped, "[faces]"-tagged copy to
# LOG_PATH. Any failure while logging is deliberately swallowed.
def log(msg)
  $stdout.puts msg
  $stdout.flush
  ts = Time.now.strftime('%Y-%m-%d %H:%M:%S')
  File.open(LOG_PATH, 'a') { |f| f.puts "[#{ts}] [faces] #{msg}" }
rescue StandardError
  # never crash on log failure
end

# ── faces.json helpers ────────────────────────────────────────────────────────

# Loads a faces.json sidecar.
# Returns {} when the file is absent, is not valid JSON, or holds valid JSON
# that is not an object (e.g. `null` or `[]`).
# Raises SystemCallError if the file exists but cannot be read; callers skip
# the directory in that case rather than overwrite data they could not see.
def load_faces_json(path)
  return {} unless File.exist?(path)

  data = JSON.parse(File.read(path))
  data.is_a?(Hash) ? data : {}
rescue JSON::ParserError
  {}
end

# Writes +data+ as JSON to +path+ via a temp file in the same directory
# followed by a rename. On any error the temp file is removed and the failure
# is logged; nothing is raised.
def save_faces_atomic(path, data)
  tmp = "#{path}.tmp.#{Process.pid}"
  File.write(tmp, JSON.generate(data))
  File.rename(tmp, path)
rescue StandardError => e
  FileUtils.rm_f(tmp) if tmp
  log " Error saving #{path}: #{e.message}"
end

# True when +name+ inside +dir+ is a regular file with a known image extension.
def image_file?(dir, name)
  ext = File.extname(name).downcase.delete_prefix('.')
  IMAGE_EXTS.include?(ext) && File.file?(File.join(dir, name))
end

# Returns the sorted image filenames in +dir+ that still need processing.
# null-valued entries (prior errors) are retried; [] entries are done.
def pending_images(dir)
  faces = load_faces_json(File.join(dir, 'faces.json'))
  Dir.children(dir)
     .select { |n| image_file?(dir, n) }
     .reject { |n| !faces[n].nil? }
     .sort
end

# ── Core processing ───────────────────────────────────────────────────────────

# Human-readable outcome for one faces.json entry (nil or an Array).
def face_summary(entry)
  return 'error (will retry)' if entry.nil?
  return 'no faces' if entry.empty?

  "#{entry.length} face(s)"
end

# Runs faces.py on one batch of filenames from +dir+ and merges the results
# into that directory's faces.json. Logs and returns without writing when
# faces.py fails or its output is not a JSON object.
def process_batch(dir, names)
  paths = names.map { |n| File.join(dir, n) }
  cmd   = [VENV_PYTHON, FACES_SCRIPT, '--workers', FACES_WORKERS.to_s, *paths]

  stdout, stderr, status = Open3.capture3(*cmd)

  # Output that looks like a JSON object is accepted even on a non-zero exit.
  unless status.success? || stdout.strip.start_with?('{')
    code = status.exitstatus || "signal #{status.termsig}"
    log " faces.py error (exit #{code}): #{stderr.strip}"
    return
  end

  begin
    results = JSON.parse(stdout)
  rescue JSON::ParserError => e
    log " faces.py output is not valid JSON: #{e.message}"
    return
  end

  unless results.is_a?(Hash)
    log ' faces.py output is not a JSON object'
    return
  end

  faces_path = File.join(dir, 'faces.json')
  faces      = load_faces_json(faces_path) # re-read before writing (pick up concurrent changes)

  names.zip(paths).each do |name, full|
    entry = results[full]
    # Per the schema only a list is a valid result; a missing key or any other
    # value is stored as null so the image is retried on the next pass.
    entry = nil unless entry.is_a?(Array)
    faces[name] = entry
    log " #{name}: #{face_summary(entry)}"
  end

  save_faces_atomic(faces_path, faces)
end

# Processes every pending image in +dir+, BATCH_SIZE files at a time.
# Filesystem and spawn errors (directory removed mid-sweep, unreadable
# sidecar, missing interpreter) are logged and the directory is skipped.
def process_dir(dir)
  pending = pending_images(dir)
  return if pending.empty?

  rel   = dir.delete_prefix(MEDIA_ROOT).delete_prefix('/')
  label = rel.empty? ? '(root)' : rel
  log "#{label}: #{pending.size} image(s) pending"

  pending.each_slice(BATCH_SIZE) do |batch|
    break if $shutdown

    process_batch(dir, batch)
  end
rescue SystemCallError => e
  log " Skipping #{dir}: #{e.message}"
end

# One full sweep: MEDIA_ROOT itself, then every subdirectory in sorted order.
# Stops early once a shutdown has been requested.
def run_pass
  # `base:` keeps glob metacharacters in MEDIA_ROOT from being read as pattern.
  subdirs = Dir.glob('**/*/', base: MEDIA_ROOT).sort.map { |rel| File.join(MEDIA_ROOT, rel) }

  ([MEDIA_ROOT] + subdirs).each do |dir|
    return if $shutdown

    process_dir(dir)
  end
end

# ── Main loop ─────────────────────────────────────────────────────────────────

log "Starting (workers=#{FACES_WORKERS}, poll_interval=#{POLL_INTERVAL}s, media=#{MEDIA_ROOT})"

loop do
  break if $shutdown

  run_pass
  break if $shutdown

  # Sleep in 1-second increments so SIGTERM/SIGINT takes effect promptly
  POLL_INTERVAL.times do
    break if $shutdown

    sleep 1
  end
end

log 'Shutting down.'