summaryrefslogtreecommitdiffstats
path: root/scripts
diff options
context:
space:
mode:
author Ken D'Ambrosio <ken@jots.org> 2026-06-08 18:36:07 +0000
committer Ken D'Ambrosio <ken@jots.org> 2026-06-08 18:36:07 +0000
commit 625b3d5176f2c274e91fcf28bda8e45cc0477722 (patch)
tree 6ca16ad6f4a830b65dcddbd78ad7e7a2f1655682 /scripts
parent ecc872a1fd43c0863e3171a1faf533adc3e3a4c5 (diff)
Separate face detection into standalone daemon
- Strip all face code from update.rb; add shared log helper writing to
  /opt/albumen/log/albumen.log with [update] prefix. update.rb now owns
  only album.json; face_daemon.rb owns faces.json.
- New scripts/face_daemon.rb: polls MEDIA_ROOT for unprocessed images,
  calls faces.py in batches, writes per-directory faces.json sidecars
  atomically. Graceful SIGTERM/SIGINT shutdown between directories.
- New config/face_daemon.service: systemd unit running as albumen user,
  Restart=on-failure, logs via SyslogIdentifier=albumen-faces.
- app.rb: add FACES_ENABLED constant; load_faces() helper reads faces.json;
  album_files() merges face data into each entry as :faces field.
- Update README.md and DESIGN.md to document the new daemon architecture,
  faces.json schema, and service management commands.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/face_daemon.rb 148
-rw-r--r-- scripts/update.rb 103
2 files changed, 177 insertions, 74 deletions
diff --git a/scripts/face_daemon.rb b/scripts/face_daemon.rb
new file mode 100644
index 0000000..5e817cd
--- /dev/null
+++ b/scripts/face_daemon.rb
@@ -0,0 +1,148 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+#
+# Face detection daemon for Albumen.
+#
+# Polls MEDIA_ROOT for images not yet in a per-directory faces.json sidecar
+# and runs faces.py (dlib CNN model) on them. Never touches album.json —
+# zero write contention with update.rb.
+#
+# faces.json schema (per directory):
+#   filename → null               error during detection; will retry next pass
+#   filename → []                 processed, no faces found
+#   filename → [{box,encoding}]   face data
+#   (key absent)                  not yet processed
+#
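+# Configuration — paths come from ENV (MEDIA_ROOT, CONFIG_PATH, LOG_PATH,
+# VENV_PYTHON, FACES_SCRIPT); tuning comes from the config file
+# (default /opt/albumen/config.yml) under the `faces:` key:
+#   workers: 20          # ThreadPoolExecutor workers passed to faces.py
+#   poll_interval: 300   # seconds between full-tree sweeps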
+#
+# Signal handling: SIGTERM / SIGINT triggers graceful shutdown between dirs.
+
+require 'json'
+require 'yaml'
+require 'fileutils'
+require 'open3'
+
+# Filesystem locations. Each one is taken from the environment variable of the
+# same name when it is set, otherwise from the default shown here.
+MEDIA_ROOT = (ENV['MEDIA_ROOT'] || '/var/albumen').freeze
+CONFIG_PATH = (ENV['CONFIG_PATH'] || '/opt/albumen/config.yml').freeze
+LOG_PATH = (ENV['LOG_PATH'] || '/opt/albumen/log/albumen.log').freeze
+VENV_PYTHON = (ENV['VENV_PYTHON'] || '/opt/albumen/venv/bin/python3').freeze
+FACES_SCRIPT = (ENV['FACES_SCRIPT'] || '/opt/albumen/scripts/faces.py').freeze
+
+# Lower-case file extensions (without the dot) that pending_images treats as images.
+IMAGE_EXTS = %w[jpg jpeg png gif webp heic heif tiff bmp].freeze
+
+# Optional YAML configuration. A missing, unreadable or malformed file falls
+# back to {}. So does a file that parses to something other than a mapping
+# (an empty file parses to false or nil, depending on the Psych version), so
+# the defaults below always apply instead of the daemon failing at start-up.
+_cfg = begin
+  File.exist?(CONFIG_PATH) ? YAML.load_file(CONFIG_PATH, symbolize_names: true) : {}
+rescue StandardError
+  {}
+end
+_cfg = {} unless _cfg.is_a?(Hash)
+# `faces:` may itself be absent or a scalar; only a mapping is usable.
+_faces_cfg = _cfg[:faces].is_a?(Hash) ? _cfg[:faces] : {}
+
+# Clamp both settings to at least 1. A non-numeric value becomes 0 through
+# to_i, and a poll interval of 0 would make the main loop sweep the tree
+# continuously with no pause between passes.
+FACES_WORKERS = [(_faces_cfg[:workers] || 20).to_i, 1].max
+POLL_INTERVAL = [(_faces_cfg[:poll_interval] || 300).to_i, 1].max
+
+# Shutdown flag polled by run_pass and the main loop. The trap handlers only
+# set the flag; nothing is interrupted from inside the handler itself.
+$shutdown = false
+%w[TERM INT].each do |signame|
+  Signal.trap(signame) { $shutdown = true }
+end
+
+# ── Logging ───────────────────────────────────────────────────────────────────
+
+# Prints msg to stdout and appends a timestamped "[faces]" line to LOG_PATH.
+# Any StandardError raised along the way (for example an unwritable log file)
+# is swallowed, so a logging failure never takes the daemon down.
+def log(msg)
+  begin
+    $stdout.puts msg
+    $stdout.flush
+    stamp = Time.now.strftime('%Y-%m-%d %H:%M:%S')
+    File.open(LOG_PATH, 'a') do |io|
+      io.puts "[#{stamp}] [faces] #{msg}"
+    end
+  rescue StandardError
+    # never crash on log failure
+  end
+end
+
+# ── faces.json helpers ────────────────────────────────────────────────────────
+
+# Loads the faces.json sidecar at path and returns its contents as a Hash.
+#
+# Returns {} when the file does not exist (including when it disappears
+# between the existence check and the read), when it is not valid JSON, or
+# when it is valid JSON whose top level is not an object. Callers use Hash
+# methods (key?, []) on the result, so any other type would raise later.
+def load_faces_json(path)
+  return {} unless File.exist?(path)
+
+  parsed = JSON.parse(File.read(path))
+  parsed.is_a?(Hash) ? parsed : {}
+rescue JSON::ParserError, Errno::ENOENT
+  {}
+end
+
+# Writes data as JSON to path via a temp file: the JSON is written to a
+# PID-suffixed file next to path, which is then renamed over path.
+# On any StandardError the temp file is removed (best effort) and the failure
+# is logged instead of being raised.
+def save_faces_atomic(path, data)
+  staging = "#{path}.tmp.#{Process.pid}"
+  begin
+    File.write(staging, JSON.generate(data))
+    File.rename(staging, path)
+  rescue StandardError => err
+    begin
+      File.unlink(staging)
+    rescue StandardError
+      nil
+    end
+    log " Error saving #{path}: #{err.message}"
+  end
+end
+
+# Returns the sorted image filenames in dir that still need face detection.
+#
+# A name is pending when faces.json has no entry for it, or when its entry is
+# null (a prior error, which is retried). Any non-null entry, including [],
+# counts as done. Only regular files are considered, so a sub-directory whose
+# name ends in an image extension is never handed to faces.py.
+#
+# Returns [] (after logging) when dir cannot be listed, for example because it
+# was removed after the tree was globbed.
+def pending_images(dir)
+  faces = load_faces_json(File.join(dir, 'faces.json'))
+  Dir.children(dir)
+     .select { |n| IMAGE_EXTS.include?(File.extname(n).downcase.delete_prefix('.')) }
+     .select { |n| File.file?(File.join(dir, n)) }
+     .reject { |n| faces.key?(n) && !faces[n].nil? }
+     .sort
+rescue SystemCallError => e
+  log "#{dir}: cannot list directory: #{e.message}"
+  []
+end
+
+# ── Core processing ───────────────────────────────────────────────────────────
+
+# Runs face detection for every pending image in dir and merges the results
+# into dir/faces.json.
+#
+# faces.py is invoked once with all pending paths, and its stdout is parsed as
+# a JSON object that is looked up by the full path passed in. A path that is
+# missing from the output, or whose value is not an array, is stored as null
+# so that the next pass retries it.
+#
+# Failures are logged and leave faces.json untouched: faces.py cannot be
+# started, it exits non-zero without JSON-looking output, or its output is
+# invalid JSON or not a JSON object.
+def process_dir(dir)
+  pending = pending_images(dir)
+  return if pending.empty?
+
+  rel = dir.delete_prefix(MEDIA_ROOT).delete_prefix('/')
+  label = rel.empty? ? '(root)' : rel
+  log "#{label}: #{pending.size} image(s) pending"
+
+  paths = pending.map { |n| File.join(dir, n) }
+  cmd = [VENV_PYTHON, FACES_SCRIPT, '--workers', FACES_WORKERS.to_s, *paths]
+
+  begin
+    stdout, stderr, status = Open3.capture3(*cmd)
+  rescue SystemCallError => e
+    # e.g. the interpreter is missing, or the argument list is too long
+    log " could not run faces.py: #{e.message}"
+    return
+  end
+
+  # A non-zero exit is tolerated when stdout still looks like a JSON object.
+  unless status.success? || stdout.strip.start_with?('{')
+    log " faces.py error (exit #{status.exitstatus}): #{stderr.strip}"
+    return
+  end
+
+  begin
+    results = JSON.parse(stdout)
+  rescue JSON::ParserError => e
+    log " faces.py output is not valid JSON: #{e.message}"
+    return
+  end
+
+  unless results.is_a?(Hash)
+    log ' faces.py output is not a JSON object'
+    return
+  end
+
+  faces_path = File.join(dir, 'faces.json')
+  faces = load_faces_json(faces_path) # re-read before writing (pick up concurrent changes)
+
+  pending.each do |name|
+    entry = results[File.join(dir, name)]
+    entry = nil unless entry.is_a?(Array) # anything unexpected is treated as an error
+    faces[name] = entry
+
+    detail =
+      if entry.nil?
+        'error (will retry)'
+      elsif entry.empty?
+        'no faces'
+      else
+        "#{entry.length} face(s)"
+      end
+    log " #{name}: #{detail}"
+  end
+
+  save_faces_atomic(faces_path, faces)
+end
+
+# One sweep over the media tree: MEDIA_ROOT itself first, then every directory
+# below it in sorted order. The shutdown flag is checked before each directory,
+# so a signal ends the sweep at the next directory boundary.
+def run_pass
+  subdirs = Dir.glob("#{MEDIA_ROOT}/**/*/").sort
+  [MEDIA_ROOT, *subdirs].each do |dir|
+    return if $shutdown
+
+    process_dir(dir)
+  end
+end
+
+# ── Main loop ─────────────────────────────────────────────────────────────────
+
+log "Starting (workers=#{FACES_WORKERS}, poll_interval=#{POLL_INTERVAL}s, media=#{MEDIA_ROOT})"
+
+# Sweep the tree, then idle for POLL_INTERVAL seconds, until shutdown is requested.
+until $shutdown
+  run_pass
+  break if $shutdown
+
+  # Idle one second at a time so SIGTERM/SIGINT takes effect promptly
+  waited = 0
+  while waited < POLL_INTERVAL && !$shutdown
+    sleep 1
+    waited += 1
+  end
+end
+
+log 'Shutting down.'
diff --git a/scripts/update.rb b/scripts/update.rb
index 5671330..f909510 100644
--- a/scripts/update.rb
+++ b/scripts/update.rb
@@ -16,6 +16,9 @@
# - Safe to re-run at any time; all operations are idempotent.
# - Unchanged directories are skipped via a .albumen_scanned sentinel file;
# pass --force to bypass.
+#
+# Face detection is NOT handled here. Run face_daemon.rb (or let the systemd
+# service manage it) to detect faces and write per-directory faces.json files.
require 'json'
require 'yaml'
@@ -23,27 +26,30 @@ require 'fileutils'
require 'mini_magick'
require 'mini_exiftool'
-MEDIA_ROOT = (ENV['MEDIA_ROOT'] || '/var/albumen').freeze
-CACHE_ROOT = (ENV['CACHE_ROOT'] || '/opt/albumen/cache/thumbs').freeze
-CONFIG_PATH = (ENV['CONFIG_PATH'] || '/opt/albumen/config.yml').freeze
-THUMB_SIZE = 300
+MEDIA_ROOT = (ENV['MEDIA_ROOT'] || '/var/albumen').freeze
+CACHE_ROOT = (ENV['CACHE_ROOT'] || '/opt/albumen/cache/thumbs').freeze
+LOG_PATH = (ENV['LOG_PATH'] || '/opt/albumen/log/albumen.log').freeze
+THUMB_SIZE = 300
IMAGE_EXTS = %w[jpg jpeg png gif webp heic heif tiff bmp].freeze
VIDEO_EXTS = %w[mp4 mov avi mkv webm m4v ogv].freeze
AUDIO_EXTS = %w[mp3 flac ogg wav m4a aac].freeze
MEDIA_EXTS = (IMAGE_EXTS + VIDEO_EXTS + AUDIO_EXTS).freeze
-TRANSCODE_EXTS = %w[avi mkv mov].freeze # not universally browser-playable; convert to MP4
+TRANSCODE_EXTS = %w[avi mkv mov].freeze
SENTINEL_FILE = '.albumen_scanned'.freeze
-_cfg = File.exist?(CONFIG_PATH) ? YAML.load_file(CONFIG_PATH, symbolize_names: true) : {}
-FACES_ENABLED = (_cfg.dig(:faces, :enabled) == true).freeze
-FACES_WORKERS = (_cfg.dig(:faces, :workers) || 4).freeze
-VENV_PYTHON = File.expand_path('../venv/bin/python3', __dir__).freeze
-FACES_SCRIPT = File.expand_path('faces.py', __dir__).freeze
-
# Explicit directory argument implies force — you asked for it, it should run.
FORCE_UPDATE = !!(ARGV.delete('--force') || ARGV[0])
+# Echoes msg to stdout, then appends it to LOG_PATH with a timestamp and an
+# "[update]" tag. A StandardError from either step is deliberately ignored so
+# that a logging problem cannot abort the run.
+def log(msg)
+  begin
+    $stdout.puts msg
+    $stdout.flush
+    stamp = Time.now.strftime('%Y-%m-%d %H:%M:%S')
+    File.open(LOG_PATH, 'a') do |io|
+      io.puts "[#{stamp}] [update] #{msg}"
+    end
+  rescue StandardError
+    # never crash on log failure
+  end
+end
+
# ── Directory processing ───────────────────────────────────────────────────────
def process_dir(dir, idx, total)
@@ -51,20 +57,15 @@ def process_dir(dir, idx, total)
label = rel.empty? ? '(root)' : rel
prefix = "[#{idx}/#{total}]"
- pending_faces = false
unless FORCE_UPDATE
sentinel = File.join(dir, SENTINEL_FILE)
if File.exist?(sentinel) && File.mtime(sentinel) >= File.mtime(dir)
- if faces_pending?(dir)
- pending_faces = true # fall through, but only to run face detection
- else
- puts "#{prefix} Skipping #{label} (unchanged)"
- return
- end
+ log "#{prefix} Skipping #{label} (unchanged)"
+ return
end
end
- puts "#{prefix} Scanning #{label}#{' (face detection pending)' if pending_faces}"
+ log "#{prefix} Scanning #{label}"
json_path = File.join(dir, 'album.json')
data = load_json(json_path)
@@ -84,9 +85,9 @@ def process_dir(dir, idx, total)
thumb = File.join(CACHE_ROOT, rel.empty? ? "#{n}.th.jpg" : "#{rel}/#{n}.th.jpg")
if File.exist?(thumb)
File.unlink(thumb)
- puts " Removed: #{n} (+ thumb)"
+ log " Removed: #{n} (+ thumb)"
else
- puts " Removed: #{n}"
+ log " Removed: #{n}"
end
end
@@ -96,20 +97,19 @@ def process_dir(dir, idx, total)
base = File.basename(name, '.*')
target = "#{base}.mp4"
if current.include?(target)
- # MP4 already exists — just ensure the marker is recorded
data['files'][name] ||= {}
data['files'][name]['transcoded_to'] = target
next
end
full = File.join(dir, name)
dest = File.join(dir, target)
- puts " Transcoding: #{name} → #{target}"
+ log " Transcoding: #{name} → #{target}"
transcode_to_mp4(full, dest)
if File.exist?(dest)
data['files'][name] ||= {}
data['files'][name]['transcoded_to'] = target
current << target
- puts " → done"
+ log " → done"
else
warn " Transcode failed: #{name}"
end
@@ -132,8 +132,6 @@ def process_dir(dir, idx, total)
generate_thumb_if_needed(full, rel, name, ext)
end
- batch_detect_faces(dir, current, data) if FACES_ENABLED
-
atomic_write_json(json_path, data)
FileUtils.touch(File.join(dir, SENTINEL_FILE))
end
@@ -152,7 +150,7 @@ def enrich_image(full, name, meta)
raw = exif.date_time_original || exif.create_date || exif.date_time
if raw
meta['taken_at'] = raw.respond_to?(:strftime) ? raw.strftime('%Y-%m-%dT%H:%M:%S') : raw.to_s
- puts " #{name}: taken_at = #{meta['taken_at']}"
+ log " #{name}: taken_at = #{meta['taken_at']}"
end
end
@@ -169,14 +167,12 @@ def enrich_image(full, name, meta)
warn " #{name}: EXIF error — #{e.message}"
end
- # If exiftool found nothing at all, record that so we don't retry on every re-scan.
if meta['taken_at'].nil? && meta['camera'].nil? &&
meta['aperture'].nil? && meta['shutter'].nil? && meta['iso'].nil?
meta['exif_absent'] = true
end
end
- # Dimensions (skip if already recorded)
if meta['width'].nil?
begin
img = MiniMagick::Image.open(full)
@@ -186,37 +182,6 @@ def enrich_image(full, name, meta)
warn " #{name}: dimension error — #{e.message}"
end
end
-
-end
-
-def batch_detect_faces(dir, names, data)
- return unless File.exist?(VENV_PYTHON) && File.exist?(FACES_SCRIPT)
-
- unprocessed = names.select do |name|
- IMAGE_EXTS.include?(File.extname(name).downcase.delete_prefix('.')) &&
- (data['files'][name] || {})['faces'].nil?
- end
- return if unprocessed.empty?
-
- puts " Detecting faces in #{unprocessed.length} image(s) (#{FACES_WORKERS} workers)…"
- paths = unprocessed.map { |n| File.join(dir, n) }
- cmd = [VENV_PYTHON, FACES_SCRIPT, '--workers', FACES_WORKERS.to_s] + paths
-
- begin
- out = IO.popen(cmd, err: '/dev/null', &:read).strip
- results = JSON.parse(out.empty? ? '{}' : out)
- raise 'expected Hash' unless results.is_a?(Hash)
-
- results.each do |path, faces|
- name = File.basename(path)
- next unless data['files'].key?(name)
- next if faces.nil? # error on this file — leave faces: null to retry
- data['files'][name]['faces'] = faces
- puts " #{name}: #{faces.length} face(s)" unless faces.empty?
- end
- rescue StandardError => e
- warn " Face detection batch error — #{e.message}"
- end
end
def enrich_video(full, name, meta)
@@ -232,12 +197,12 @@ end
# ── Thumbnail generation ───────────────────────────────────────────────────────
def generate_thumb_if_needed(full, rel, name, ext)
- return if AUDIO_EXTS.include?(ext) # audio uses a static icon
+ return if AUDIO_EXTS.include?(ext)
cache = File.join(CACHE_ROOT, rel.empty? ? "#{name}.th.jpg" : "#{rel}/#{name}.th.jpg")
return if File.exist?(cache)
- puts " Generating thumb: #{name}"
+ log " Generating thumb: #{name}"
FileUtils.mkdir_p(File.dirname(cache))
if VIDEO_EXTS.include?(ext)
@@ -295,16 +260,6 @@ rescue JSON::ParserError => e
{}
end
-def faces_pending?(dir)
- return false unless FACES_ENABLED
- json_path = File.join(dir, 'album.json')
- return false unless File.exist?(json_path)
- (load_json(json_path)['files'] || {}).any? do |name, meta|
- IMAGE_EXTS.include?(File.extname(name).downcase.delete_prefix('.')) &&
- meta['faces'].nil?
- end
-end
-
# Fields the admin controls — never overwrite with stale values from our earlier read.
ADMIN_ALBUM_KEYS = %w[title description cover cover_dynamic sort_reverse visible].freeze
ADMIN_FILE_KEYS = %w[title caption visible].freeze
@@ -348,7 +303,7 @@ if Process.uid == 0
begin
require 'etc'
pw = Etc.getpwnam(service_user)
- puts "Fixing ownership of #{start} → #{service_user}"
+ log "Fixing ownership of #{start} → #{service_user}"
FileUtils.chown_R(pw.uid, pw.gid, start)
rescue ArgumentError
warn "Warning: user '#{service_user}' not found; skipping chown"
@@ -361,4 +316,4 @@ dirs = dirs.uniq
total = dirs.size
dirs.each_with_index { |d, i| process_dir(d, i + 1, total) }
-puts 'Done.'
+log 'Done.'