summary | refs | log | tree | commit | diff | stats
path: root/scripts/update.rb
diff options
context:
space:
mode:
author: Ken D'Ambrosio <ken@jots.org>, 2026-06-08 17:34:18 +0000
committer: Ken D'Ambrosio <ken@jots.org>, 2026-06-08 17:34:18 +0000
commit: 73d6f8c9ac0177ca3a6587e6534592a545d44d67 (patch)
tree: 382c08bd09cbe526d1576d9d030294870b0788ed /scripts/update.rb
parent: da28a20f091372375822f9dde4486ecade859e7e (diff)
Switch face detection to CNN model with parallel batch processing
- faces.py: use model="cnn" (more accurate, better at angles/small faces/poor lighting) instead of HOG; model comment explains the trade-off clearly
- faces.py: accept multiple image paths; process with ThreadPoolExecutor (dlib releases GIL during C++ inference → genuine thread parallelism); output JSON dict {path: [faces]} for batch calls
- update.rb: batch_detect_faces() collects all unprocessed images per directory and calls faces.py once per directory rather than once per image, avoiding repeated model load overhead
- update.rb: FACES_WORKERS read from config.yml faces.workers (default 4; set to 20 in this install's config.yml on a 64-core Xeon)
- update.rb: process_dir() now takes idx/total and prints [N/total] prefix on every Scanning/Skipping line for progress monitoring

To monitor a long run:

    nohup ruby /opt/albumen/scripts/update.rb > /tmp/faces_update.log 2>&1 &
    tail -f /tmp/faces_update.log

Resume/restart is fully safe: sentinel files are only written after atomic_write_json, so an aborted directory reruns cleanly from scratch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'scripts/update.rb')
-rw-r--r-- scripts/update.rb 53
1 files changed, 35 insertions, 18 deletions
diff --git a/scripts/update.rb b/scripts/update.rb
index d6effe5..1a00ddf 100644
--- a/scripts/update.rb
+++ b/scripts/update.rb
@@ -35,8 +35,9 @@ MEDIA_EXTS = (IMAGE_EXTS + VIDEO_EXTS + AUDIO_EXTS).freeze
TRANSCODE_EXTS = %w[avi mkv mov].freeze # not universally browser-playable; convert to MP4
SENTINEL_FILE = '.albumen_scanned'.freeze
-_cfg = File.exist?(CONFIG_PATH) ? YAML.load_file(CONFIG_PATH, symbolize_names: true) : {}
+_cfg = File.exist?(CONFIG_PATH) ? YAML.load_file(CONFIG_PATH, symbolize_names: true) : {}
FACES_ENABLED = (_cfg.dig(:faces, :enabled) == true).freeze
+FACES_WORKERS = (_cfg.dig(:faces, :workers) || 4).freeze
VENV_PYTHON = File.expand_path('../venv/bin/python3', __dir__).freeze
FACES_SCRIPT = File.expand_path('faces.py', __dir__).freeze
@@ -45,19 +46,20 @@ FORCE_UPDATE = !!(ARGV.delete('--force') || ARGV[0])
# ── Directory processing ───────────────────────────────────────────────────────
-def process_dir(dir)
- rel = dir.delete_prefix(MEDIA_ROOT).delete_prefix('/')
- label = rel.empty? ? '(root)' : rel
+def process_dir(dir, idx, total)
+ rel = dir.delete_prefix(MEDIA_ROOT).delete_prefix('/')
+ label = rel.empty? ? '(root)' : rel
+ prefix = "[#{idx}/#{total}]"
unless FORCE_UPDATE
sentinel = File.join(dir, SENTINEL_FILE)
if File.exist?(sentinel) && File.mtime(sentinel) >= File.mtime(dir)
- puts "Skipping #{label} (unchanged)"
+ puts "#{prefix} Skipping #{label} (unchanged)"
return
end
end
- puts "Scanning #{label}"
+ puts "#{prefix} Scanning #{label}"
json_path = File.join(dir, 'album.json')
data = load_json(json_path)
@@ -125,6 +127,8 @@ def process_dir(dir)
generate_thumb_if_needed(full, rel, name, ext)
end
+ batch_detect_faces(dir, current, data) if FACES_ENABLED
+
atomic_write_json(json_path, data)
FileUtils.touch(File.join(dir, SENTINEL_FILE))
end
@@ -178,23 +182,35 @@ def enrich_image(full, name, meta)
end
end
- enrich_faces(full, name, meta)
end
-def enrich_faces(full, name, meta)
- return unless FACES_ENABLED
- return unless meta['faces'].nil? # already processed ([] means "processed, none found")
+def batch_detect_faces(dir, names, data)
return unless File.exist?(VENV_PYTHON) && File.exist?(FACES_SCRIPT)
+ unprocessed = names.select do |name|
+ IMAGE_EXTS.include?(File.extname(name).downcase.delete_prefix('.')) &&
+ (data['files'][name] || {})['faces'].nil?
+ end
+ return if unprocessed.empty?
+
+ puts " Detecting faces in #{unprocessed.length} image(s) (#{FACES_WORKERS} workers)…"
+ paths = unprocessed.map { |n| File.join(dir, n) }
+ cmd = [VENV_PYTHON, FACES_SCRIPT, '--workers', FACES_WORKERS.to_s] + paths
+
begin
- out = IO.popen([VENV_PYTHON, FACES_SCRIPT, full], err: '/dev/null', &:read).strip
- faces = JSON.parse(out.empty? ? '[]' : out)
- if faces.is_a?(Array)
- meta['faces'] = faces
+ out = IO.popen(cmd, err: '/dev/null', &:read).strip
+ results = JSON.parse(out.empty? ? '{}' : out)
+ raise 'expected Hash' unless results.is_a?(Hash)
+
+ results.each do |path, faces|
+ name = File.basename(path)
+ next unless data['files'].key?(name)
+ next if faces.nil? # error on this file — leave faces: null to retry
+ data['files'][name]['faces'] = faces
puts " #{name}: #{faces.length} face(s)" unless faces.empty?
end
rescue StandardError => e
- warn " #{name}: face detection error — #{e.message}"
+ warn " Face detection batch error — #{e.message}"
end
end
@@ -325,8 +341,9 @@ if Process.uid == 0
end
# Walk the tree: process each directory (depth-first, parent before children)
-dirs = [start]
-dirs += Dir.glob("#{start}/**/*/").sort
-dirs.uniq.each { |d| process_dir(d) }
+dirs = [start] + Dir.glob("#{start}/**/*/").sort
+dirs = dirs.uniq
+total = dirs.size
+dirs.each_with_index { |d, i| process_dir(d, i + 1, total) }
puts 'Done.'