diff options
| author | Ken D'Ambrosio <ken@jots.org> | 2026-06-08 19:00:02 +0000 |
|---|---|---|
| committer | Ken D'Ambrosio <ken@jots.org> | 2026-06-08 19:00:02 +0000 |
| commit | 01f52565f460a0107679999588b73b770f01a98c (patch) | |
| tree | 806c723ad62221f176fd97d5fdcaadac5d8da2d4 /scripts/cluster_faces.py | |
| parent | 625b3d5176f2c274e91fcf28bda8e45cc0477722 (diff) | |
Add people/face clustering feature
- scripts/cluster_faces.py: greedy centroid clustering (numpy) with 3
refinement passes; preserves existing UUID/name mappings across re-runs;
writes MEDIA_ROOT/people.json atomically.
- app.rb: GET /face/* serves cropped+padded face thumbnails (100x100,
cached under cache/faces/); GET|POST /admin/people for cluster
management; POST /admin/people/recluster runs cluster_faces.py as a
background job; POST /admin/people/:uuid saves names+slugs; GET /people
public grid of named people; GET /people/:slug photos for one person.
- views/admin/people.erb: lists all clusters (named first, then by size),
face crop samples, inline name form, re-cluster button with live log.
- views/people.erb: public grid of named people.
- views/person.erb: photo grid for one person, linking back to album
lightbox for each photo.
- views/layout.erb: People link in nav (conditional on FACES_ENABLED).
- public/css/style.css: styles for people admin list and public tiles.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Diffstat (limited to 'scripts/cluster_faces.py')
| -rw-r--r-- | scripts/cluster_faces.py | 223 |
1 files changed, 223 insertions, 0 deletions
diff --git a/scripts/cluster_faces.py b/scripts/cluster_faces.py new file mode 100644 index 0000000..92c64fe --- /dev/null +++ b/scripts/cluster_faces.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python3 +""" +Cluster face encodings from per-directory faces.json sidecars into people. + +Reads all faces.json files under MEDIA_ROOT, groups face encodings by +similarity (L2 distance < threshold), and writes MEDIA_ROOT/people.json. + +Existing names and UUIDs in people.json are preserved across re-runs: any +cluster whose members overlap with a named cluster in the old file keeps that +UUID and name. + +Usage: + python3 cluster_faces.py [--threshold 0.55] [--media-root /var/albumen] + +Threshold guide: + 0.50 — very strict; may split the same person across lighting/angle + 0.55 — good default for family photos + 0.60 — lenient; may merge different people who look alike +""" +import argparse +import glob +import json +import os +import sys +import uuid as _uuid +from datetime import datetime, timezone + +try: + import numpy as np +except ImportError: + print("numpy is required: /opt/albumen/venv/bin/pip install numpy", file=sys.stderr) + sys.exit(1) + +MEDIA_ROOT = os.environ.get('MEDIA_ROOT', '/var/albumen') + + +def collect_faces(media_root): + """Return list of {rel, box, encoding} for all processed face instances.""" + faces = [] + for path in sorted(glob.glob(os.path.join(media_root, '**', 'faces.json'), recursive=True)): + dir_abs = os.path.dirname(path) + dir_rel = dir_abs[len(media_root):].lstrip('/') + try: + data = json.load(open(path)) + except Exception: + continue + for filename, face_list in data.items(): + if not isinstance(face_list, list): + continue + for face in face_list: + enc = face.get('encoding') + box = face.get('box') + if not enc or not box or len(enc) != 128: + continue + rel = f"{dir_rel}/{filename}" if dir_rel else filename + faces.append({'rel': rel, 'box': box, 'encoding': enc}) + return faces + + +def cluster(encodings, threshold): + """ + Greedy centroid clustering with up to 3 refinement passes. + Returns an integer label array of length n. + """ + n = len(encodings) + if n == 0: + return np.array([], dtype=np.int32) + + # First pass: greedy — each face goes to the nearest centroid or starts a new cluster + labels = np.zeros(n, dtype=np.int32) + cent_sum = [encodings[0].copy()] + cent_count = [1] + + for i in range(1, n): + means = np.vstack(cent_sum) / np.array(cent_count, dtype=np.float32)[:, np.newaxis] + dists = np.sqrt(np.sum((encodings[i] - means) ** 2, axis=1)) + best = int(np.argmin(dists)) + if dists[best] < threshold: + labels[i] = best + cent_sum[best] += encodings[i] + cent_count[best] += 1 + else: + labels[i] = len(cent_sum) + cent_sum.append(encodings[i].copy()) + cent_count.append(1) + + # Refinement passes: reassign each face to the nearest centroid + for _ in range(3): + k = int(labels.max()) + 1 + centroids = np.zeros((k, 128), dtype=np.float32) + counts = np.zeros(k, dtype=np.int32) + for i in range(n): + centroids[labels[i]] += encodings[i] + counts[labels[i]] += 1 + nz = counts > 0 + centroids[nz] /= counts[nz, np.newaxis] + + new_labels = np.zeros(n, dtype=np.int32) + chunk = 512 + for start in range(0, n, chunk): + end = min(start + chunk, n) + diff = encodings[start:end, np.newaxis, :] - centroids[np.newaxis, :, :] + dist = np.sqrt(np.sum(diff ** 2, axis=2)) # (c, k) + best = np.argmin(dist, axis=1) + bd = dist[np.arange(end - start), best] + for j in range(end - start): + new_labels[start + j] = int(best[j]) if bd[j] < threshold else int(labels[start + j]) + + if np.array_equal(new_labels, labels): + break + labels = new_labels + + # Compact: remap to 0..k-1 + unique = np.unique(labels) + remap = {int(old): new for new, old in enumerate(unique)} + return np.array([remap[int(l)] for l in labels], dtype=np.int32) + + +def load_people(path): + if not os.path.exists(path): + return {} + try: + return json.load(open(path)).get('people', {}) + except Exception: + return {} + + +def build_people(faces, labels, existing): + """ + Group faces into clusters and match against existing UUIDs/names. + A new cluster inherits the UUID and name of the existing cluster with the + most member overlap (≥1 shared photo). + """ + clusters = {} + for face, label in zip(faces, labels.tolist()): + clusters.setdefault(int(label), []).append(face) + + existing_rels = {uid: frozenset(m['rel'] for m in p.get('members', [])) + for uid, p in existing.items()} + used = set() + people = {} + + # Process largest clusters first so names attach to the most representative cluster + for label, members in sorted(clusters.items(), key=lambda x: -len(x[1])): + new_rels = frozenset(m['rel'] for m in members) + + best_uid, best_n = None, 0 + for uid, old_rels in existing_rels.items(): + if uid in used: + continue + n = len(new_rels & old_rels) + if n > best_n: + best_n, best_uid = n, uid + + if best_uid and best_n >= 1: + uid = best_uid + name = existing[uid].get('name') + slug = existing[uid].get('slug') + used.add(uid) + else: + uid = str(_uuid.uuid4()) + name = None + slug = None + + people[uid] = { + 'name': name, + 'slug': slug, + 'members': [{'rel': m['rel'], 'box': m['box']} for m in members], + } + + return people + + +def atomic_write(path, obj): + tmp = path + '.tmp.cluster' + with open(tmp, 'w') as f: + json.dump(obj, f) + os.rename(tmp, path) + + +def main(): + ap = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter) + ap.add_argument('--threshold', type=float, default=0.55, + help='L2 distance threshold (default 0.55; lower = stricter)') + ap.add_argument('--media-root', default=MEDIA_ROOT) + args = ap.parse_args() + + people_path = os.path.join(args.media_root, 'people.json') + + print(f"Collecting faces from {args.media_root} ...") + faces = collect_faces(args.media_root) + print(f" {len(faces)} face instances") + + if not faces: + print("No faces to cluster. Run the face daemon first.") + return + + print(f"Clustering (threshold={args.threshold}) ...") + encodings = np.array([f['encoding'] for f in faces], dtype=np.float32) + labels = cluster(encodings, args.threshold) + k = int(labels.max()) + 1 + print(f" {k} clusters from {len(faces)} faces") + + existing = load_people(people_path) + print(f" {len(existing)} existing records (names preserved)") + + people = build_people(faces, labels, existing) + + result = { + 'updated_at': datetime.now(timezone.utc).strftime('%Y-%m-%dT%H:%M:%SZ'), + 'threshold': args.threshold, + 'people': people, + } + + atomic_write(people_path, result) + named = sum(1 for p in people.values() if p.get('name')) + print(f"Wrote {people_path}: {named} named, {k - named} unnamed") + + +if __name__ == '__main__': + main() |
