summary | refs | log | tree | commit | diff | stats
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/cluster_faces.py 21
1 files changed, 20 insertions, 1 deletions
diff --git a/scripts/cluster_faces.py b/scripts/cluster_faces.py
index 92c64fe..a9492a9 100644
--- a/scripts/cluster_faces.py
+++ b/scripts/cluster_faces.py
@@ -35,7 +35,24 @@ MEDIA_ROOT = os.environ.get('MEDIA_ROOT', '/var/albumen')
def collect_faces(media_root):
- """Return list of {rel, box, encoding} for all processed face instances."""
+ """Return list of {rel, box, encoding} for all processed face instances.
+
+ Faces that are in the pool or blacklist in people.json are skipped —
+ the user has explicitly handled them and they should not be re-clustered.
+ """
+ skip = set()
+ people_path = os.path.join(media_root, 'people.json')
+ if os.path.exists(people_path):
+ try:
+ pd = json.load(open(people_path))
+ for entry in pd.get('blacklist', []):
+ skip.add((entry['rel'], tuple(entry['box'])))
+ pool = pd.get('people', {}).get('__pool__', {})
+ for m in pool.get('members', []):
+ skip.add((m['rel'], tuple(m['box'])))
+ except Exception:
+ pass
+
faces = []
for path in sorted(glob.glob(os.path.join(media_root, '**', 'faces.json'), recursive=True)):
dir_abs = os.path.dirname(path)
@@ -53,6 +70,8 @@ def collect_faces(media_root):
if not enc or not box or len(enc) != 128:
continue
rel = f"{dir_rel}/{filename}" if dir_rel else filename
+ if (rel, tuple(box)) in skip:
+ continue
faces.append({'rel': rel, 'box': box, 'encoding': enc})
return faces