#!/usr/bin/env python3
"""
Detect faces in one or more images and return bounding boxes and 128-D encodings.

Usage:
    python3 faces.py [--workers N] [--model {cnn,hog}] [IMAGE ...]

Output:
    JSON dict mapping each input path to its result array.
    {"/path/img.jpg": [{"box": [top, right, bottom, left], "encoding": [128 floats]}, ...], ...}

    A null value for a path means detection failed (file unreadable, corrupt,
    etc.); update.rb leaves that file's 'faces' field as null and retries on
    the next run. An empty array [] means the image was processed successfully
    but no faces were found.

    If the face_recognition package cannot be imported, or no image paths are
    given, the output is the empty object {}.

Model note
----------
Uses the CNN model (model="cnn") by default, which is substantially more
accurate than the HOG model, especially for:
  - Faces at angles (up to ~45° profile)
  - Small faces in group photos
  - Faces in non-ideal lighting

Trade-off: CNN is ~10-30x slower than HOG on CPU. Parallelism via --workers
compensates on multi-core machines. dlib releases the Python GIL during C++
inference, so threads achieve genuine concurrency.

To switch to the faster but less accurate HOG model, pass --model hog on the
command line (or model="hog" to detect_one()).
"""

import sys
import json
import argparse
from concurrent.futures import ThreadPoolExecutor
from functools import partial

try:
    import face_recognition
    _FR_AVAILABLE = True
except ImportError as _e:
    # Keep the script runnable without the dependency: main() then emits "{}".
    print(f"face_recognition not available: {_e}", file=sys.stderr)
    _FR_AVAILABLE = False


def detect_one(path, model="cnn"):
    """Detect the faces in a single image.

    Args:
        path: Path of the image file to load.
        model: Detector name passed to face_recognition.face_locations();
            "cnn" (default) or "hog".

    Returns:
        A list with one dict per detected face, each holding "box" (the
        location tuple as a list) and "encoding" (the encoding as a list),
        or None if anything went wrong while processing the image.
    """
    try:
        img = face_recognition.load_image_file(path)
        locations = face_recognition.face_locations(img, model=model)
        encodings = face_recognition.face_encodings(img, locations)
        return [
            {"box": list(loc), "encoding": enc.tolist()}
            for loc, enc in zip(locations, encodings)
        ]
    except Exception as e:
        # Deliberate best-effort boundary: one bad file must not abort the
        # whole batch. The failure is reported on stderr and surfaces to the
        # caller as a JSON null for this path.
        print(f" {path}: {e}", file=sys.stderr)
        return None


def main(argv=None):
    """Parse the command line, run detection and print the JSON result.

    Args:
        argv: Argument list to parse instead of sys.argv[1:]. None (the
            default) keeps the normal command-line behaviour.
    """
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("images", nargs="*", help="Image paths to process")
    parser.add_argument(
        "--workers",
        type=int,
        default=4,
        help="Parallel threads (default: 4; set to nproc for full utilisation)",
    )
    parser.add_argument(
        "--model",
        choices=("cnn", "hog"),
        default="cnn",
        help="Face detection model (default: cnn; hog is faster but less accurate)",
    )
    args = parser.parse_args(argv)

    if not _FR_AVAILABLE or not args.images:
        print("{}")
        return

    detect = partial(detect_one, model=args.model)

    # Never start more threads than there are images; a single image (or
    # --workers <= 1) is processed inline without a pool.
    workers = min(args.workers, len(args.images))
    if workers <= 1:
        results = {path: detect(path) for path in args.images}
    else:
        with ThreadPoolExecutor(max_workers=workers) as pool:
            # pool.map yields results in input order, so zip pairs each path
            # with its own result.
            results = dict(zip(args.images, pool.map(detect, args.images)))

    print(json.dumps(results))


if __name__ == "__main__":
    main()