#!/usr/bin/env ruby
# frozen_string_literal: true
#
# Usage: ruby organize_by_day.rb <album_dir> [--go]
#
# Groups media files in <album_dir> by EXIF date, reverse-geocodes the first
# GPS fix of each day, and moves files into "Day N - Location" subdirectories.
# Without --go it prints the plan only (dry run).
#
# Provenance: this script was recovered from the git patch below.
#
#   From d006bb0d911c6ce08a20e2c10e30785ad559b439 Mon Sep 17 00:00:00 2001
#   From: Ken D'Ambrosio
#   Date: Tue, 12 May 2026 15:11:23 +0000
#   Subject: Add organize_by_day.rb script for GPS-based day/location splitting
#
#   Groups media files in an album directory by EXIF date, reverse-geocodes
#   the first GPS fix of each day via Nominatim, and moves files into
#   "Day N - Location" subdirectories. Dry-run by default; pass --go to
#   execute.
#
#   Supports a LOCATION_OVERRIDES map for cleaning up Nominatim names that
#   are overly granular.
#
#   Co-Authored-By: Claude Sonnet 4.6
#   ---
#    scripts/organize_by_day.rb | 135 +++++++++++++++++++++++++++++++++++++++++++++
#    1 file changed, 135 insertions(+)
#    create mode 100644 scripts/organize_by_day.rb
#   (limited to 'scripts/organize_by_day.rb')
#
#   diff --git a/scripts/organize_by_day.rb b/scripts/organize_by_day.rb
#   new file mode 100644
#   index 0000000..d5c39d4
#   --- /dev/null
#   +++ b/scripts/organize_by_day.rb
#   @@ -0,0 +1,135 @@

require 'json'
require 'date'
require 'net/http'
require 'uri'
require 'fileutils'

# Lower-case file extensions (without the dot) that count as media files.
IMAGE_EXTS = %w[jpg jpeg png gif webp heic heif tiff bmp].freeze
VIDEO_EXTS = %w[mp4 mov avi mkv webm m4v ogv].freeze
MEDIA_EXTS = (IMAGE_EXTS + VIDEO_EXTS).freeze

# Picks the album directory out of a command-line argument list.
#
# Option flags (anything starting with "--") are skipped, so `--go` may be
# given before or after the directory.
#
# @param argv [Array<String>] raw command-line arguments
# @return [String, nil] the first non-flag argument, or nil if there is none
def album_dir_from(argv)
  argv.find { |arg| !arg.start_with?('--') }
end

album_dir = album_dir_from(ARGV)
# Dry run is the default; files are only moved when --go is present.
dry_run = !ARGV.include?('--go')

# Clean up Nominatim names that are overly granular or wrong.
LOCATION_OVERRIDES = {
  # pattern matched against the geocoded name => replacement text
  /Jaipur Municipal Corporation/i => 'Jaipur',
  /Kothimahal Tahsil/i => 'Ujjain',
}.freeze

abort "Usage: #{$PROGRAM_NAME} <album_dir> [--go]" unless album_dir
abort "Not a directory: #{album_dir}" unless File.directory?(album_dir)

# Parses an EXIF-style timestamp into a calendar date.
#
# The leading "YYYY:MM:DD" is rewritten to "YYYY-MM-DD" before being handed
# to Date.parse.
#
# @param s [String, nil] raw timestamp, e.g. "2024:03:15 10:20:30"
# @return [Date, nil] the date, or nil when the value is missing or unparsable
def parse_exif_date(s)
  return nil unless s

  # \A anchors to the start of the whole string (^ would match after any newline).
  Date.parse(s.to_s.sub(/\A(\d{4}):(\d{2}):(\d{2})/, '\1-\2-\3'))
rescue ArgumentError
  nil
end

# Looks up a place name for a coordinate via the Nominatim reverse-geocoding
# endpoint.
#
# Best-effort: any failure (network error, non-2xx response, malformed JSON)
# is reported on stderr and yields 'Unknown' instead of raising.
#
# @param lat [Numeric, String] latitude
# @param lon [Numeric, String] longitude
# @return [String] the first of city, town, village, suburb, county or state
#   present in the response's address, or 'Unknown'
def reverse_geocode(lat, lon)
  uri = URI("https://nominatim.openstreetmap.org/reverse?lat=#{lat}&lon=#{lon}&format=json&zoom=10")
  req = Net::HTTP::Get.new(uri)
  req['User-Agent'] = 'albumen-organizer/1.0 (self-hosted photo album)'
  res = Net::HTTP.start(uri.host, uri.port, use_ssl: true, open_timeout: 10, read_timeout: 10) do |http|
    http.request(req)
  end

  # Report HTTP failures as such rather than as a JSON parse error on an
  # error page.
  unless res.is_a?(Net::HTTPSuccess)
    warn " Geocode error: HTTP #{res.code} #{res.message}"
    return 'Unknown'
  end

  addr = JSON.parse(res.body)['address'] || {}
  addr['city'] || addr['town'] || addr['village'] || addr['suburb'] ||
    addr['county'] || addr['state'] || 'Unknown'
rescue StandardError => e
  warn " Geocode error: #{e.message}"
  'Unknown'
end

# ── Gather files ───────────────────────────────────────────────────────────────

# Only regular files directly inside album_dir whose extension is a known
# media type; a directory that merely has a media-like name is ignored.
media_names = Dir.children(album_dir).select do |name|
  ext = File.extname(name).downcase.delete_prefix('.')
  MEDIA_EXTS.include?(ext) && File.file?(File.join(album_dir, name))
end
files = media_names.sort

abort "No media files found in #{album_dir}" if files.empty?
puts "Found #{files.size} media files. Reading EXIF (batch)..."
# Run exiftool once over all files for speed.
# The command is passed as an argument array so album_dir never goes through a
# shell; quotes, spaces or `$` in the path cannot break or alter the command.
exiftool_cmd = [
  'exiftool', '-json', '-DateTimeOriginal', '-CreateDate',
  '-GPSLatitude#', '-GPSLongitude#', '-norecurse', album_dir
]
exif_out =
  begin
    IO.popen(exiftool_cmd, err: File::NULL, &:read)
  rescue Errno::ENOENT
    abort 'exiftool not found; install it and make sure it is on PATH.'
  end

exif_entries =
  begin
    JSON.parse(exif_out)
  rescue JSON::ParserError
    abort "Could not read EXIF data from #{album_dir} (exiftool returned no usable JSON)."
  end

# Index the exiftool records by bare file name.
exif_map = exif_entries.each_with_object({}) do |entry, by_name|
  by_name[File.basename(entry['SourceFile'])] = entry
end

# One record per media file: name, parsed date (or nil) and raw GPS values.
file_info = files.map do |name|
  exif = exif_map[name] || {}
  raw = exif['DateTimeOriginal'] || exif['CreateDate']
  {
    name: name,
    date: parse_exif_date(raw),
    lat: exif['GPSLatitude'],
    lon: exif['GPSLongitude'],
  }
end

# ── Group by date ──────────────────────────────────────────────────────────────

by_date = file_info.group_by { |f| f[:date] }
# Files whose date could not be determined are set aside and never moved.
no_date = by_date.delete(nil) || []
puts " #{no_date.size} files with no date (will be skipped)." if no_date.any?

sorted_dates = by_date.keys.sort
puts "#{sorted_dates.size} distinct days found."

# ── Build plan ─────────────────────────────────────────────────────────────────

# One entry per day, in date order: { date:, dir:, files: }.
plan = sorted_dates.each_with_index.map do |date, i|
  day_no = i + 1
  day_files = by_date[date].sort_by { |f| f[:name] }
  # "First" GPS fix means first in file-name order among that day's files.
  first_gps = day_files.find { |f| f[:lat] && f[:lon] }

  location =
    if first_gps
      print " Geocoding Day #{day_no} (#{date})... "
      $stdout.flush
      geocoded = reverse_geocode(first_gps[:lat], first_gps[:lon])
      loc = LOCATION_OVERRIDES.reduce(geocoded) { |name, (pat, rep)| name.gsub(pat, rep) }
      # A "/" in the name would make mkdir_p create nested directories.
      loc = loc.tr('/', '-')
      puts loc
      sleep 1.1 # Nominatim rate limit: 1 req/s
      loc
    else
      puts " Day #{day_no} (#{date}): no GPS, using 'Unknown'"
      'Unknown'
    end

  { date: date, dir: "Day #{day_no} - #{location}", files: day_files.map { |f| f[:name] } }
end

# ── Print plan ─────────────────────────────────────────────────────────────────

puts "\n#{'=' * 60}"
puts dry_run ? 'DRY RUN — pass --go to execute' : 'EXECUTING'
puts '=' * 60
plan.each do |entry|
  puts "#{entry[:dir]}/ (#{entry[:files].size} files)"
end
puts "#{no_date.size} files with no date left in place." if no_date.any?
# Nothing below runs unless --go was given.
exit if dry_run

# ── Execute ────────────────────────────────────────────────────────────────────

plan.each do |entry|
  target = File.join(album_dir, entry[:dir])
  FileUtils.mkdir_p(target)

  moved = 0
  entry[:files].each do |name|
    dest = File.join(target, name)
    # Never clobber a file already in the target directory (e.g. left there
    # by an earlier, interrupted run); leave the source where it is instead.
    if File.exist?(dest)
      warn " Skipping #{name}: #{dest} already exists"
      next
    end

    FileUtils.mv(File.join(album_dir, name), dest)
    moved += 1
  end
  puts "Created #{entry[:dir]}/ (#{moved} files)"
end

puts "\nDone. Run update.rb to rebuild album.json files and thumbnails."

# Patch trailer of the original extraction: -- cgit v1.2.3