diff --git a/compare b/compare
index 44e566c..f2a8677 100755
--- a/compare
+++ b/compare
@@ -9,8 +9,10 @@
 #    the directory is removed outright.
 # 3. The remaining directories are “cleaned” (converted to lowercase, punctuation removed)
 #    and then grouped by fuzzy similarity using a configurable threshold.
-# 4. If duplicate groups remain, you are prompted to choose which duplicate to remove.
-# 5. A dry-run mode (--dry-run) is available to show what would be removed without deleting.
+# 4. Within each group, if at least one directory contains "2160p" and one contains "1080p",
+#    the 1080p directory(ies) are removed (or flagged in dry-run mode).
+# 5. For any remaining duplicate groups, the user is prompted to select a directory to remove.
+# 6. A --dry-run mode is available to preview removals without actually deleting any directories.
 
 set -euo pipefail
 
@@ -59,14 +61,14 @@ if [ ! -f "$WORDS_FILE" ]; then
     exit 1
 fi
 
-# Read undesirable words (one per line) into an array (ignoring blank lines)
+# Read undesirable words (one per line) into an array, ignoring blank lines.
 mapfile -t words < <(grep -v '^[[:space:]]*$' "$WORDS_FILE")
 
 echo "=== Pre-filtering Directories by Undesirable Words ==="
 # Create an array to hold directories that do NOT match any undesirable word.
 filtered_dirs=()
 
-# Loop over immediate subdirectories in both directories.
+# Loop over immediate subdirectories in both DIR1 and DIR2.
 for d in "$DIR1"/* "$DIR2"/*; do
     if [ -d "$d" ]; then
         base=$(basename "$d")
@@ -96,12 +98,10 @@ done
 clean_name() {
     local name="$1"
     # Normalize: convert to lowercase, remove punctuation, and trim extra whitespace.
-    local normalized
-    normalized=$(echo "$name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9 ]//g' | xargs)
-    echo "$normalized"
+    echo "$name" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9 ]//g' | xargs
 }
 
-# Function: Compute fuzzy similarity between two strings using Python's difflib.
+# Function: Compute fuzzy similarity between two names using Python's difflib.
 compute_similarity() {
     local name1="$1"
     local name2="$2"
@@ -136,8 +136,45 @@ for d in "${filtered_dirs[@]}"; do
     fi
 done
 
+echo "=== Resolution Preference Filtering ==="
+# Prefer 2160p: when a group holds both a 2160p and a 1080p directory, remove the 1080p
+# one(s) (only reported under --dry-run) and drop them from the group's path list.
+for key in "${!groups[@]}"; do
+    mapfile -t paths <<< "${groups[$key]}"  # one path per line; 'read -a' would stop after the first line
+    has_2160p=false
+    has_1080p=false
+    for path in "${paths[@]}"; do
+        base=$(basename -- "$path")
+        # A name carrying both tags counts as 2160p, so it is never deleted.
+        if [[ "${base,,}" == *2160p* ]]; then
+            has_2160p=true
+        elif [[ "${base,,}" == *1080p* ]]; then
+            has_1080p=true
+        fi
+    done
+    if $has_2160p && $has_1080p; then
+        echo "Group (representative cleaned name: ${group_rep[$key]:-unknown}) has both 1080p and 2160p directories."
+        new_group=()
+        for path in "${paths[@]}"; do
+            base=$(basename -- "$path")
+            if [[ "${base,,}" == *1080p* && "${base,,}" != *2160p* ]]; then
+                echo "Removing '$path' because a 2160p version is present."
+                if $DRY_RUN; then
+                    echo "Dry-run: would remove '$path'"
+                else
+                    rm -rf -- "${path:?}"
+                    echo "Removed '$path'"
+                fi
+            else
+                new_group+=("$path")
+            fi
+        done
+        groups["$key"]=$(printf '%s\n' "${new_group[@]}")
+    fi
+done
+
 echo "=== Interactive Duplicate Resolution ==="
-# For each group that has more than one directory, prompt the user to select one to remove.
+# For each group that still contains more than one directory, prompt the user to select one to remove.
 for key in "${!groups[@]}"; do
-    IFS=$'\n' read -r -a paths <<< "${groups[$key]}" || true
+    mapfile -t paths <<< "${groups[$key]}"
     # Filter out directories that no longer exist.
@@ -148,7 +185,7 @@ for key in "${!groups[@]}"; do
         fi
     done
     if [ "${#existing[@]}" -gt 1 ]; then
-        echo "Duplicate group (cleaned representative: ${group_rep[$key]:-unknown}):"
+        echo "Duplicate group (representative cleaned name: ${group_rep[$key]:-unknown}):"
         i=1
         for p in "${existing[@]}"; do
             echo " [$i] $p"
@@ -171,4 +208,3 @@ for key in "${!groups[@]}"; do
 done
 
 echo "Script completed."
-