added 1080p 2160p filter and removal
This commit is contained in:
parent
1c37c870fe
commit
67c7056b2e
58
compare
58
compare
@ -9,8 +9,10 @@
|
|||||||
# the directory is removed outright.
|
# the directory is removed outright.
|
||||||
# 3. The remaining directories are “cleaned” (converted to lowercase, punctuation removed)
|
# 3. The remaining directories are “cleaned” (converted to lowercase, punctuation removed)
|
||||||
# and then grouped by fuzzy similarity using a configurable threshold.
|
# and then grouped by fuzzy similarity using a configurable threshold.
|
||||||
# 4. If duplicate groups remain, you are prompted to choose which duplicate to remove.
|
# 4. Within each group, if at least one directory contains "2160p" and one contains "1080p",
|
||||||
# 5. A dry-run mode (--dry-run) is available to show what would be removed without deleting.
|
# the 1080p directory(ies) are removed (or flagged in dry-run mode).
|
||||||
|
# 5. For any remaining duplicate groups, the user is prompted to select a directory to remove.
|
||||||
|
# 6. A --dry-run mode is available to preview removals without actually deleting any directories.
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
@ -59,14 +61,14 @@ if [ ! -f "$WORDS_FILE" ]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Read undesirable words (one per line) into an array (ignoring blank lines)
|
# Read undesirable words (one per line) into an array, ignoring blank lines.
|
||||||
mapfile -t words < <(grep -v '^[[:space:]]*$' "$WORDS_FILE")
|
mapfile -t words < <(grep -v '^[[:space:]]*$' "$WORDS_FILE")
|
||||||
|
|
||||||
echo "=== Pre-filtering Directories by Undesirable Words ==="
|
echo "=== Pre-filtering Directories by Undesirable Words ==="
|
||||||
# Create an array to hold directories that do NOT match any undesirable word.
|
# Create an array to hold directories that do NOT match any undesirable word.
|
||||||
filtered_dirs=()
|
filtered_dirs=()
|
||||||
|
|
||||||
# Loop over immediate subdirectories in both directories.
|
# Loop over immediate subdirectories in both DIR1 and DIR2.
|
||||||
for d in "$DIR1"/* "$DIR2"/*; do
|
for d in "$DIR1"/* "$DIR2"/*; do
|
||||||
if [ -d "$d" ]; then
|
if [ -d "$d" ]; then
|
||||||
base=$(basename "$d")
|
base=$(basename "$d")
|
||||||
@ -96,12 +98,10 @@ done
|
|||||||
# Function: Normalize a directory name so that names can be compared fuzzily.
# Arguments: $1 - raw directory name
# Outputs:   on stdout, the name converted to lowercase, stripped of every
#            character other than a-z, 0-9 and space, with leading/trailing
#            spaces trimmed and inner runs of spaces collapsed to one.
clean_name() {
  local name="$1"
  # Byte-wise ASCII semantics: keeps [a-z] from matching locale-specific letters.
  local LC_ALL=C
  # Split and re-join on a single space regardless of the caller's IFS.
  local IFS=' '
  local -a parts=()

  # Treat embedded newlines as word separators rather than deleting them.
  name=${name//$'\n'/ }
  # Lowercase, then drop everything that is not a lowercase letter, digit or space.
  name=${name,,}
  name=${name//[^a-z0-9 ]/}

  # Word-splitting via read trims the ends and collapses repeated spaces.
  # No echo is involved, so names such as "-n" or "-e" are not swallowed as options.
  read -r -a parts <<< "$name" || true
  # The "-" default keeps an empty array from tripping `set -u` on Bash < 4.4.
  printf '%s\n' "${parts[*]-}"
}
|
||||||
|
|
||||||
# Function: Compute fuzzy similarity between two strings using Python's difflib.
|
# Function: Compute fuzzy similarity between two names using Python's difflib.
|
||||||
compute_similarity() {
|
compute_similarity() {
|
||||||
local name1="$1"
|
local name1="$1"
|
||||||
local name2="$2"
|
local name2="$2"
|
||||||
@ -136,8 +136,45 @@ for d in "${filtered_dirs[@]}"; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
|
||||||
|
echo "=== Resolution Preference Filtering ==="

# Function: Succeed when the final path component of $1 contains the
# resolution tag $2 (e.g. "2160p"), compared case-insensitively.
has_resolution_tag() {
  local base
  base=$(basename -- "$1")
  [[ "${base,,}" == *"${2,,}"* ]]
}

# Function: For each group, if one directory contains "2160p" and another
# contains "1080p", remove the 1080p directory(ies).
# Globals:   groups    (read/written) - newline-separated directory paths per key
#            group_rep (read)         - representative cleaned name per key
#            DRY_RUN   (read)         - "true" to report removals without deleting
# Outputs:   progress messages on stdout
prefer_2160p_over_1080p() {
  local key path updated
  local has_2160p has_1080p
  local -a paths

  for key in "${!groups[@]}"; do
    # mapfile reads every line of the group; a plain `read -a` stops after the
    # first newline and would only ever see one path.
    mapfile -t paths <<< "${groups[$key]}"

    has_2160p=false
    has_1080p=false
    for path in "${paths[@]}"; do
      # An empty group yields a single empty entry from the here-string.
      [[ -n "$path" ]] || continue
      # A name carrying both tags counts as 2160p, so it is never the one removed.
      if has_resolution_tag "$path" "2160p"; then
        has_2160p=true
      elif has_resolution_tag "$path" "1080p"; then
        has_1080p=true
      fi
    done

    if $has_2160p && $has_1080p; then
      echo "Group (representative cleaned name: ${group_rep[$key]:-unknown}) has both 1080p and 2160p directories."
      # Rebuilt newline-separated member list without the removed directories.
      updated=""
      for path in "${paths[@]}"; do
        [[ -n "$path" ]] || continue
        if has_resolution_tag "$path" "1080p" && ! has_resolution_tag "$path" "2160p"; then
          echo "Removing '$path' because a 2160p version is present."
          if $DRY_RUN; then
            echo "Dry-run: would remove '$path'"
          else
            # `--` protects against paths that begin with a dash.
            rm -rf -- "$path"
            echo "Removed '$path'"
          fi
        else
          updated+="${updated:+$'\n'}$path"
        fi
      done
      # The group is updated in dry-run mode too, so later steps do not
      # offer the flagged directory again.
      groups["$key"]=$updated
    fi
  done
}

prefer_2160p_over_1080p
|
||||||
|
|
||||||
echo "=== Interactive Duplicate Resolution ==="
|
echo "=== Interactive Duplicate Resolution ==="
|
||||||
# For each group that has more than one directory, prompt the user to select one to remove.
|
# For each group that still contains more than one directory, prompt the user to select one to remove.
|
||||||
for key in "${!groups[@]}"; do
|
for key in "${!groups[@]}"; do
|
||||||
IFS=$'\n' read -r -a paths <<< "${groups[$key]}" || true
|
IFS=$'\n' read -r -a paths <<< "${groups[$key]}" || true
|
||||||
# Filter out directories that no longer exist.
|
# Filter out directories that no longer exist.
|
||||||
@ -148,7 +185,7 @@ for key in "${!groups[@]}"; do
|
|||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
if [ "${#existing[@]}" -gt 1 ]; then
|
if [ "${#existing[@]}" -gt 1 ]; then
|
||||||
echo "Duplicate group (cleaned representative: ${group_rep[$key]:-unknown}):"
|
echo "Duplicate group (representative cleaned name: ${group_rep[$key]:-unknown}):"
|
||||||
i=1
|
i=1
|
||||||
for p in "${existing[@]}"; do
|
for p in "${existing[@]}"; do
|
||||||
echo " [$i] $p"
|
echo " [$i] $p"
|
||||||
@ -171,4 +208,3 @@ for key in "${!groups[@]}"; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
echo "Script completed."
|
echo "Script completed."
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user