
- Fixed SQLAlchemy import issues in VOD models
- Fixed TMDB/OMDB API authentication and rate limiting
- Fixed VOD directory path resolution and permission errors
- Fixed rental system transaction handling
- Added HLS streaming support for VOD content
- Implemented Redis caching for performance
- Added watch progress tracking
- Enhanced search with multi-field support
- Added health check endpoint

This patch resolves critical production issues in the VOD system.
"""
|
|
VOD Directory Service - Enhanced with proper path handling and error recovery
|
|
"""
|
|
import os
|
|
import json
|
|
import mimetypes
|
|
import hashlib
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional, Tuple
|
|
from datetime import datetime, timedelta
|
|
from sqlalchemy.orm import Session
|
|
import logging
|
|
import re
|
|
import shutil
|
|
import fnmatch
|
|
import uuid
|
|
|
|
from vod_models import (
|
|
VODDirectory, VODDirectoryScan, VODContent, ContentStatus,
|
|
ContentType, VODSubtitle
|
|
)
|
|
from database import get_db
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class VODDirectoryService:
    """Service for managing multiple local directories and scanning for VOD content"""

    def __init__(self, db: Session):
        self.db = db

        # Enhanced video formats with priority
        self.video_formats = {
            '.mp4': {'mime': 'video/mp4', 'priority': 1},
            '.mkv': {'mime': 'video/x-matroska', 'priority': 2},
            '.avi': {'mime': 'video/x-msvideo', 'priority': 3},
            '.mov': {'mime': 'video/quicktime', 'priority': 4},
            '.m4v': {'mime': 'video/mp4', 'priority': 5},
            '.wmv': {'mime': 'video/x-ms-wmv', 'priority': 6},
            '.flv': {'mime': 'video/x-flv', 'priority': 7},
            '.webm': {'mime': 'video/webm', 'priority': 8},
            '.mpg': {'mime': 'video/mpeg', 'priority': 9},
            '.mpeg': {'mime': 'video/mpeg', 'priority': 10},
            '.m2v': {'mime': 'video/mpeg', 'priority': 11},
            '.3gp': {'mime': 'video/3gpp', 'priority': 12},
            '.ogv': {'mime': 'video/ogg', 'priority': 13},
            '.ts': {'mime': 'video/mp2t', 'priority': 14},
            '.m2ts': {'mime': 'video/mp2t', 'priority': 15}
        }

        # Subtitle formats with priority
        self.subtitle_formats = {
            '.srt': {'mime': 'text/plain', 'priority': 1},
            '.vtt': {'mime': 'text/vtt', 'priority': 2},
            '.ass': {'mime': 'text/plain', 'priority': 3},
            '.ssa': {'mime': 'text/plain', 'priority': 4},
            '.sub': {'mime': 'text/plain', 'priority': 5},
            '.idx': {'mime': 'text/plain', 'priority': 6}
        }

        # Default exclude patterns
        self.default_exclude_patterns = [
            '.*',            # Hidden files
            'sample-*',      # Sample files
            '*-sample.*',
            '*.tmp',
            '*.partial',
            '*.download',
            'thumbs.db',
            'desktop.ini',
            '.DS_Store'
        ]

    def create_directory(self, name: str, path: str, **kwargs) -> VODDirectory:
        """Create a new VOD directory configuration with validation"""
        try:
            # Normalize path
            path = os.path.abspath(os.path.expanduser(path))

            # Pop service-level options so they are not passed to the model
            create_if_missing = kwargs.pop('create_if_not_exists', False)

            # Validate path exists
            if not os.path.exists(path):
                # Try to create directory if allowed
                if create_if_missing:
                    try:
                        os.makedirs(path, exist_ok=True)
                        logger.info(f"Created directory path: {path}")
                    except OSError as e:
                        raise ValueError(f"Cannot create directory {path}: {e}")
                else:
                    raise ValueError(f"Directory path does not exist: {path}")

            if not os.path.isdir(path):
                raise ValueError(f"Path is not a directory: {path}")

            # Check permissions
            if not os.access(path, os.R_OK):
                raise ValueError(f"No read permission for directory: {path}")

            # Check if path is already registered
            existing = self.db.query(VODDirectory).filter(
                VODDirectory.path == path
            ).first()

            if existing:
                raise ValueError(f"Directory already registered: {path}")

            # Calculate storage info
            total_size, available_space = self._calculate_storage_info(path)

            # Set default values
            if 'supported_formats' not in kwargs:
                kwargs['supported_formats'] = list(self.video_formats.keys())

            if 'exclude_patterns' not in kwargs:
                kwargs['exclude_patterns'] = self.default_exclude_patterns.copy()

            directory = VODDirectory(
                name=name,
                path=path,
                total_size_gb=total_size,
                available_space_gb=available_space,
                **kwargs
            )

            self.db.add(directory)
            self.db.commit()
            self.db.refresh(directory)

            logger.info(f"Created VOD directory: {name} at {path}")
            return directory

        except Exception as e:
            self.db.rollback()
            logger.error(f"Failed to create directory: {e}")
            raise

    def update_directory(self, directory_id: int, **updates) -> VODDirectory:
        """Update directory configuration with validation"""
        try:
            directory = self.db.query(VODDirectory).filter(
                VODDirectory.id == directory_id
            ).first()

            if not directory:
                raise ValueError(f"Directory not found: {directory_id}")

            # Validate path if being updated
            if 'path' in updates:
                new_path = os.path.abspath(os.path.expanduser(updates['path']))
                if not os.path.exists(new_path) or not os.path.isdir(new_path):
                    raise ValueError(f"Invalid directory path: {new_path}")
                updates['path'] = new_path

            for key, value in updates.items():
                if hasattr(directory, key):
                    setattr(directory, key, value)

            directory.updated_at = datetime.utcnow()
            self.db.commit()
            self.db.refresh(directory)

            return directory

        except Exception as e:
            self.db.rollback()
            logger.error(f"Failed to update directory: {e}")
            raise

    def delete_directory(self, directory_id: int, remove_content: bool = False):
        """Delete a directory configuration and optionally its content"""
        try:
            directory = self.db.query(VODDirectory).filter(
                VODDirectory.id == directory_id
            ).first()

            if not directory:
                raise ValueError(f"Directory not found: {directory_id}")

            if remove_content:
                # Remove all content from this directory
                content_list = self.db.query(VODContent).filter(
                    VODContent.video_url.like(f"{directory.path}%")
                ).all()

                for content in content_list:
                    # Also remove associated subtitles
                    self.db.query(VODSubtitle).filter(
                        VODSubtitle.content_id == content.id
                    ).delete()
                    self.db.delete(content)

            # Remove directory scans
            self.db.query(VODDirectoryScan).filter(
                VODDirectoryScan.directory_id == directory_id
            ).delete()

            self.db.delete(directory)
            self.db.commit()

            logger.info(f"Deleted VOD directory: {directory.name}")

        except Exception as e:
            self.db.rollback()
            logger.error(f"Failed to delete directory: {e}")
            raise

    def scan_directory(self, directory_id: int, force: bool = False,
                       deep_scan: bool = False) -> VODDirectoryScan:
        """Scan a directory for new video content with error recovery"""
        try:
            directory = self.db.query(VODDirectory).filter(
                VODDirectory.id == directory_id,
                VODDirectory.is_active == True
            ).first()

            if not directory:
                raise ValueError(f"Active directory not found: {directory_id}")

            # Verify directory still exists
            if not os.path.exists(directory.path):
                directory.is_active = False
                self.db.commit()
                raise ValueError(f"Directory no longer exists: {directory.path}")

            # Check if directory is currently being scanned
            active_scan = self.db.query(VODDirectoryScan).filter(
                VODDirectoryScan.directory_id == directory_id,
                VODDirectoryScan.status == 'running'
            ).first()

            if active_scan and not force:
                # Check if scan is stale (running for more than 1 hour)
                scan_age = datetime.utcnow() - active_scan.started_at
                if scan_age.total_seconds() > 3600:
                    logger.warning(f"Stale scan detected for directory {directory_id}, marking as failed")
                    active_scan.status = 'failed'
                    active_scan.error_log = json.dumps({
                        'error': 'Scan timeout',
                        'timestamp': datetime.utcnow().isoformat()
                    })
                    self.db.commit()
                else:
                    raise ValueError("Directory is currently being scanned")

            # Create scan record
            scan = VODDirectoryScan(
                directory_id=directory_id,
                scan_type='deep' if deep_scan else 'normal'
            )
            self.db.add(scan)
            self.db.commit()
            self.db.refresh(scan)

            try:
                self._perform_scan(directory, scan, deep_scan)
                scan.status = 'completed'
                scan.completed_at = datetime.utcnow()
                scan.duration_seconds = int(
                    (scan.completed_at - scan.started_at).total_seconds()
                )

            except Exception as e:
                scan.status = 'failed'
                scan.error_log = json.dumps({
                    'error': str(e),
                    'timestamp': datetime.utcnow().isoformat()
                })
                logger.error(f"Directory scan failed: {e}")
                raise

            finally:
                # Update directory info
                directory.last_scan_at = datetime.utcnow()
                # Only record a duration for scans that completed
                if scan.duration_seconds is not None:
                    directory.last_scan_duration_seconds = scan.duration_seconds
                directory.content_count = self._count_directory_content(directory_id)

                # Update storage info
                total_size, available_space = self._calculate_storage_info(directory.path)
                directory.total_size_gb = total_size
                directory.available_space_gb = available_space

                self.db.commit()

            return scan

        except Exception as e:
            logger.error(f"Scan failed for directory {directory_id}: {e}")
            raise

    def _perform_scan(self, directory: VODDirectory, scan: VODDirectoryScan,
                      deep_scan: bool = False):
        """Perform the actual directory scan with enhanced error handling"""
        processed_files = []
        errors = []
        batch_size = 50  # Process in batches to avoid memory issues

        try:
            # Get all video files
            video_files = self._find_video_files(
                directory.path,
                directory.scan_recursive,
                directory.supported_formats or list(self.video_formats.keys()),
                directory.exclude_patterns or self.default_exclude_patterns
            )

            scan.files_found = len(video_files)
            self.db.commit()

            # Process files in batches
            for i in range(0, len(video_files), batch_size):
                batch = video_files[i:i + batch_size]

                for file_path in batch:
                    try:
                        # Check if file still exists
                        if not os.path.exists(file_path):
                            logger.warning(f"File disappeared during scan: {file_path}")
                            scan.files_skipped += 1
                            continue

                        result = self._process_video_file(
                            file_path,
                            directory,
                            deep_scan=deep_scan
                        )

                        processed_files.append({
                            'file': file_path,
                            'result': result,
                            'timestamp': datetime.utcnow().isoformat()
                        })

                        if result == 'added':
                            scan.files_added += 1
                        elif result == 'updated':
                            scan.files_updated += 1
                        elif result == 'skipped':
                            scan.files_skipped += 1

                        scan.files_processed += 1

                    except Exception as e:
                        errors.append({
                            'file': file_path,
                            'error': str(e),
                            'timestamp': datetime.utcnow().isoformat()
                        })
                        scan.errors_count += 1
                        logger.warning(f"Failed to process {file_path}: {e}")

                # Commit batch
                self.db.commit()

            scan.processed_files = processed_files[:1000]  # Limit stored history
            scan.error_log = json.dumps(errors) if errors else None

        except Exception as e:
            logger.error(f"Scan processing error: {e}")
            raise

    def _find_video_files(self, path: str, recursive: bool,
                          formats: List[str], exclude_patterns: List[str]) -> List[str]:
        """Find all video files in directory with error handling"""
        video_files = []

        try:
            if recursive:
                # os.walk swallows directory errors unless an onerror callback
                # is supplied, so log unreadable directories explicitly
                def on_walk_error(error: OSError):
                    logger.warning(f"Cannot read directory during scan: {error}")

                for root, dirs, files in os.walk(path, followlinks=False,
                                                 onerror=on_walk_error):
                    # Skip hidden directories
                    dirs[:] = [d for d in dirs if not d.startswith('.')]

                    for file in files:
                        try:
                            if self._should_include_file(file, formats, exclude_patterns):
                                full_path = os.path.join(root, file)
                                # Verify file is readable
                                if os.access(full_path, os.R_OK):
                                    video_files.append(full_path)
                        except Exception as e:
                            logger.debug(f"Error checking file {file}: {e}")
                            continue

            else:
                try:
                    for file in os.listdir(path):
                        full_path = os.path.join(path, file)
                        if os.path.isfile(full_path) and \
                                os.access(full_path, os.R_OK) and \
                                self._should_include_file(file, formats, exclude_patterns):
                            video_files.append(full_path)

                except PermissionError as e:
                    logger.error(f"Permission denied for directory {path}: {e}")

        except Exception as e:
            logger.error(f"Error finding video files: {e}")

        return sorted(video_files)

    def _should_include_file(self, filename: str, formats: List[str],
                             exclude_patterns: List[str]) -> bool:
        """Check if file should be included in scan"""
        try:
            # Check format
            _, ext = os.path.splitext(filename.lower())
            if ext not in formats:
                return False

            # Check exclude patterns
            for pattern in exclude_patterns:
                try:
                    if fnmatch.fnmatch(filename, pattern):
                        return False
                except Exception:
                    continue

            # Check for sample files (additional check)
            if 'sample' in filename.lower():
                return False

            return True

        except Exception as e:
            logger.debug(f"Error checking file {filename}: {e}")
            return False

    def _process_video_file(self, file_path: str, directory: VODDirectory,
                            deep_scan: bool = False) -> str:
        """Process a single video file with enhanced metadata extraction"""
        try:
            # Normalize path
            file_path = os.path.abspath(file_path)

            # Check if content already exists
            existing = self.db.query(VODContent).filter(
                VODContent.video_url == file_path
            ).first()

            if existing:
                # Update file info if needed
                try:
                    stat = os.stat(file_path)
                    file_size_mb = int(stat.st_size / (1024 * 1024))

                    if existing.file_size_mb != file_size_mb or deep_scan:
                        existing.file_size_mb = file_size_mb
                        existing.updated_at = datetime.utcnow()

                        # Deep scan: try to extract more metadata
                        if deep_scan:
                            metadata = self._extract_file_metadata(file_path, deep=True)
                            if metadata.get('duration_seconds'):
                                existing.duration_seconds = metadata['duration_seconds']

                        return 'updated'

                except OSError as e:
                    logger.warning(f"Cannot stat file {file_path}: {e}")

                return 'skipped'

            # Extract metadata from filename and path
            metadata = self._extract_file_metadata(file_path, deep=deep_scan)

            # Create new content
            content = VODContent(
                title=metadata['title'],
                description=metadata.get('description', ''),
                content_type=metadata['content_type'],
                status=ContentStatus.DRAFT,  # Will be published after metadata enrichment
                release_year=metadata.get('release_year'),
                video_url=file_path,
                file_size_mb=metadata['file_size_mb'],
                duration_seconds=metadata.get('duration_seconds'),
                language=metadata.get('language', 'en'),
                series_id=metadata.get('series_id'),
                season_number=metadata.get('season_number'),
                episode_number=metadata.get('episode_number'),
                slug=self._generate_slug(metadata['title']),
                rental_type='free',  # Default as requested
                keywords=metadata.get('keywords', []),
                video_quality=metadata.get('video_quality', 'SD'),
                audio_languages=[metadata.get('language', 'en')]
            )

            self.db.add(content)
            self.db.commit()
            self.db.refresh(content)

            # Process subtitles if found
            self._process_subtitles(file_path, content.id)

            return 'added'

        except Exception as e:
            logger.error(f"Error processing video file {file_path}: {e}")
            raise

    def _extract_file_metadata(self, file_path: str, deep: bool = False) -> Dict:
        """Extract metadata from filename and file properties with enhanced parsing"""
        filename = os.path.basename(file_path)
        name_without_ext = os.path.splitext(filename)[0]

        # Get file stats
        try:
            stat = os.stat(file_path)
            file_size_mb = int(stat.st_size / (1024 * 1024))
        except OSError:
            file_size_mb = 0

        metadata = {
            'title': self._clean_title(name_without_ext),
            'file_size_mb': file_size_mb,
            'content_type': self._detect_content_type(name_without_ext, file_path)
        }

        # Extract year
        year_patterns = [
            r'\b(19[5-9]\d|20[0-3]\d)\b',  # 1950-2039
            r'\((\d{4})\)',                # Year in parentheses
            r'\.(\d{4})\.'                 # Year with dots
        ]

        for pattern in year_patterns:
            match = re.search(pattern, name_without_ext)
            if match:
                year = int(match.group(1))
                if 1950 <= year <= datetime.now().year + 1:
                    metadata['release_year'] = year
                    break

        # Extract season/episode for TV series
        episode_patterns = [
            r'[Ss](\d+)[Ee](\d+)',                # S01E01
            r'(\d+)x(\d+)',                       # 1x01
            r'Season\s*(\d+)\s*Episode\s*(\d+)',  # Season 1 Episode 1
            r'(\d+)\.(\d{2})',                    # 1.01
        ]

        for pattern in episode_patterns:
            match = re.search(pattern, name_without_ext)
            if match:
                metadata['season_number'] = int(match.group(1))
                metadata['episode_number'] = int(match.group(2))
                metadata['content_type'] = ContentType.EPISODE
                break

        # Extract language
        language_patterns = {
            'english': 'en', 'eng': 'en',
            'spanish': 'es', 'spa': 'es', 'esp': 'es',
            'french': 'fr', 'fra': 'fr', 'fre': 'fr',
            'german': 'de', 'ger': 'de', 'deu': 'de',
            'italian': 'it', 'ita': 'it',
            'portuguese': 'pt', 'por': 'pt',
            'russian': 'ru', 'rus': 'ru',
            'japanese': 'ja', 'jpn': 'ja', 'jap': 'ja',
            'chinese': 'zh', 'chi': 'zh',
            'korean': 'ko', 'kor': 'ko'
        }

        filename_lower = name_without_ext.lower()
        for lang_name, lang_code in language_patterns.items():
            if lang_name in filename_lower:
                metadata['language'] = lang_code
                break

        # Extract video quality
        quality_patterns = {
            '2160p': '4K', '4k': '4K', 'uhd': '4K',
            '1080p': 'FHD', 'fullhd': 'FHD', 'fhd': 'FHD',
            '720p': 'HD', 'hd': 'HD',
            '480p': 'SD', 'dvd': 'SD',
            '360p': 'LD', 'ld': 'LD'
        }

        for quality_pattern, quality_value in quality_patterns.items():
            if quality_pattern in filename_lower:
                metadata['video_quality'] = quality_value
                break

        # Extract keywords from filename
        keywords = []
        keyword_patterns = [
            'directors.cut', 'extended', 'unrated', 'remastered',
            'special.edition', 'imax', '3d', 'hdr', 'dolby',
            'atmos', 'dts', 'bluray', 'webrip', 'dvdrip'
        ]

        for keyword in keyword_patterns:
            if keyword in filename_lower.replace(' ', '.'):
                keywords.append(keyword.replace('.', ' '))

        metadata['keywords'] = keywords

        # Deep scan for duration (if requested and ffprobe available)
        if deep:
            duration = self._get_video_duration(file_path)
            if duration:
                metadata['duration_seconds'] = duration

        return metadata

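    # Illustrative parse (hypothetical filename, shown for orientation):
    # _extract_file_metadata("/media/tv/Show.Name.S01E02.720p.mkv") yields
    # roughly {'title': 'Show Name S01E02', 'season_number': 1,
    # 'episode_number': 2, 'video_quality': 'HD',
    # 'content_type': ContentType.EPISODE, ...}, with 'duration_seconds'
    # added only when deep=True and ffprobe succeeds.
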
    def _get_video_duration(self, file_path: str) -> Optional[int]:
        """Get video duration using ffprobe if available"""
        try:
            import subprocess

            cmd = [
                'ffprobe',
                '-v', 'error',
                '-show_entries', 'format=duration',
                '-of', 'default=noprint_wrappers=1:nokey=1',
                file_path
            ]

            result = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=10
            )

            if result.returncode == 0:
                duration = float(result.stdout.strip())
                return int(duration)

        except Exception as e:
            logger.debug(f"Cannot get duration for {file_path}: {e}")

        return None

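    # For reference, the call above is equivalent to running, e.g.:
    #   ffprobe -v error -show_entries format=duration \
    #           -of default=noprint_wrappers=1:nokey=1 /path/to/video.mkv
    # which prints the container duration in seconds on stdout.
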
    def _clean_title(self, title: str) -> str:
        """Clean up title from filename with enhanced processing"""
        # Strip a trailing video extension that leaked into the name
        # (anchored at the end so it cannot eat part of the title itself)
        for ext in self.video_formats.keys():
            title = re.sub(rf'{re.escape(ext)}$', '', title, flags=re.IGNORECASE)

        # Remove quality indicators
        quality_terms = [
            '2160p', '1080p', '720p', '480p', '360p',
            '4k', 'uhd', 'fullhd', 'fhd', 'hd', 'sd',
            'bluray', 'bdrip', 'brrip', 'webrip', 'webdl',
            'dvdrip', 'dvdscr', 'hdtv', 'cam', 'ts'
        ]

        for term in quality_terms:
            title = re.sub(rf'\b{re.escape(term)}\b', '', title, flags=re.IGNORECASE)

        # Remove codec info
        codec_terms = [
            'x264', 'x265', 'h264', 'h265', 'hevc',
            'xvid', 'divx', 'avc', 'av1', 'vp9'
        ]

        for term in codec_terms:
            title = re.sub(rf'\b{re.escape(term)}\b', '', title, flags=re.IGNORECASE)

        # Remove audio format info (re.escape keeps the dot in '5.1'/'7.1'
        # literal instead of acting as a regex wildcard)
        audio_terms = [
            'dts', 'ac3', 'aac', 'mp3', 'flac',
            'dolby', 'atmos', 'truehd', '5.1', '7.1'
        ]

        for term in audio_terms:
            title = re.sub(rf'\b{re.escape(term)}\b', '', title, flags=re.IGNORECASE)

        # Remove release groups and tags in brackets/parentheses
        title = re.sub(r'\[.*?\]', '', title)
        title = re.sub(r'\(.*?\)', '', title)
        title = re.sub(r'\{.*?\}', '', title)

        # Replace separators with spaces
        title = re.sub(r'[._-]+', ' ', title)

        # Clean up multiple spaces
        title = re.sub(r'\s+', ' ', title.strip())

        # Remove a release year left at the end (done after separator
        # cleanup so leftover dots do not hide the year)
        title = re.sub(r'\s*\b(19|20)\d{2}$', '', title)

        # Capitalize properly
        return title.title()

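    # Illustrative cleanup (hypothetical filename):
    # "Inception.2010.1080p.BluRay.x264" -> quality/codec tags removed ->
    # "Inception 2010" -> trailing year dropped -> "Inception".
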
    def _detect_content_type(self, filename: str, file_path: str) -> ContentType:
        """Detect content type from filename patterns with enhanced detection"""
        filename_lower = filename.lower()
        path_lower = file_path.lower()

        # Check for series patterns
        series_indicators = [
            r'[Ss]\d+[Ee]\d+',  # S01E01
            r'\d+x\d+',         # 1x01
            r'season\s*\d+',    # season 1
            r'episode\s*\d+',   # episode 1
            'episodes',
            'series'
        ]

        for indicator in series_indicators:
            if re.search(indicator, filename_lower) or re.search(indicator, path_lower):
                return ContentType.EPISODE

        # Check directory structure for series
        path_parts = file_path.split(os.sep)
        for part in path_parts:
            part_lower = part.lower()
            if any(term in part_lower for term in ['season', 'series', 'episodes', 'tv shows']):
                return ContentType.EPISODE

        # Check for documentary keywords
        doc_keywords = [
            'documentary', 'documental', 'docu',
            'making.of', 'behind.the.scenes', 'bts',
            'nat.geo', 'national.geographic', 'discovery',
            'bbc.earth', 'planet.earth'
        ]

        for keyword in doc_keywords:
            if keyword in filename_lower or keyword in path_lower:
                return ContentType.DOCUMENTARY

        # Default to movie
        return ContentType.MOVIE

    def _generate_slug(self, title: str) -> str:
        """Generate URL-friendly slug from title"""
        slug = title.lower()

        # Remove special characters
        slug = re.sub(r'[^\w\s-]', '', slug)

        # Replace spaces with hyphens
        slug = re.sub(r'[-\s]+', '-', slug)

        # Remove leading/trailing hyphens
        slug = slug.strip('-')

        # Ensure slug is not empty
        if not slug:
            slug = 'untitled'

        # Add unique suffix to avoid conflicts
        slug = f"{slug}-{uuid.uuid4().hex[:8]}"

        return slug

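    # Illustrative result: "The Matrix" -> "the-matrix-1a2b3c4d", where the
    # 8-character hex suffix comes from uuid4 and differs on every call.
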
    def _process_subtitles(self, video_path: str, content_id: int):
        """Process subtitle files for a video with enhanced matching"""
        try:
            video_dir = os.path.dirname(video_path)
            video_name = os.path.splitext(os.path.basename(video_path))[0]

            # Look for subtitle files
            for file in os.listdir(video_dir):
                try:
                    name_part = os.path.splitext(file)[0]
                    # Lowercase the extension so '.SRT' matches the format table
                    ext = os.path.splitext(file)[1].lower()

                    # Check if it's a subtitle file
                    if ext not in self.subtitle_formats:
                        continue

                    # Check if subtitle matches video (flexible matching)
                    name_part_lower = name_part.lower()
                    video_name_lower = video_name.lower()

                    # Various matching strategies
                    matches = False

                    # Exact match
                    if name_part_lower == video_name_lower:
                        matches = True
                    # Starts with video name
                    elif name_part_lower.startswith(video_name_lower):
                        matches = True
                    # Video name without year matches
                    else:
                        video_name_no_year = re.sub(r'\b(19|20)\d{2}\b', '', video_name_lower).strip()
                        if video_name_no_year and name_part_lower.startswith(video_name_no_year):
                            matches = True

                    if not matches:
                        continue

                    subtitle_path = os.path.join(video_dir, file)

                    # Skip if not readable
                    if not os.access(subtitle_path, os.R_OK):
                        continue

                    # Extract language from filename
                    language = self._extract_subtitle_language(name_part)

                    # Check if subtitle already exists
                    existing_sub = self.db.query(VODSubtitle).filter(
                        VODSubtitle.content_id == content_id,
                        VODSubtitle.file_url == subtitle_path
                    ).first()

                    if existing_sub:
                        continue

                    # Create subtitle record
                    subtitle = VODSubtitle(
                        content_id=content_id,
                        language=language['code'],
                        language_name=language['name'],
                        format=ext[1:],  # Remove dot
                        file_url=subtitle_path,
                        file_size_bytes=os.path.getsize(subtitle_path),
                        source_type='auto-detected'
                    )

                    self.db.add(subtitle)

                except Exception as e:
                    logger.debug(f"Error processing subtitle {file}: {e}")
                    continue

            self.db.commit()

        except Exception as e:
            logger.warning(f"Error processing subtitles for {video_path}: {e}")

    def _extract_subtitle_language(self, filename: str) -> Dict[str, str]:
        """Extract language from subtitle filename with enhanced detection"""
        # Extended language mappings
        lang_map = {
            'en': {'code': 'en', 'name': 'English'},
            'eng': {'code': 'en', 'name': 'English'},
            'english': {'code': 'en', 'name': 'English'},
            'es': {'code': 'es', 'name': 'Spanish'},
            'spa': {'code': 'es', 'name': 'Spanish'},
            'spanish': {'code': 'es', 'name': 'Spanish'},
            'fr': {'code': 'fr', 'name': 'French'},
            'fra': {'code': 'fr', 'name': 'French'},
            'french': {'code': 'fr', 'name': 'French'},
            'de': {'code': 'de', 'name': 'German'},
            'ger': {'code': 'de', 'name': 'German'},
            'german': {'code': 'de', 'name': 'German'},
            'it': {'code': 'it', 'name': 'Italian'},
            'ita': {'code': 'it', 'name': 'Italian'},
            'italian': {'code': 'it', 'name': 'Italian'},
            'pt': {'code': 'pt', 'name': 'Portuguese'},
            'por': {'code': 'pt', 'name': 'Portuguese'},
            'portuguese': {'code': 'pt', 'name': 'Portuguese'},
            'ru': {'code': 'ru', 'name': 'Russian'},
            'rus': {'code': 'ru', 'name': 'Russian'},
            'russian': {'code': 'ru', 'name': 'Russian'},
            'ja': {'code': 'ja', 'name': 'Japanese'},
            'jpn': {'code': 'ja', 'name': 'Japanese'},
            'japanese': {'code': 'ja', 'name': 'Japanese'},
            'zh': {'code': 'zh', 'name': 'Chinese'},
            'chi': {'code': 'zh', 'name': 'Chinese'},
            'chinese': {'code': 'zh', 'name': 'Chinese'},
            'ko': {'code': 'ko', 'name': 'Korean'},
            'kor': {'code': 'ko', 'name': 'Korean'},
            'korean': {'code': 'ko', 'name': 'Korean'},
            'ar': {'code': 'ar', 'name': 'Arabic'},
            'ara': {'code': 'ar', 'name': 'Arabic'},
            'arabic': {'code': 'ar', 'name': 'Arabic'}
        }

        filename_lower = filename.lower()

        # Look for language codes in filename
        for key, lang_info in lang_map.items():
            # Check for exact match with word boundaries
            if re.search(rf'\b{key}\b', filename_lower):
                return lang_info

        # Default to English
        return {'code': 'en', 'name': 'English'}

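    # Illustrative match (hypothetical filename): "Movie.Name.eng" returns
    # {'code': 'en', 'name': 'English'}; unrecognized names fall back to English.
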
    def _calculate_storage_info(self, path: str) -> Tuple[float, float]:
        """Calculate storage information for directory"""
        try:
            total, used, free = shutil.disk_usage(path)

            # Convert to GB
            total_gb = total / (1024 ** 3)
            free_gb = free / (1024 ** 3)

            return round(total_gb, 2), round(free_gb, 2)

        except Exception as e:
            logger.warning(f"Cannot calculate storage for {path}: {e}")
            return 0.0, 0.0

    def _count_directory_content(self, directory_id: int) -> int:
        """Count content items for a directory"""
        try:
            directory = self.db.query(VODDirectory).filter(
                VODDirectory.id == directory_id
            ).first()

            if not directory:
                return 0

            return self.db.query(VODContent).filter(
                VODContent.video_url.like(f"{directory.path}%")
            ).count()

        except Exception:
            return 0

    def get_directory_status(self, directory_id: int) -> Dict:
        """Get comprehensive status of a directory"""
        try:
            directory = self.db.query(VODDirectory).filter(
                VODDirectory.id == directory_id
            ).first()

            if not directory:
                raise ValueError(f"Directory not found: {directory_id}")

            # Get latest scan
            latest_scan = self.db.query(VODDirectoryScan).filter(
                VODDirectoryScan.directory_id == directory_id
            ).order_by(VODDirectoryScan.started_at.desc()).first()

            # Get content count
            content_count = self._count_directory_content(directory_id)

            # Recalculate storage info for the response (not persisted here)
            total_size, available_space = self._calculate_storage_info(directory.path)

            return {
                'directory': {
                    'id': directory.id,
                    'name': directory.name,
                    'path': directory.path,
                    'exists': os.path.exists(directory.path),
                    'is_active': directory.is_active,
                    'auto_scan': directory.auto_scan,
                    'scan_interval_minutes': directory.scan_interval_minutes,
                    'content_count': content_count,
                    'total_size_gb': total_size,
                    'available_space_gb': available_space,
                    'last_scan_at': directory.last_scan_at,
                    'created_at': directory.created_at
                },
                'latest_scan': {
                    'id': latest_scan.id,
                    'status': latest_scan.status,
                    'files_found': latest_scan.files_found,
                    'files_processed': latest_scan.files_processed,
                    'files_added': latest_scan.files_added,
                    'files_updated': latest_scan.files_updated,
                    'files_skipped': latest_scan.files_skipped,
                    'errors_count': latest_scan.errors_count,
                    'started_at': latest_scan.started_at,
                    'completed_at': latest_scan.completed_at,
                    'duration_seconds': latest_scan.duration_seconds
                } if latest_scan else None
            }

        except Exception as e:
            logger.error(f"Error getting directory status: {e}")
            raise

    def schedule_auto_scans(self) -> List[int]:
        """Schedule automatic scans for directories that need them"""
        try:
            now = datetime.utcnow()
            directories_to_scan = []

            directories = self.db.query(VODDirectory).filter(
                VODDirectory.is_active == True,
                VODDirectory.auto_scan == True
            ).all()

            for directory in directories:
                # Skip (and deactivate) directories that no longer exist
                if not os.path.exists(directory.path):
                    logger.warning(f"Auto-scan skipped: Directory not found {directory.path}")
                    directory.is_active = False
                    continue

                should_scan = False

                if not directory.last_scan_at:
                    # Never scanned before
                    should_scan = True
                else:
                    # Check if it's time for the next scan
                    next_scan_time = directory.last_scan_at + \
                        timedelta(minutes=directory.scan_interval_minutes)
                    if now >= next_scan_time:
                        should_scan = True

                if should_scan:
                    # Only queue the directory if no scan is currently running
                    active_scan = self.db.query(VODDirectoryScan).filter(
                        VODDirectoryScan.directory_id == directory.id,
                        VODDirectoryScan.status == 'running'
                    ).first()

                    if not active_scan:
                        directories_to_scan.append(directory.id)

            self.db.commit()
            return directories_to_scan

        except Exception as e:
            logger.error(f"Error scheduling auto scans: {e}")
            return []
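

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the service API).
# It assumes `get_db` is a generator that yields a SQLAlchemy Session, as in
# typical FastAPI setups; the "Movies" name and path below are placeholders.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    db = next(get_db())  # assumption: get_db() yields a Session
    try:
        service = VODDirectoryService(db)

        # Register a library root, creating it on disk if missing
        directory = service.create_directory(
            name="Movies",
            path="~/vod/movies",  # expanded and normalized by the service
            create_if_not_exists=True,
        )

        # Initial scan; deep_scan=True additionally probes durations via ffprobe
        scan = service.scan_directory(directory.id, deep_scan=False)
        print(f"Scan {scan.status}: {scan.files_added} added, "
              f"{scan.files_updated} updated, {scan.errors_count} errors")

        # A periodic scheduler could poll for due directories like this
        for due_id in service.schedule_auto_scans():
            service.scan_directory(due_id)
    finally:
        db.close()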