From ffe847fb5eb24fb01b8fc7d39e2e5d700774a115 Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Mon, 5 May 2025 00:53:31 +0200 Subject: [PATCH] refactor: extract launch script + adapt metadata extractor --- Dockerfile | 2 +- __init__.py | 0 scripts/__init__.py | 0 scripts/extract_metadata.py | 47 +++++++++ scripts/server.py | 71 ++++++++++++++ src/metadata_extractor.py | 185 ++++++++++++++++++++++++++++++++++++ src/server.py | 80 ++-------------- 7 files changed, 311 insertions(+), 74 deletions(-) create mode 100644 __init__.py create mode 100644 scripts/__init__.py create mode 100755 scripts/extract_metadata.py create mode 100755 scripts/server.py create mode 100644 src/metadata_extractor.py mode change 100755 => 100644 src/server.py diff --git a/Dockerfile b/Dockerfile index df1defb..04a42ca 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,4 +26,4 @@ COPY . . EXPOSE 8000 -CMD ["python", "src/server.py"] \ No newline at end of file +CMD ["python", "-m", "scripts.server"] \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/__init__.py b/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/scripts/extract_metadata.py b/scripts/extract_metadata.py new file mode 100755 index 0000000..b48c6c7 --- /dev/null +++ b/scripts/extract_metadata.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +import argparse +import logging +import os +import sys + +from src.metadata_extractor import MetadataExtractor + + +def main(): + logging.basicConfig( + level=logging.INFO, + format="[%(levelname)s] %(message)s" + ) + + parser = argparse.ArgumentParser( + description="Extract metadata from video files and save as JSON" + ) + parser.add_argument( + "input", + help="Path to input video file or directory" + ) + parser.add_argument( + "-o", "--output", + help="Directory path where the output JSON files will be saved" + ) + args = parser.parse_args() + + input_path = args.input + output_dir = args.output + + extractor: MetadataExtractor = MetadataExtractor() + + success = False + if os.path.isfile(input_path): + success = extractor.process_file(input_path, output_dir) + elif os.path.isdir(input_path): + success = extractor.process_directory(input_path, output_dir) + else: + logging.error(f"Path not found: {input_path}") + + if not success: + sys.exit(1) + +if __name__ == "__main__": + main() diff --git a/scripts/server.py b/scripts/server.py new file mode 100755 index 0000000..95dfe54 --- /dev/null +++ b/scripts/server.py @@ -0,0 +1,71 @@ +#!/usr/bin/env python3 + +import argparse +import logging + +from src.env_default import EnvDefault +from src.server import MeliesServer + + +def main(): + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + datefmt=r"%Y-%m-%d %H:%M:%S" + ) + + parser = argparse.ArgumentParser( + description="Starts the Melies server", + formatter_class=argparse.RawTextHelpFormatter + ) + parser.add_argument( + "-p", "--port", + action=EnvDefault, + envvar="MELIES_PORT", + default=8000, + type=int, + help="Port on which the server listens" + ) + parser.add_argument( + "--max-payload-size", + action=EnvDefault, + envvar="MELIES_MAX_PAYLOAD_SIZE", + default=1e6, + type=int, + help="Maximum POST payload size in bytes that the server accepts" + ) + parser.add_argument( + "--to-convert-dir", + action=EnvDefault, + envvar="MELIES_TO_CONVERT_DIR", + default="to_convert", + help="Path to the directory containing medias to convert" + ) + parser.add_argument( + "--converted-dir", + action=EnvDefault, + envvar="MELIES_CONVERTED_DIR", + default="converted", + help="Path to the directory containing converted medias" + ) + parser.add_argument( + "--metadata-dir", + action=EnvDefault, + envvar="MELIES_METADATA_DIR", + default="metadata", + help="Path to the directory containing metadata files" + ) + args = parser.parse_args() + + server = MeliesServer( + args.port, + args.to_convert_dir, + args.converted_dir, + args.metadata_dir, + args.max_payload_size + ) + server.start() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/src/metadata_extractor.py b/src/metadata_extractor.py new file mode 100644 index 0000000..cf45d22 --- /dev/null +++ b/src/metadata_extractor.py @@ -0,0 +1,185 @@ +import json +import logging +import os +import subprocess +from typing import Optional + + +class MetadataExtractor: + SUPPORTED_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi") + + def __init__(self): + self.logger: logging.Logger = logging.getLogger("MetadataExtractor") + + def analyze_file(self, path: str) -> Optional[dict]: + """ + Extracts metadata from a video file using ffprobe + + :param path: Path to the video file + :return: Metadata information or ``None`` if an error occurred + """ + + # Get general file info in JSON format + cmd: list[str] = [ + "ffprobe", + "-v", "quiet", + "-print_format", "json", + "-show_format", + "-show_streams", + path + ] + + try: + result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) + if result.returncode != 0: + self.logger.error(f"Error processing {path}: {result.stderr}") + return None + + data: dict = json.loads(result.stdout) + + # Extract filename and title + filename: str = os.path.basename(path) + title: str = data.get("format", {}).get("tags", {}).get("title", filename) + + # Initialize metadata structure + metadata: dict = { + "filename": filename, + "title": title, + "audio_tracks": [], + "subtitle_tracks": [] + } + + # Process streams + for stream in data.get("streams", []): + codec_type = stream.get("codec_type") + tags = stream.get("tags", {}) + disposition = stream.get("disposition", {}) + track = { + "index": stream.get("index"), + "language": tags.get("language", "und"), + "name": tags.get("title", ""), + "flags": { + "default": disposition.get("default", 0) == 1, + "original": disposition.get("original", 0) == 1, + "commentary": disposition.get("commentary", 0) == 1 + } + } + + if codec_type == "audio": + track |= { + "channels": stream.get("channels", 0) + } + track["flags"] |= { + "visual_impaired": disposition.get("visual_impaired", 0) == 1 + } + metadata["audio_tracks"].append(track) + + elif codec_type == "subtitle": + track["flags"] |= { + "forced": disposition.get("forced", 0) == 1, + "hearing_impaired": disposition.get("hearing_impaired", 0) == 1 + } + metadata["subtitle_tracks"].append(track) + + else: + self.logger.warning(f"Unknown track codec type '{codec_type}'") + + return metadata + + except Exception as e: + self.logger.error(f"Error processing {path}: {str(e)}") + return None + + def process_file(self, file_path: str, output_dir: str) -> bool: + """ + Processes a single video file and writes metadata to a JSON file + + :param file_path: Path of the video file + :param output_dir: Path of the directory where the output JSON file will be saved + :return: True if successful, False otherwise + """ + + if not os.path.isfile(file_path): + self.logger.error(f"File not found: {file_path}") + return False + + if not file_path.lower().endswith(self.SUPPORTED_EXTENSIONS): + self.logger.error(f"Unsupported file format: {file_path}") + return False + + self.logger.debug(f"Extracting metadata from {os.path.basename(file_path)}") + metadata: Optional[dict] = self.analyze_file(file_path) + + if metadata: + # Generate output filename based on input file + filename = os.path.basename(os.path.splitext(file_path)[0]) + "_metadata.json" + + if output_dir: + # Ensure output directory exists + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, filename) + else: + # If no output directory specified, save in the same directory as the input file + base_name = os.path.splitext(file_path)[0] + output_path = f"{base_name}_metadata.json" + + # Write metadata to JSON file + with open(output_path, "w", encoding="utf-8") as f: + json.dump(metadata, f, indent=2, ensure_ascii=False) + + self.logger.debug(f"Metadata saved to {output_path}") + return True + + return False + + def process_directory(self, directory_path: str, output_dir: Optional[str] = None) -> bool: + """ + Processes all video files in a directory and writes metadata to a JSON file + + :param directory_path: Path of the directory + :param output_dir: Path of the directory where the output JSON file will be saved + :return: True if successful, False otherwise + """ + + if not os.path.isdir(directory_path): + self.logger.error(f"Directory not found: {directory_path}") + return False + + all_metadata: dict[str, dict] = {} + file_count: int = 0 + + for root, _, files in os.walk(directory_path): + for file in files: + if file.lower().endswith(self.SUPPORTED_EXTENSIONS): + file_path: str = os.path.join(root, file) + self.logger.debug(f"Extracting metadata from {file}") + metadata: Optional[dict] = self.analyze_file(file_path) + + if metadata: + # Use relative path as key + rel_path: str = os.path.relpath(file_path, directory_path) + all_metadata[rel_path] = metadata + file_count += 1 + + if file_count == 0: + self.logger.error(f"No supported video files found in {directory_path}") + return False + + # Generate output filename based on directory name + dir_name: str = os.path.basename(os.path.normpath(directory_path)) + filename: str = f"{dir_name}_metadata.json" + + if output_dir is not None: + # Ensure output directory exists + os.makedirs(output_dir, exist_ok=True) + output_path = os.path.join(output_dir, filename) + else: + # If no output directory specified, save in the current directory + output_path = filename + + # Write all metadata to a single JSON file + with open(output_path, "w", encoding="utf-8") as f: + json.dump(all_metadata, f, indent=2, ensure_ascii=False) + + self.logger.debug(f"Metadata for {file_count} files saved to {output_path}") + return True \ No newline at end of file diff --git a/src/server.py b/src/server.py old mode 100755 new mode 100644 index ecf00a4..59a2b44 --- a/src/server.py +++ b/src/server.py @@ -1,8 +1,5 @@ -#!/usr/bin/env python3 - from __future__ import annotations -import argparse import json import logging import os @@ -11,6 +8,7 @@ import time from functools import partial from http import HTTPStatus from http.server import SimpleHTTPRequestHandler +from logging import Logger from typing import Optional from urllib.parse import parse_qs, unquote, urlparse @@ -19,7 +17,6 @@ from watchdog.events import (FileClosedEvent, FileDeletedEvent, FileMovedEvent, from watchdog.observers import Observer from watchdog.observers.api import BaseObserver -from src.env_default import EnvDefault from src.file_handlers import ToConvertFileHandler, MetadataFileHandler @@ -39,7 +36,7 @@ class HTTPHandler(SimpleHTTPRequestHandler): self.data: Optional[dict|list] = None def log_message(self, format, *args): - logging.info("%s - %s" % ( + self.server_.logger.info("%s - %s" % ( self.client_address[0], format % args )) @@ -131,6 +128,7 @@ class MeliesServer(FileSystemEventHandler): max_payload_size: int): super().__init__() + self.logger: Logger = logging.getLogger("MeliesServer") self.port: int = port self.to_convert_dir: str = to_convert_dir @@ -148,12 +146,6 @@ class MeliesServer(FileSystemEventHandler): self.to_convert_files: ToConvertFileHandler = ToConvertFileHandler(self.to_convert_dir) self.metadata_files: MetadataFileHandler = MetadataFileHandler(self.metadata_dir) - logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", - datefmt=r"%Y-%m-%d %H:%M:%S" - ) - self.httpd: Optional[socketserver.TCPServer] = None self.observer: BaseObserver = Observer() self.observer.schedule( @@ -170,7 +162,7 @@ class MeliesServer(FileSystemEventHandler): self.observer.start() try: with socketserver.TCPServer(("", self.port), self.http_handler_cls) as self.httpd: - logging.info(f"Serving on port {self.port}") + self.logger.info(f"Serving on port {self.port}") self.httpd.serve_forever() except KeyboardInterrupt: pass @@ -181,17 +173,17 @@ class MeliesServer(FileSystemEventHandler): self.observer.join() def on_deleted(self, event: FileDeletedEvent): - logging.info(f"Converted media deleted: {event.src_path}") + self.logger.info(f"Converted media deleted: {event.src_path}") self.delete_metadata(event.src_path) return super().on_deleted(event) def on_moved(self, event: FileMovedEvent): - logging.info(f"Converted media moved: {event.src_path} -> {event.dest_path}") + self.logger.info(f"Converted media moved: {event.src_path} -> {event.dest_path}") self.rename_metadata(event.src_path, event.dest_path) return super().on_moved(event) def on_closed(self, event: FileClosedEvent): - logging.info(f"Converted media created or modified: {event.src_path}") + self.logger.info(f"Converted media created or modified: {event.src_path}") self.extract_metadata(event.src_path) return super().on_closed(event) @@ -203,61 +195,3 @@ class MeliesServer(FileSystemEventHandler): def delete_metadata(self, path: str): pass - - -def main(): - parser = argparse.ArgumentParser( - description="Starts the Melies server", - formatter_class=argparse.RawTextHelpFormatter - ) - parser.add_argument( - "-p", "--port", - action=EnvDefault, - envvar="MELIES_PORT", - default=8000, - type=int, - help="Port on which the server listens" - ) - parser.add_argument( - "--max-payload-size", - action=EnvDefault, - envvar="MELIES_MAX_PAYLOAD_SIZE", - default=1e6, - type=int, - help="Maximum POST payload size in bytes that the server accepts" - ) - parser.add_argument( - "--to-convert-dir", - action=EnvDefault, - envvar="MELIES_TO_CONVERT_DIR", - default="to_convert", - help="Path to the directory containing medias to convert" - ) - parser.add_argument( - "--converted-dir", - action=EnvDefault, - envvar="MELIES_CONVERTED_DIR", - default="converted", - help="Path to the directory containing converted medias" - ) - parser.add_argument( - "--metadata-dir", - action=EnvDefault, - envvar="MELIES_METADATA_DIR", - default="metadata", - help="Path to the directory containing metadata files" - ) - args = parser.parse_args() - - server = MeliesServer( - args.port, - args.to_convert_dir, - args.converted_dir, - args.metadata_dir, - args.max_payload_size - ) - server.start() - - -if __name__ == "__main__": - main()