refactor: extract launch script + adapt metadata extractor

This commit is contained in:
Louis Heredero 2025-05-05 00:53:31 +02:00
parent 775d3da6ed
commit ffe847fb5e
Signed by: HEL
GPG Key ID: 8D83DE470F8544E7
7 changed files with 311 additions and 74 deletions

View File

@ -26,4 +26,4 @@ COPY . .
EXPOSE 8000 EXPOSE 8000
CMD ["python", "src/server.py"] CMD ["python", "-m", "scripts.server"]

0
__init__.py Normal file
View File

0
scripts/__init__.py Normal file
View File

47
scripts/extract_metadata.py Executable file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env python3
import argparse
import logging
import os
import sys
from src.metadata_extractor import MetadataExtractor
def main():
    """
    Command-line entry point: extracts metadata from a video file or from
    every supported video in a directory, and saves it as JSON

    Exits with status 1 when the input path does not exist or when the
    extractor reports a failure.
    """
    logging.basicConfig(
        format="[%(levelname)s] %(message)s",
        level=logging.INFO,
    )

    cli = argparse.ArgumentParser(
        description="Extract metadata from video files and save as JSON"
    )
    cli.add_argument("input", help="Path to input video file or directory")
    cli.add_argument(
        "-o", "--output",
        help="Directory path where the output JSON files will be saved"
    )
    options = cli.parse_args()

    source = options.input
    destination = options.output
    extractor: MetadataExtractor = MetadataExtractor()

    # Dispatch on the kind of path given; anything else is an error
    if os.path.isfile(source):
        ok = extractor.process_file(source, destination)
    elif os.path.isdir(source):
        ok = extractor.process_directory(source, destination)
    else:
        logging.error(f"Path not found: {source}")
        ok = False

    if not ok:
        sys.exit(1)


if __name__ == "__main__":
    main()

71
scripts/server.py Executable file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
import argparse
import logging
from src.env_default import EnvDefault
from src.server import MeliesServer
def main():
    """
    Command-line entry point: configures logging, parses the server options
    and starts a ``MeliesServer``

    Every option is registered with the ``EnvDefault`` action and an
    ``envvar`` name (presumably the environment variable that can supply the
    value when the flag is omitted - confirm against ``src.env_default``).
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt=r"%Y-%m-%d %H:%M:%S"
    )

    parser = argparse.ArgumentParser(
        description="Starts the Melies server",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-p", "--port",
        action=EnvDefault,
        envvar="MELIES_PORT",
        default=8000,
        type=int,
        help="Port on which the server listens"
    )
    parser.add_argument(
        "--max-payload-size",
        action=EnvDefault,
        envvar="MELIES_MAX_PAYLOAD_SIZE",
        # Must be an int literal: argparse only applies `type` to string
        # defaults, so a float default such as 1e6 would be passed through
        # to the server unchanged as 1000000.0
        default=1_000_000,
        type=int,
        help="Maximum POST payload size in bytes that the server accepts"
    )
    parser.add_argument(
        "--to-convert-dir",
        action=EnvDefault,
        envvar="MELIES_TO_CONVERT_DIR",
        default="to_convert",
        help="Path to the directory containing medias to convert"
    )
    parser.add_argument(
        "--converted-dir",
        action=EnvDefault,
        envvar="MELIES_CONVERTED_DIR",
        default="converted",
        help="Path to the directory containing converted medias"
    )
    parser.add_argument(
        "--metadata-dir",
        action=EnvDefault,
        envvar="MELIES_METADATA_DIR",
        default="metadata",
        help="Path to the directory containing metadata files"
    )
    args = parser.parse_args()

    server = MeliesServer(
        args.port,
        args.to_convert_dir,
        args.converted_dir,
        args.metadata_dir,
        args.max_payload_size
    )
    server.start()


if __name__ == "__main__":
    main()

185
src/metadata_extractor.py Normal file
View File

@ -0,0 +1,185 @@
import json
import logging
import os
import subprocess
from typing import Optional
class MetadataExtractor:
    """
    Extracts audio and subtitle track metadata from video files using
    ``ffprobe`` and saves it as JSON
    """

    # File extensions (compared case-insensitively) accepted as video files
    SUPPORTED_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi")

    def __init__(self):
        self.logger: logging.Logger = logging.getLogger("MetadataExtractor")

    def analyze_file(self, path: str) -> Optional[dict]:
        """
        Extracts metadata from a video file using ffprobe

        :param path: Path to the video file
        :return: Metadata information or ``None`` if an error occurred
        """
        # Get general file info in JSON format
        cmd: list[str] = [
            "ffprobe",
            "-v", "quiet",
            "-print_format", "json",
            "-show_format",
            "-show_streams",
            path
        ]

        try:
            result = subprocess.run(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True
            )
            if result.returncode != 0:
                self.logger.error(f"Error processing {path}: {result.stderr}")
                return None

            data: dict = json.loads(result.stdout)

            # The container title falls back to the file name when untagged
            filename: str = os.path.basename(path)
            title: str = data.get("format", {}).get("tags", {}).get("title", filename)

            metadata: dict = {
                "filename": filename,
                "title": title,
                "audio_tracks": [],
                "subtitle_tracks": []
            }

            for stream in data.get("streams", []):
                codec_type = stream.get("codec_type")
                tags = stream.get("tags", {})
                disposition = stream.get("disposition", {})

                # Fields shared by audio and subtitle tracks
                track = {
                    "index": stream.get("index"),
                    "language": tags.get("language", "und"),
                    "name": tags.get("title", ""),
                    "flags": {
                        "default": disposition.get("default", 0) == 1,
                        "original": disposition.get("original", 0) == 1,
                        "commentary": disposition.get("commentary", 0) == 1
                    }
                }

                if codec_type == "audio":
                    track["channels"] = stream.get("channels", 0)
                    track["flags"]["visual_impaired"] = (
                        disposition.get("visual_impaired", 0) == 1
                    )
                    metadata["audio_tracks"].append(track)

                elif codec_type == "subtitle":
                    track["flags"]["forced"] = disposition.get("forced", 0) == 1
                    track["flags"]["hearing_impaired"] = (
                        disposition.get("hearing_impaired", 0) == 1
                    )
                    metadata["subtitle_tracks"].append(track)

                else:
                    # NOTE(review): every other codec type ends up here,
                    # which presumably includes the video stream itself -
                    # confirm whether a warning is wanted for those
                    self.logger.warning(f"Unknown track codec type '{codec_type}'")

            return metadata

        except Exception as e:
            # Deliberately broad: any failure while running ffprobe or
            # reading its output is reported as "no metadata"
            self.logger.error(f"Error processing {path}: {str(e)}")
            return None

    def _write_json(self, data: dict, output_dir: Optional[str], filename: str,
                    default_path: str) -> Optional[str]:
        """
        Writes data as an indented UTF-8 JSON file

        :param data: JSON-serializable content
        :param output_dir: Directory receiving ``filename``; created if missing.
            When empty or ``None``, ``default_path`` is used instead
        :param filename: Name of the file inside ``output_dir``
        :param default_path: Path used when no output directory is given
        :return: Path of the written file or ``None`` if writing failed
        """
        try:
            if output_dir:
                # Ensure output directory exists
                os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, filename)
            else:
                output_path = default_path

            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(data, f, indent=2, ensure_ascii=False)
        except OSError as e:
            self.logger.error(f"Could not write metadata file: {e}")
            return None

        return output_path

    def process_file(self, file_path: str, output_dir: Optional[str] = None) -> bool:
        """
        Processes a single video file and writes metadata to a JSON file

        :param file_path: Path of the video file
        :param output_dir: Path of the directory where the output JSON file will be saved.
            When empty or ``None``, the file is saved next to the input file
        :return: True if successful, False otherwise
        """
        if not os.path.isfile(file_path):
            self.logger.error(f"File not found: {file_path}")
            return False

        if not file_path.lower().endswith(self.SUPPORTED_EXTENSIONS):
            self.logger.error(f"Unsupported file format: {file_path}")
            return False

        self.logger.debug(f"Extracting metadata from {os.path.basename(file_path)}")
        metadata: Optional[dict] = self.analyze_file(file_path)
        if metadata is None:
            return False

        # Output name is the input path without extension + "_metadata.json"
        default_path: str = f"{os.path.splitext(file_path)[0]}_metadata.json"
        output_path = self._write_json(
            metadata,
            output_dir,
            os.path.basename(default_path),
            default_path
        )
        if output_path is None:
            return False

        self.logger.debug(f"Metadata saved to {output_path}")
        return True

    def process_directory(self, directory_path: str, output_dir: Optional[str] = None) -> bool:
        """
        Processes all video files in a directory and writes metadata to a JSON file

        :param directory_path: Path of the directory
        :param output_dir: Path of the directory where the output JSON file will be saved.
            When empty or ``None``, the file is saved in the current directory
        :return: True if successful, False otherwise
        """
        if not os.path.isdir(directory_path):
            self.logger.error(f"Directory not found: {directory_path}")
            return False

        all_metadata: dict[str, dict] = {}
        file_count: int = 0

        for root, _, files in os.walk(directory_path):
            for file in files:
                if not file.lower().endswith(self.SUPPORTED_EXTENSIONS):
                    continue

                file_path: str = os.path.join(root, file)
                self.logger.debug(f"Extracting metadata from {file}")
                metadata: Optional[dict] = self.analyze_file(file_path)
                if metadata is not None:
                    # Use relative path as key
                    rel_path: str = os.path.relpath(file_path, directory_path)
                    all_metadata[rel_path] = metadata
                    file_count += 1

        if file_count == 0:
            self.logger.error(f"No supported video files found in {directory_path}")
            return False

        # Generate output filename based on directory name
        dir_name: str = os.path.basename(os.path.normpath(directory_path))
        filename: str = f"{dir_name}_metadata.json"

        # Write all metadata to a single JSON file
        output_path = self._write_json(all_metadata, output_dir, filename, filename)
        if output_path is None:
            return False

        self.logger.debug(f"Metadata for {file_count} files saved to {output_path}")
        return True

80
src/server.py Executable file → Normal file
View File

@ -1,8 +1,5 @@
#!/usr/bin/env python3
from __future__ import annotations from __future__ import annotations
import argparse
import json import json
import logging import logging
import os import os
@ -11,6 +8,7 @@ import time
from functools import partial from functools import partial
from http import HTTPStatus from http import HTTPStatus
from http.server import SimpleHTTPRequestHandler from http.server import SimpleHTTPRequestHandler
from logging import Logger
from typing import Optional from typing import Optional
from urllib.parse import parse_qs, unquote, urlparse from urllib.parse import parse_qs, unquote, urlparse
@ -19,7 +17,6 @@ from watchdog.events import (FileClosedEvent, FileDeletedEvent, FileMovedEvent,
from watchdog.observers import Observer from watchdog.observers import Observer
from watchdog.observers.api import BaseObserver from watchdog.observers.api import BaseObserver
from src.env_default import EnvDefault
from src.file_handlers import ToConvertFileHandler, MetadataFileHandler from src.file_handlers import ToConvertFileHandler, MetadataFileHandler
@ -39,7 +36,7 @@ class HTTPHandler(SimpleHTTPRequestHandler):
self.data: Optional[dict|list] = None self.data: Optional[dict|list] = None
def log_message(self, format, *args): def log_message(self, format, *args):
logging.info("%s - %s" % ( self.server_.logger.info("%s - %s" % (
self.client_address[0], self.client_address[0],
format % args format % args
)) ))
@ -131,6 +128,7 @@ class MeliesServer(FileSystemEventHandler):
max_payload_size: int): max_payload_size: int):
super().__init__() super().__init__()
self.logger: Logger = logging.getLogger("MeliesServer")
self.port: int = port self.port: int = port
self.to_convert_dir: str = to_convert_dir self.to_convert_dir: str = to_convert_dir
@ -148,12 +146,6 @@ class MeliesServer(FileSystemEventHandler):
self.to_convert_files: ToConvertFileHandler = ToConvertFileHandler(self.to_convert_dir) self.to_convert_files: ToConvertFileHandler = ToConvertFileHandler(self.to_convert_dir)
self.metadata_files: MetadataFileHandler = MetadataFileHandler(self.metadata_dir) self.metadata_files: MetadataFileHandler = MetadataFileHandler(self.metadata_dir)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt=r"%Y-%m-%d %H:%M:%S"
)
self.httpd: Optional[socketserver.TCPServer] = None self.httpd: Optional[socketserver.TCPServer] = None
self.observer: BaseObserver = Observer() self.observer: BaseObserver = Observer()
self.observer.schedule( self.observer.schedule(
@ -170,7 +162,7 @@ class MeliesServer(FileSystemEventHandler):
self.observer.start() self.observer.start()
try: try:
with socketserver.TCPServer(("", self.port), self.http_handler_cls) as self.httpd: with socketserver.TCPServer(("", self.port), self.http_handler_cls) as self.httpd:
logging.info(f"Serving on port {self.port}") self.logger.info(f"Serving on port {self.port}")
self.httpd.serve_forever() self.httpd.serve_forever()
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
@ -181,17 +173,17 @@ class MeliesServer(FileSystemEventHandler):
self.observer.join() self.observer.join()
def on_deleted(self, event: FileDeletedEvent): def on_deleted(self, event: FileDeletedEvent):
logging.info(f"Converted media deleted: {event.src_path}") self.logger.info(f"Converted media deleted: {event.src_path}")
self.delete_metadata(event.src_path) self.delete_metadata(event.src_path)
return super().on_deleted(event) return super().on_deleted(event)
def on_moved(self, event: FileMovedEvent): def on_moved(self, event: FileMovedEvent):
logging.info(f"Converted media moved: {event.src_path} -> {event.dest_path}") self.logger.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
self.rename_metadata(event.src_path, event.dest_path) self.rename_metadata(event.src_path, event.dest_path)
return super().on_moved(event) return super().on_moved(event)
def on_closed(self, event: FileClosedEvent): def on_closed(self, event: FileClosedEvent):
logging.info(f"Converted media created or modified: {event.src_path}") self.logger.info(f"Converted media created or modified: {event.src_path}")
self.extract_metadata(event.src_path) self.extract_metadata(event.src_path)
return super().on_closed(event) return super().on_closed(event)
@ -203,61 +195,3 @@ class MeliesServer(FileSystemEventHandler):
def delete_metadata(self, path: str): def delete_metadata(self, path: str):
pass pass
def main():
parser = argparse.ArgumentParser(
description="Starts the Melies server",
formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument(
"-p", "--port",
action=EnvDefault,
envvar="MELIES_PORT",
default=8000,
type=int,
help="Port on which the server listens"
)
parser.add_argument(
"--max-payload-size",
action=EnvDefault,
envvar="MELIES_MAX_PAYLOAD_SIZE",
default=1e6,
type=int,
help="Maximum POST payload size in bytes that the server accepts"
)
parser.add_argument(
"--to-convert-dir",
action=EnvDefault,
envvar="MELIES_TO_CONVERT_DIR",
default="to_convert",
help="Path to the directory containing medias to convert"
)
parser.add_argument(
"--converted-dir",
action=EnvDefault,
envvar="MELIES_CONVERTED_DIR",
default="converted",
help="Path to the directory containing converted medias"
)
parser.add_argument(
"--metadata-dir",
action=EnvDefault,
envvar="MELIES_METADATA_DIR",
default="metadata",
help="Path to the directory containing metadata files"
)
args = parser.parse_args()
server = MeliesServer(
args.port,
args.to_convert_dir,
args.converted_dir,
args.metadata_dir,
args.max_payload_size
)
server.start()
if __name__ == "__main__":
main()