refactor: extract launch script + adapt metadata extractor

This commit is contained in:
Louis Heredero 2025-05-05 00:53:31 +02:00
parent 775d3da6ed
commit ffe847fb5e
Signed by: HEL
GPG Key ID: 8D83DE470F8544E7
7 changed files with 311 additions and 74 deletions

View File

@ -26,4 +26,4 @@ COPY . .
EXPOSE 8000
CMD ["python", "src/server.py"]
CMD ["python", "-m", "scripts.server"]

0
__init__.py Normal file
View File

0
scripts/__init__.py Normal file
View File

47
scripts/extract_metadata.py Executable file
View File

@ -0,0 +1,47 @@
#!/usr/bin/env python3
import argparse
import logging
import os
import sys
from src.metadata_extractor import MetadataExtractor
def main():
    """
    Command-line entry point: extracts metadata from a video file or from
    every supported video in a directory, and exits with status 1 when the
    extraction did not succeed
    """
    logging.basicConfig(level=logging.INFO, format="[%(levelname)s] %(message)s")

    cli = argparse.ArgumentParser(
        description="Extract metadata from video files and save as JSON"
    )
    cli.add_argument("input", help="Path to input video file or directory")
    cli.add_argument(
        "-o", "--output",
        help="Directory path where the output JSON files will be saved"
    )
    options = cli.parse_args()

    source = options.input
    destination = options.output
    extractor: MetadataExtractor = MetadataExtractor()

    # Dispatch on the kind of path given; anything else is reported as missing
    if os.path.isfile(source):
        ok = extractor.process_file(source, destination)
    elif os.path.isdir(source):
        ok = extractor.process_directory(source, destination)
    else:
        logging.error(f"Path not found: {source}")
        ok = False

    if not ok:
        sys.exit(1)


if __name__ == "__main__":
    main()

71
scripts/server.py Executable file
View File

@ -0,0 +1,71 @@
#!/usr/bin/env python3
import argparse
import logging
from src.env_default import EnvDefault
from src.server import MeliesServer
def main():
    """
    Configures logging, parses the command line and starts the Melies server

    Each option is registered with the ``EnvDefault`` action together with an
    ``envvar`` name (presumably the environment variable provides the value
    when the flag is omitted -- confirm in ``src.env_default``).
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt=r"%Y-%m-%d %H:%M:%S"
    )

    parser = argparse.ArgumentParser(
        description="Starts the Melies server",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-p", "--port",
        action=EnvDefault,
        envvar="MELIES_PORT",
        default=8000,
        type=int,
        help="Port on which the server listens"
    )
    parser.add_argument(
        "--max-payload-size",
        action=EnvDefault,
        envvar="MELIES_MAX_PAYLOAD_SIZE",
        # argparse only applies ``type`` to string defaults, so the default
        # must already be an int (``1e6`` would reach the server as a float)
        default=1_000_000,
        type=int,
        help="Maximum POST payload size in bytes that the server accepts"
    )
    parser.add_argument(
        "--to-convert-dir",
        action=EnvDefault,
        envvar="MELIES_TO_CONVERT_DIR",
        default="to_convert",
        help="Path to the directory containing medias to convert"
    )
    parser.add_argument(
        "--converted-dir",
        action=EnvDefault,
        envvar="MELIES_CONVERTED_DIR",
        default="converted",
        help="Path to the directory containing converted medias"
    )
    parser.add_argument(
        "--metadata-dir",
        action=EnvDefault,
        envvar="MELIES_METADATA_DIR",
        default="metadata",
        help="Path to the directory containing metadata files"
    )
    args = parser.parse_args()

    server = MeliesServer(
        args.port,
        args.to_convert_dir,
        args.converted_dir,
        args.metadata_dir,
        args.max_payload_size
    )
    server.start()


if __name__ == "__main__":
    main()

185
src/metadata_extractor.py Normal file
View File

@ -0,0 +1,185 @@
import json
import logging
import os
import subprocess
from typing import Optional
class MetadataExtractor:
    """
    Extracts audio and subtitle track metadata from video files with ffprobe
    and saves the result as JSON files
    """

    # Lowercase file extensions (with leading dot) accepted by the extractor
    SUPPORTED_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi")

    def __init__(self):
        self.logger: logging.Logger = logging.getLogger("MetadataExtractor")

    @staticmethod
    def _build_track(stream: dict) -> dict:
        """
        Builds the fields shared by audio and subtitle tracks

        :param stream: One entry of the ``streams`` list reported by ffprobe
        :return: Track description with index, language, name and common flags
        """
        tags = stream.get("tags", {})
        disposition = stream.get("disposition", {})
        return {
            "index": stream.get("index"),
            "language": tags.get("language", "und"),
            "name": tags.get("title", ""),
            "flags": {
                "default": disposition.get("default", 0) == 1,
                "original": disposition.get("original", 0) == 1,
                "commentary": disposition.get("commentary", 0) == 1
            }
        }

    @staticmethod
    def _write_json(data: dict, output_path: str) -> None:
        """
        Writes a dictionary to a file as indented UTF-8 JSON

        :param data: Data to serialize
        :param output_path: Path of the JSON file to create or overwrite
        """
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

    def analyze_file(self, path: str) -> Optional[dict]:
        """
        Extracts metadata from a video file using ffprobe

        :param path: Path to the video file
        :return: Metadata information or ``None`` if an error occurred
        """
        # Get general file info in JSON format
        cmd: list[str] = [
            "ffprobe",
            "-v", "quiet",
            "-print_format", "json",
            "-show_format",
            "-show_streams",
            path
        ]

        try:
            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

            if result.returncode != 0:
                self.logger.error(f"Error processing {path}: {result.stderr}")
                return None

            data: dict = json.loads(result.stdout)

            # Extract filename and title (the filename is the fallback title)
            filename: str = os.path.basename(path)
            title: str = data.get("format", {}).get("tags", {}).get("title", filename)

            # Initialize metadata structure
            metadata: dict = {
                "filename": filename,
                "title": title,
                "audio_tracks": [],
                "subtitle_tracks": []
            }

            # Process streams
            for stream in data.get("streams", []):
                codec_type = stream.get("codec_type")
                disposition = stream.get("disposition", {})

                if codec_type == "audio":
                    track = self._build_track(stream)
                    track |= {
                        "channels": stream.get("channels", 0)
                    }
                    track["flags"] |= {
                        "visual_impaired": disposition.get("visual_impaired", 0) == 1
                    }
                    metadata["audio_tracks"].append(track)

                elif codec_type == "subtitle":
                    track = self._build_track(stream)
                    track["flags"] |= {
                        "forced": disposition.get("forced", 0) == 1,
                        "hearing_impaired": disposition.get("hearing_impaired", 0) == 1
                    }
                    metadata["subtitle_tracks"].append(track)

                elif codec_type == "video":
                    # Video streams are expected in video files and are not
                    # recorded, so they must not be reported as unknown
                    continue

                else:
                    self.logger.warning(f"Unknown track codec type '{codec_type}'")

            return metadata

        except Exception as e:
            # Deliberate catch-all: a missing ffprobe binary, invalid JSON or
            # an unexpected structure all mean "no metadata for this file"
            self.logger.error(f"Error processing {path}: {str(e)}")
            return None

    def process_file(self, file_path: str, output_dir: Optional[str] = None) -> bool:
        """
        Processes a single video file and writes metadata to a JSON file

        :param file_path: Path of the video file
        :param output_dir: Path of the directory where the output JSON file will be saved.
                           If ``None`` or empty, the file is saved next to the input file
        :return: True if successful, False otherwise
        :raises OSError: If the output directory or file cannot be written
        """
        if not os.path.isfile(file_path):
            self.logger.error(f"File not found: {file_path}")
            return False

        if not file_path.lower().endswith(self.SUPPORTED_EXTENSIONS):
            self.logger.error(f"Unsupported file format: {file_path}")
            return False

        self.logger.debug(f"Extracting metadata from {os.path.basename(file_path)}")
        metadata: Optional[dict] = self.analyze_file(file_path)
        if metadata is None:
            return False

        # Input path without its extension, used to derive the output name
        base_name: str = os.path.splitext(file_path)[0]

        if output_dir:
            # Ensure output directory exists
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, os.path.basename(base_name) + "_metadata.json")
        else:
            # If no output directory specified, save in the same directory as the input file
            output_path = f"{base_name}_metadata.json"

        # Write metadata to JSON file
        self._write_json(metadata, output_path)

        self.logger.debug(f"Metadata saved to {output_path}")
        return True

    def process_directory(self, directory_path: str, output_dir: Optional[str] = None) -> bool:
        """
        Processes all video files in a directory and writes metadata to a JSON file

        :param directory_path: Path of the directory
        :param output_dir: Path of the directory where the output JSON file will be saved.
                           If ``None`` or empty, the file is saved in the current directory
        :return: True if successful, False otherwise
        :raises OSError: If the output directory or file cannot be written
        """
        if not os.path.isdir(directory_path):
            self.logger.error(f"Directory not found: {directory_path}")
            return False

        all_metadata: dict[str, dict] = {}
        file_count: int = 0

        for root, _, files in os.walk(directory_path):
            for file in files:
                if not file.lower().endswith(self.SUPPORTED_EXTENSIONS):
                    continue

                file_path: str = os.path.join(root, file)
                self.logger.debug(f"Extracting metadata from {file}")
                metadata: Optional[dict] = self.analyze_file(file_path)

                if metadata is not None:
                    # Use relative path as key
                    rel_path: str = os.path.relpath(file_path, directory_path)
                    all_metadata[rel_path] = metadata
                    file_count += 1

        if file_count == 0:
            self.logger.error(f"No supported video files found in {directory_path}")
            return False

        # Generate output filename based on directory name
        dir_name: str = os.path.basename(os.path.normpath(directory_path))
        filename: str = f"{dir_name}_metadata.json"

        # Same truthiness test as process_file: an empty string means "not
        # specified" (os.makedirs("") would raise FileNotFoundError)
        if output_dir:
            # Ensure output directory exists
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, filename)
        else:
            # If no output directory specified, save in the current directory
            output_path = filename

        # Write all metadata to a single JSON file
        self._write_json(all_metadata, output_path)

        self.logger.debug(f"Metadata for {file_count} files saved to {output_path}")
        return True

80
src/server.py Executable file → Normal file
View File

@ -1,8 +1,5 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import logging
import os
@ -11,6 +8,7 @@ import time
from functools import partial
from http import HTTPStatus
from http.server import SimpleHTTPRequestHandler
from logging import Logger
from typing import Optional
from urllib.parse import parse_qs, unquote, urlparse
@ -19,7 +17,6 @@ from watchdog.events import (FileClosedEvent, FileDeletedEvent, FileMovedEvent,
from watchdog.observers import Observer
from watchdog.observers.api import BaseObserver
from src.env_default import EnvDefault
from src.file_handlers import ToConvertFileHandler, MetadataFileHandler
@ -39,7 +36,7 @@ class HTTPHandler(SimpleHTTPRequestHandler):
self.data: Optional[dict|list] = None
def log_message(self, format, *args):
logging.info("%s - %s" % (
self.server_.logger.info("%s - %s" % (
self.client_address[0],
format % args
))
@ -131,6 +128,7 @@ class MeliesServer(FileSystemEventHandler):
max_payload_size: int):
super().__init__()
self.logger: Logger = logging.getLogger("MeliesServer")
self.port: int = port
self.to_convert_dir: str = to_convert_dir
@ -148,12 +146,6 @@ class MeliesServer(FileSystemEventHandler):
self.to_convert_files: ToConvertFileHandler = ToConvertFileHandler(self.to_convert_dir)
self.metadata_files: MetadataFileHandler = MetadataFileHandler(self.metadata_dir)
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt=r"%Y-%m-%d %H:%M:%S"
)
self.httpd: Optional[socketserver.TCPServer] = None
self.observer: BaseObserver = Observer()
self.observer.schedule(
@ -170,7 +162,7 @@ class MeliesServer(FileSystemEventHandler):
self.observer.start()
try:
with socketserver.TCPServer(("", self.port), self.http_handler_cls) as self.httpd:
logging.info(f"Serving on port {self.port}")
self.logger.info(f"Serving on port {self.port}")
self.httpd.serve_forever()
except KeyboardInterrupt:
pass
@ -181,17 +173,17 @@ class MeliesServer(FileSystemEventHandler):
self.observer.join()
def on_deleted(self, event: FileDeletedEvent):
logging.info(f"Converted media deleted: {event.src_path}")
self.logger.info(f"Converted media deleted: {event.src_path}")
self.delete_metadata(event.src_path)
return super().on_deleted(event)
def on_moved(self, event: FileMovedEvent):
logging.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
self.logger.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
self.rename_metadata(event.src_path, event.dest_path)
return super().on_moved(event)
def on_closed(self, event: FileClosedEvent):
logging.info(f"Converted media created or modified: {event.src_path}")
self.logger.info(f"Converted media created or modified: {event.src_path}")
self.extract_metadata(event.src_path)
return super().on_closed(event)
@ -203,61 +195,3 @@ class MeliesServer(FileSystemEventHandler):
def delete_metadata(self, path: str):
pass
def main():
    """
    Parses the command line and starts the Melies server

    Each option is registered with the ``EnvDefault`` action together with an
    ``envvar`` name (presumably the environment variable provides the value
    when the flag is omitted -- confirm in ``src.env_default``).
    """
    parser = argparse.ArgumentParser(
        description="Starts the Melies server",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-p", "--port",
        action=EnvDefault,
        envvar="MELIES_PORT",
        default=8000,
        type=int,
        help="Port on which the server listens"
    )
    parser.add_argument(
        "--max-payload-size",
        action=EnvDefault,
        envvar="MELIES_MAX_PAYLOAD_SIZE",
        # argparse only applies ``type`` to string defaults, so the default
        # must already be an int (``1e6`` would reach the server as a float)
        default=1_000_000,
        type=int,
        help="Maximum POST payload size in bytes that the server accepts"
    )
    parser.add_argument(
        "--to-convert-dir",
        action=EnvDefault,
        envvar="MELIES_TO_CONVERT_DIR",
        default="to_convert",
        help="Path to the directory containing medias to convert"
    )
    parser.add_argument(
        "--converted-dir",
        action=EnvDefault,
        envvar="MELIES_CONVERTED_DIR",
        default="converted",
        help="Path to the directory containing converted medias"
    )
    parser.add_argument(
        "--metadata-dir",
        action=EnvDefault,
        envvar="MELIES_METADATA_DIR",
        default="metadata",
        help="Path to the directory containing metadata files"
    )
    args = parser.parse_args()

    server = MeliesServer(
        args.port,
        args.to_convert_dir,
        args.converted_dir,
        args.metadata_dir,
        args.max_payload_size
    )
    server.start()


if __name__ == "__main__":
    main()