refactor: extract launch script + adapt metadata extractor
parent 775d3da6ed
commit ffe847fb5e
@@ -26,4 +26,4 @@ COPY . .
 
 EXPOSE 8000
 
-CMD ["python", "src/server.py"]
+CMD ["python", "-m", "scripts.server"]

0  __init__.py  Normal file

0  scripts/__init__.py  Normal file

47  scripts/extract_metadata.py  Executable file
@@ -0,0 +1,47 @@
+#!/usr/bin/env python3
+
+import argparse
+import logging
+import os
+import sys
+
+from src.metadata_extractor import MetadataExtractor
+
+
+def main():
+    logging.basicConfig(
+        level=logging.INFO,
+        format="[%(levelname)s] %(message)s"
+    )
+
+    parser = argparse.ArgumentParser(
+        description="Extract metadata from video files and save as JSON"
+    )
+    parser.add_argument(
+        "input",
+        help="Path to input video file or directory"
+    )
+    parser.add_argument(
+        "-o", "--output",
+        help="Directory path where the output JSON files will be saved"
+    )
+    args = parser.parse_args()
+
+    input_path = args.input
+    output_dir = args.output
+
+    extractor: MetadataExtractor = MetadataExtractor()
+
+    success = False
+    if os.path.isfile(input_path):
+        success = extractor.process_file(input_path, output_dir)
+    elif os.path.isdir(input_path):
+        success = extractor.process_directory(input_path, output_dir)
+    else:
+        logging.error(f"Path not found: {input_path}")
+
+    if not success:
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
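
The new CLI is a thin wrapper around MetadataExtractor, so the same work can be driven directly from Python. A minimal sketch, assuming the repository root is on PYTHONPATH and using made-up media paths:

    from src.metadata_extractor import MetadataExtractor

    extractor = MetadataExtractor()
    # Single file: writes <name>_metadata.json into the given output directory
    extractor.process_file("movies/example.mkv", "metadata")
    # Whole directory: writes one <dirname>_metadata.json combining every supported file
    extractor.process_directory("movies", "metadata")
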
71  scripts/server.py  Executable file

@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+import argparse
+import logging
+
+from src.env_default import EnvDefault
+from src.server import MeliesServer
+
+
+def main():
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s [%(levelname)s] %(message)s",
+        datefmt=r"%Y-%m-%d %H:%M:%S"
+    )
+
+    parser = argparse.ArgumentParser(
+        description="Starts the Melies server",
+        formatter_class=argparse.RawTextHelpFormatter
+    )
+    parser.add_argument(
+        "-p", "--port",
+        action=EnvDefault,
+        envvar="MELIES_PORT",
+        default=8000,
+        type=int,
+        help="Port on which the server listens"
+    )
+    parser.add_argument(
+        "--max-payload-size",
+        action=EnvDefault,
+        envvar="MELIES_MAX_PAYLOAD_SIZE",
+        default=1e6,
+        type=int,
+        help="Maximum POST payload size in bytes that the server accepts"
+    )
+    parser.add_argument(
+        "--to-convert-dir",
+        action=EnvDefault,
+        envvar="MELIES_TO_CONVERT_DIR",
+        default="to_convert",
+        help="Path to the directory containing medias to convert"
+    )
+    parser.add_argument(
+        "--converted-dir",
+        action=EnvDefault,
+        envvar="MELIES_CONVERTED_DIR",
+        default="converted",
+        help="Path to the directory containing converted medias"
+    )
+    parser.add_argument(
+        "--metadata-dir",
+        action=EnvDefault,
+        envvar="MELIES_METADATA_DIR",
+        default="metadata",
+        help="Path to the directory containing metadata files"
+    )
+    args = parser.parse_args()
+
+    server = MeliesServer(
+        args.port,
+        args.to_convert_dir,
+        args.converted_dir,
+        args.metadata_dir,
+        args.max_payload_size
+    )
+    server.start()
+
+
+if __name__ == "__main__":
+    main()
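
Every option above pairs a command-line flag with a MELIES_* environment variable through action=EnvDefault. src/env_default.py itself is not part of this diff; the sketch below shows what such an argparse action typically looks like, as an assumption rather than the repository's actual implementation:

    import argparse
    import os

    class EnvDefault(argparse.Action):
        """Argparse action whose default falls back to an environment variable."""

        def __init__(self, envvar, required=False, default=None, **kwargs):
            # A set environment variable overrides the hard-coded default
            if envvar and envvar in os.environ:
                default = os.environ[envvar]
            if required and default is not None:
                required = False
            super().__init__(default=default, required=required, **kwargs)

        def __call__(self, parser, namespace, values, option_string=None):
            # An explicit command-line value overrides both
            setattr(namespace, self.dest, values)
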
185  src/metadata_extractor.py  Normal file

@@ -0,0 +1,185 @@
+import json
+import logging
+import os
+import subprocess
+from typing import Optional
+
+
+class MetadataExtractor:
+    SUPPORTED_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi")
+
+    def __init__(self):
+        self.logger: logging.Logger = logging.getLogger("MetadataExtractor")
+
+    def analyze_file(self, path: str) -> Optional[dict]:
+        """
+        Extracts metadata from a video file using ffprobe
+
+        :param path: Path to the video file
+        :return: Metadata information or ``None`` if an error occurred
+        """
+
+        # Get general file info in JSON format
+        cmd: list[str] = [
+            "ffprobe",
+            "-v", "quiet",
+            "-print_format", "json",
+            "-show_format",
+            "-show_streams",
+            path
+        ]
+
+        try:
+            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
+            if result.returncode != 0:
+                self.logger.error(f"Error processing {path}: {result.stderr}")
+                return None
+
+            data: dict = json.loads(result.stdout)
+
+            # Extract filename and title
+            filename: str = os.path.basename(path)
+            title: str = data.get("format", {}).get("tags", {}).get("title", filename)
+
+            # Initialize metadata structure
+            metadata: dict = {
+                "filename": filename,
+                "title": title,
+                "audio_tracks": [],
+                "subtitle_tracks": []
+            }
+
+            # Process streams
+            for stream in data.get("streams", []):
+                codec_type = stream.get("codec_type")
+                tags = stream.get("tags", {})
+                disposition = stream.get("disposition", {})
+                track = {
+                    "index": stream.get("index"),
+                    "language": tags.get("language", "und"),
+                    "name": tags.get("title", ""),
+                    "flags": {
+                        "default": disposition.get("default", 0) == 1,
+                        "original": disposition.get("original", 0) == 1,
+                        "commentary": disposition.get("commentary", 0) == 1
+                    }
+                }
+
+                if codec_type == "audio":
+                    track |= {
+                        "channels": stream.get("channels", 0)
+                    }
+                    track["flags"] |= {
+                        "visual_impaired": disposition.get("visual_impaired", 0) == 1
+                    }
+                    metadata["audio_tracks"].append(track)
+
+                elif codec_type == "subtitle":
+                    track["flags"] |= {
+                        "forced": disposition.get("forced", 0) == 1,
+                        "hearing_impaired": disposition.get("hearing_impaired", 0) == 1
+                    }
+                    metadata["subtitle_tracks"].append(track)
+
+                else:
+                    self.logger.warning(f"Unknown track codec type '{codec_type}'")
+
+            return metadata
+
+        except Exception as e:
+            self.logger.error(f"Error processing {path}: {str(e)}")
+            return None
+
+    def process_file(self, file_path: str, output_dir: str) -> bool:
+        """
+        Processes a single video file and writes metadata to a JSON file
+
+        :param file_path: Path of the video file
+        :param output_dir: Path of the directory where the output JSON file will be saved
+        :return: True if successful, False otherwise
+        """
+
+        if not os.path.isfile(file_path):
+            self.logger.error(f"File not found: {file_path}")
+            return False
+
+        if not file_path.lower().endswith(self.SUPPORTED_EXTENSIONS):
+            self.logger.error(f"Unsupported file format: {file_path}")
+            return False
+
+        self.logger.debug(f"Extracting metadata from {os.path.basename(file_path)}")
+        metadata: Optional[dict] = self.analyze_file(file_path)
+
+        if metadata:
+            # Generate output filename based on input file
+            filename = os.path.basename(os.path.splitext(file_path)[0]) + "_metadata.json"
+
+            if output_dir:
+                # Ensure output directory exists
+                os.makedirs(output_dir, exist_ok=True)
+                output_path = os.path.join(output_dir, filename)
+            else:
+                # If no output directory specified, save in the same directory as the input file
+                base_name = os.path.splitext(file_path)[0]
+                output_path = f"{base_name}_metadata.json"
+
+            # Write metadata to JSON file
+            with open(output_path, "w", encoding="utf-8") as f:
+                json.dump(metadata, f, indent=2, ensure_ascii=False)
+
+            self.logger.debug(f"Metadata saved to {output_path}")
+            return True
+
+        return False
+
+    def process_directory(self, directory_path: str, output_dir: Optional[str] = None) -> bool:
+        """
+        Processes all video files in a directory and writes metadata to a JSON file
+
+        :param directory_path: Path of the directory
+        :param output_dir: Path of the directory where the output JSON file will be saved
+        :return: True if successful, False otherwise
+        """
+
+        if not os.path.isdir(directory_path):
+            self.logger.error(f"Directory not found: {directory_path}")
+            return False
+
+        all_metadata: dict[str, dict] = {}
+        file_count: int = 0
+
+        for root, _, files in os.walk(directory_path):
+            for file in files:
+                if file.lower().endswith(self.SUPPORTED_EXTENSIONS):
+                    file_path: str = os.path.join(root, file)
+                    self.logger.debug(f"Extracting metadata from {file}")
+                    metadata: Optional[dict] = self.analyze_file(file_path)
+
+                    if metadata:
+                        # Use relative path as key
+                        rel_path: str = os.path.relpath(file_path, directory_path)
+                        all_metadata[rel_path] = metadata
+                        file_count += 1
+
+        if file_count == 0:
+            self.logger.error(f"No supported video files found in {directory_path}")
+            return False
+
+        # Generate output filename based on directory name
+        dir_name: str = os.path.basename(os.path.normpath(directory_path))
+        filename: str = f"{dir_name}_metadata.json"
+
+        if output_dir is not None:
+            # Ensure output directory exists
+            os.makedirs(output_dir, exist_ok=True)
+            output_path = os.path.join(output_dir, filename)
+        else:
+            # If no output directory specified, save in the current directory
+            output_path = filename
+
+        # Write all metadata to a single JSON file
+        with open(output_path, "w", encoding="utf-8") as f:
+            json.dump(all_metadata, f, indent=2, ensure_ascii=False)
+
+        self.logger.debug(f"Metadata for {file_count} files saved to {output_path}")
+        return True
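
For orientation, the per-file dictionary returned by analyze_file (and dumped to JSON by process_file) has the following shape; the values are illustrative, not taken from a real file:

    example_metadata = {
        "filename": "example.mkv",
        "title": "Example",            # falls back to the filename when no title tag is present
        "audio_tracks": [
            {
                "index": 1,
                "language": "eng",     # "und" when ffprobe reports no language tag
                "name": "",
                "flags": {"default": True, "original": False,
                          "commentary": False, "visual_impaired": False},
                "channels": 6,
            },
        ],
        "subtitle_tracks": [
            {
                "index": 2,
                "language": "fre",
                "name": "Full subtitles",
                "flags": {"default": False, "original": False, "commentary": False,
                          "forced": False, "hearing_impaired": False},
            },
        ],
    }

process_directory instead writes a single JSON object that maps each file's path, relative to the scanned directory, to one such entry.
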
80  src/server.py  Executable file → Normal file

@@ -1,8 +1,5 @@
-#!/usr/bin/env python3
-
 from __future__ import annotations
 
-import argparse
 import json
 import logging
 import os
@@ -11,6 +8,7 @@ import time
 from functools import partial
 from http import HTTPStatus
 from http.server import SimpleHTTPRequestHandler
+from logging import Logger
 from typing import Optional
 from urllib.parse import parse_qs, unquote, urlparse
 
@@ -19,7 +17,6 @@ from watchdog.events import (FileClosedEvent, FileDeletedEvent, FileMovedEvent,
 from watchdog.observers import Observer
 from watchdog.observers.api import BaseObserver
 
-from src.env_default import EnvDefault
 from src.file_handlers import ToConvertFileHandler, MetadataFileHandler
 
 
@@ -39,7 +36,7 @@ class HTTPHandler(SimpleHTTPRequestHandler):
         self.data: Optional[dict|list] = None
 
     def log_message(self, format, *args):
-        logging.info("%s - %s" % (
+        self.server_.logger.info("%s - %s" % (
             self.client_address[0],
             format % args
         ))
@@ -131,6 +128,7 @@ class MeliesServer(FileSystemEventHandler):
                  max_payload_size: int):
 
         super().__init__()
+        self.logger: Logger = logging.getLogger("MeliesServer")
 
         self.port: int = port
         self.to_convert_dir: str = to_convert_dir
@@ -148,12 +146,6 @@ class MeliesServer(FileSystemEventHandler):
         self.to_convert_files: ToConvertFileHandler = ToConvertFileHandler(self.to_convert_dir)
         self.metadata_files: MetadataFileHandler = MetadataFileHandler(self.metadata_dir)
 
-        logging.basicConfig(
-            level=logging.INFO,
-            format="%(asctime)s [%(levelname)s] %(message)s",
-            datefmt=r"%Y-%m-%d %H:%M:%S"
-        )
-
         self.httpd: Optional[socketserver.TCPServer] = None
         self.observer: BaseObserver = Observer()
         self.observer.schedule(
@@ -170,7 +162,7 @@ class MeliesServer(FileSystemEventHandler):
         self.observer.start()
         try:
             with socketserver.TCPServer(("", self.port), self.http_handler_cls) as self.httpd:
-                logging.info(f"Serving on port {self.port}")
+                self.logger.info(f"Serving on port {self.port}")
                 self.httpd.serve_forever()
         except KeyboardInterrupt:
             pass
@@ -181,17 +173,17 @@ class MeliesServer(FileSystemEventHandler):
         self.observer.join()
 
     def on_deleted(self, event: FileDeletedEvent):
-        logging.info(f"Converted media deleted: {event.src_path}")
+        self.logger.info(f"Converted media deleted: {event.src_path}")
        self.delete_metadata(event.src_path)
         return super().on_deleted(event)
 
     def on_moved(self, event: FileMovedEvent):
-        logging.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
+        self.logger.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
         self.rename_metadata(event.src_path, event.dest_path)
         return super().on_moved(event)
 
     def on_closed(self, event: FileClosedEvent):
-        logging.info(f"Converted media created or modified: {event.src_path}")
+        self.logger.info(f"Converted media created or modified: {event.src_path}")
         self.extract_metadata(event.src_path)
         return super().on_closed(event)
 
@@ -203,61 +195,3 @@ class MeliesServer(FileSystemEventHandler):
 
     def delete_metadata(self, path: str):
         pass
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Starts the Melies server",
-        formatter_class=argparse.RawTextHelpFormatter
-    )
-    parser.add_argument(
-        "-p", "--port",
-        action=EnvDefault,
-        envvar="MELIES_PORT",
-        default=8000,
-        type=int,
-        help="Port on which the server listens"
-    )
-    parser.add_argument(
-        "--max-payload-size",
-        action=EnvDefault,
-        envvar="MELIES_MAX_PAYLOAD_SIZE",
-        default=1e6,
-        type=int,
-        help="Maximum POST payload size in bytes that the server accepts"
-    )
-    parser.add_argument(
-        "--to-convert-dir",
-        action=EnvDefault,
-        envvar="MELIES_TO_CONVERT_DIR",
-        default="to_convert",
-        help="Path to the directory containing medias to convert"
-    )
-    parser.add_argument(
-        "--converted-dir",
-        action=EnvDefault,
-        envvar="MELIES_CONVERTED_DIR",
-        default="converted",
-        help="Path to the directory containing converted medias"
-    )
-    parser.add_argument(
-        "--metadata-dir",
-        action=EnvDefault,
-        envvar="MELIES_METADATA_DIR",
-        default="metadata",
-        help="Path to the directory containing metadata files"
-    )
-    args = parser.parse_args()
-
-    server = MeliesServer(
-        args.port,
-        args.to_convert_dir,
-        args.converted_dir,
-        args.metadata_dir,
-        args.max_payload_size
-    )
-    server.start()
-
-
-if __name__ == "__main__":
-    main()
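
With main() gone from src/server.py, startup now goes through python -m scripts.server, which is what the CMD change at the top of this diff reflects. A rough sketch of the equivalent programmatic startup, using the MeliesServer signature shown in this diff and the defaults declared in scripts/server.py:

    import logging

    from src.server import MeliesServer

    # Logging configuration now lives in the entry script, not in MeliesServer.__init__
    logging.basicConfig(level=logging.INFO)

    # Arguments: port, to_convert_dir, converted_dir, metadata_dir, max_payload_size
    server = MeliesServer(8000, "to_convert", "converted", "metadata", int(1e6))
    server.start()
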