refactor: extract launch script + adapt metadata extractor
parent 775d3da6ed
commit ffe847fb5e
Dockerfile

@@ -26,4 +26,4 @@ COPY . .
 
 EXPOSE 8000
 
-CMD ["python", "src/server.py"]
+CMD ["python", "-m", "scripts.server"]
__init__.py (new file, 0 lines)
scripts/__init__.py (new file, 0 lines)
scripts/extract_metadata.py (new executable file, 47 lines)
@@ -0,0 +1,47 @@
#!/usr/bin/env python3

import argparse
import logging
import os
import sys

from src.metadata_extractor import MetadataExtractor


def main():
    logging.basicConfig(
        level=logging.INFO,
        format="[%(levelname)s] %(message)s"
    )

    parser = argparse.ArgumentParser(
        description="Extract metadata from video files and save as JSON"
    )
    parser.add_argument(
        "input",
        help="Path to input video file or directory"
    )
    parser.add_argument(
        "-o", "--output",
        help="Directory path where the output JSON files will be saved"
    )
    args = parser.parse_args()

    input_path = args.input
    output_dir = args.output

    extractor: MetadataExtractor = MetadataExtractor()

    success = False
    if os.path.isfile(input_path):
        success = extractor.process_file(input_path, output_dir)
    elif os.path.isdir(input_path):
        success = extractor.process_directory(input_path, output_dir)
    else:
        logging.error(f"Path not found: {input_path}")

    if not success:
        sys.exit(1)

if __name__ == "__main__":
    main()
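Note: the script above is a thin wrapper around MetadataExtractor, so the same work can be done programmatically. A minimal sketch, assuming the repository root is on the import path; the media and output paths below are placeholders, not files from the repository:

from src.metadata_extractor import MetadataExtractor

extractor = MetadataExtractor()
# process_file() writes <video name>_metadata.json into the output directory
# and returns False on unsupported or unreadable input.
ok = extractor.process_file("movies/example.mkv", "metadata/")
if not ok:
    raise SystemExit(1)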
scripts/server.py (new executable file, 71 lines)
@@ -0,0 +1,71 @@
#!/usr/bin/env python3

import argparse
import logging

from src.env_default import EnvDefault
from src.server import MeliesServer


def main():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        datefmt=r"%Y-%m-%d %H:%M:%S"
    )

    parser = argparse.ArgumentParser(
        description="Starts the Melies server",
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        "-p", "--port",
        action=EnvDefault,
        envvar="MELIES_PORT",
        default=8000,
        type=int,
        help="Port on which the server listens"
    )
    parser.add_argument(
        "--max-payload-size",
        action=EnvDefault,
        envvar="MELIES_MAX_PAYLOAD_SIZE",
        default=1e6,
        type=int,
        help="Maximum POST payload size in bytes that the server accepts"
    )
    parser.add_argument(
        "--to-convert-dir",
        action=EnvDefault,
        envvar="MELIES_TO_CONVERT_DIR",
        default="to_convert",
        help="Path to the directory containing medias to convert"
    )
    parser.add_argument(
        "--converted-dir",
        action=EnvDefault,
        envvar="MELIES_CONVERTED_DIR",
        default="converted",
        help="Path to the directory containing converted medias"
    )
    parser.add_argument(
        "--metadata-dir",
        action=EnvDefault,
        envvar="MELIES_METADATA_DIR",
        default="metadata",
        help="Path to the directory containing metadata files"
    )
    args = parser.parse_args()

    server = MeliesServer(
        args.port,
        args.to_convert_dir,
        args.converted_dir,
        args.metadata_dir,
        args.max_payload_size
    )
    server.start()


if __name__ == "__main__":
    main()
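Note: src/env_default.py is not part of this diff, so EnvDefault is only visible through its usage above (action=EnvDefault, an envvar name, and a fallback default). That usage matches the common argparse recipe of an action that prefers an environment variable over the hard-coded default; the sketch below illustrates that recipe and is not the project's actual implementation:

import argparse
import os


class EnvDefault(argparse.Action):
    """Sketch of an env-var fallback action; src.env_default.EnvDefault may differ."""

    def __init__(self, envvar=None, required=False, default=None, **kwargs):
        # Prefer the environment variable over the hard-coded default
        if envvar and envvar in os.environ:
            default = os.environ[envvar]
        if required and default is not None:
            required = False
        super().__init__(default=default, required=required, **kwargs)

    def __call__(self, parser, namespace, values, option_string=None):
        setattr(namespace, self.dest, values)

With this pattern a value taken from MELIES_PORT still ends up as an int, since argparse applies the type callable to string defaults while parsing.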
src/metadata_extractor.py (new file, 185 lines)
@@ -0,0 +1,185 @@
import json
import logging
import os
import subprocess
from typing import Optional


class MetadataExtractor:
    SUPPORTED_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi")

    def __init__(self):
        self.logger: logging.Logger = logging.getLogger("MetadataExtractor")

    def analyze_file(self, path: str) -> Optional[dict]:
        """
        Extracts metadata from a video file using ffprobe

        :param path: Path to the video file
        :return: Metadata information or ``None`` if an error occurred
        """

        # Get general file info in JSON format
        cmd: list[str] = [
            "ffprobe",
            "-v", "quiet",
            "-print_format", "json",
            "-show_format",
            "-show_streams",
            path
        ]

        try:
            result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            if result.returncode != 0:
                self.logger.error(f"Error processing {path}: {result.stderr}")
                return None

            data: dict = json.loads(result.stdout)

            # Extract filename and title
            filename: str = os.path.basename(path)
            title: str = data.get("format", {}).get("tags", {}).get("title", filename)

            # Initialize metadata structure
            metadata: dict = {
                "filename": filename,
                "title": title,
                "audio_tracks": [],
                "subtitle_tracks": []
            }

            # Process streams
            for stream in data.get("streams", []):
                codec_type = stream.get("codec_type")
                tags = stream.get("tags", {})
                disposition = stream.get("disposition", {})
                track = {
                    "index": stream.get("index"),
                    "language": tags.get("language", "und"),
                    "name": tags.get("title", ""),
                    "flags": {
                        "default": disposition.get("default", 0) == 1,
                        "original": disposition.get("original", 0) == 1,
                        "commentary": disposition.get("commentary", 0) == 1
                    }
                }

                if codec_type == "audio":
                    track |= {
                        "channels": stream.get("channels", 0)
                    }
                    track["flags"] |= {
                        "visual_impaired": disposition.get("visual_impaired", 0) == 1
                    }
                    metadata["audio_tracks"].append(track)

                elif codec_type == "subtitle":
                    track["flags"] |= {
                        "forced": disposition.get("forced", 0) == 1,
                        "hearing_impaired": disposition.get("hearing_impaired", 0) == 1
                    }
                    metadata["subtitle_tracks"].append(track)

                else:
                    self.logger.warning(f"Unknown track codec type '{codec_type}'")

            return metadata

        except Exception as e:
            self.logger.error(f"Error processing {path}: {str(e)}")
            return None

    def process_file(self, file_path: str, output_dir: str) -> bool:
        """
        Processes a single video file and writes metadata to a JSON file

        :param file_path: Path of the video file
        :param output_dir: Path of the directory where the output JSON file will be saved
        :return: True if successful, False otherwise
        """

        if not os.path.isfile(file_path):
            self.logger.error(f"File not found: {file_path}")
            return False

        if not file_path.lower().endswith(self.SUPPORTED_EXTENSIONS):
            self.logger.error(f"Unsupported file format: {file_path}")
            return False

        self.logger.debug(f"Extracting metadata from {os.path.basename(file_path)}")
        metadata: Optional[dict] = self.analyze_file(file_path)

        if metadata:
            # Generate output filename based on input file
            filename = os.path.basename(os.path.splitext(file_path)[0]) + "_metadata.json"

            if output_dir:
                # Ensure output directory exists
                os.makedirs(output_dir, exist_ok=True)
                output_path = os.path.join(output_dir, filename)
            else:
                # If no output directory specified, save in the same directory as the input file
                base_name = os.path.splitext(file_path)[0]
                output_path = f"{base_name}_metadata.json"

            # Write metadata to JSON file
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(metadata, f, indent=2, ensure_ascii=False)

            self.logger.debug(f"Metadata saved to {output_path}")
            return True

        return False

    def process_directory(self, directory_path: str, output_dir: Optional[str] = None) -> bool:
        """
        Processes all video files in a directory and writes metadata to a JSON file

        :param directory_path: Path of the directory
        :param output_dir: Path of the directory where the output JSON file will be saved
        :return: True if successful, False otherwise
        """

        if not os.path.isdir(directory_path):
            self.logger.error(f"Directory not found: {directory_path}")
            return False

        all_metadata: dict[str, dict] = {}
        file_count: int = 0

        for root, _, files in os.walk(directory_path):
            for file in files:
                if file.lower().endswith(self.SUPPORTED_EXTENSIONS):
                    file_path: str = os.path.join(root, file)
                    self.logger.debug(f"Extracting metadata from {file}")
                    metadata: Optional[dict] = self.analyze_file(file_path)

                    if metadata:
                        # Use relative path as key
                        rel_path: str = os.path.relpath(file_path, directory_path)
                        all_metadata[rel_path] = metadata
                        file_count += 1

        if file_count == 0:
            self.logger.error(f"No supported video files found in {directory_path}")
            return False

        # Generate output filename based on directory name
        dir_name: str = os.path.basename(os.path.normpath(directory_path))
        filename: str = f"{dir_name}_metadata.json"

        if output_dir is not None:
            # Ensure output directory exists
            os.makedirs(output_dir, exist_ok=True)
            output_path = os.path.join(output_dir, filename)
        else:
            # If no output directory specified, save in the current directory
            output_path = filename

        # Write all metadata to a single JSON file
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(all_metadata, f, indent=2, ensure_ascii=False)

        self.logger.debug(f"Metadata for {file_count} files saved to {output_path}")
        return True
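Note: for a single processed file, the JSON written by process_file() mirrors the dict built in analyze_file(). An illustrative example (values are made up; only the keys follow the code above):

example_metadata = {
    "filename": "example.mkv",
    "title": "Example",
    "audio_tracks": [
        {
            "index": 1,
            "language": "eng",
            "name": "",
            "flags": {"default": True, "original": False,
                      "commentary": False, "visual_impaired": False},
            "channels": 6,
        }
    ],
    "subtitle_tracks": [
        {
            "index": 2,
            "language": "fre",
            "name": "",
            "flags": {"default": False, "original": False, "commentary": False,
                      "forced": True, "hearing_impaired": False},
        }
    ],
}

process_directory() instead writes one <dir>_metadata.json whose top-level keys are file paths relative to the scanned directory, each mapping to a dict of this shape.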
src/server.py (80 changed lines, executable file → normal file)
@@ -1,8 +1,5 @@
-#!/usr/bin/env python3
-
 from __future__ import annotations
 
-import argparse
 import json
 import logging
 import os
@@ -11,6 +8,7 @@ import time
 from functools import partial
 from http import HTTPStatus
 from http.server import SimpleHTTPRequestHandler
+from logging import Logger
 from typing import Optional
 from urllib.parse import parse_qs, unquote, urlparse
 
@@ -19,7 +17,6 @@ from watchdog.events import (FileClosedEvent, FileDeletedEvent, FileMovedEvent,
 from watchdog.observers import Observer
 from watchdog.observers.api import BaseObserver
 
-from src.env_default import EnvDefault
 from src.file_handlers import ToConvertFileHandler, MetadataFileHandler
 
 
@@ -39,7 +36,7 @@ class HTTPHandler(SimpleHTTPRequestHandler):
         self.data: Optional[dict|list] = None
 
     def log_message(self, format, *args):
-        logging.info("%s - %s" % (
+        self.server_.logger.info("%s - %s" % (
            self.client_address[0],
            format % args
        ))
@@ -131,6 +128,7 @@ class MeliesServer(FileSystemEventHandler):
                  max_payload_size: int):
 
         super().__init__()
+        self.logger: Logger = logging.getLogger("MeliesServer")
 
         self.port: int = port
         self.to_convert_dir: str = to_convert_dir
@@ -148,12 +146,6 @@ class MeliesServer(FileSystemEventHandler):
         self.to_convert_files: ToConvertFileHandler = ToConvertFileHandler(self.to_convert_dir)
         self.metadata_files: MetadataFileHandler = MetadataFileHandler(self.metadata_dir)
 
-        logging.basicConfig(
-            level=logging.INFO,
-            format="%(asctime)s [%(levelname)s] %(message)s",
-            datefmt=r"%Y-%m-%d %H:%M:%S"
-        )
-
         self.httpd: Optional[socketserver.TCPServer] = None
         self.observer: BaseObserver = Observer()
         self.observer.schedule(
@@ -170,7 +162,7 @@ class MeliesServer(FileSystemEventHandler):
         self.observer.start()
         try:
             with socketserver.TCPServer(("", self.port), self.http_handler_cls) as self.httpd:
-                logging.info(f"Serving on port {self.port}")
+                self.logger.info(f"Serving on port {self.port}")
                 self.httpd.serve_forever()
         except KeyboardInterrupt:
             pass
@@ -181,17 +173,17 @@ class MeliesServer(FileSystemEventHandler):
         self.observer.join()
 
     def on_deleted(self, event: FileDeletedEvent):
-        logging.info(f"Converted media deleted: {event.src_path}")
+        self.logger.info(f"Converted media deleted: {event.src_path}")
         self.delete_metadata(event.src_path)
         return super().on_deleted(event)
 
     def on_moved(self, event: FileMovedEvent):
-        logging.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
+        self.logger.info(f"Converted media moved: {event.src_path} -> {event.dest_path}")
         self.rename_metadata(event.src_path, event.dest_path)
         return super().on_moved(event)
 
     def on_closed(self, event: FileClosedEvent):
-        logging.info(f"Converted media created or modified: {event.src_path}")
+        self.logger.info(f"Converted media created or modified: {event.src_path}")
         self.extract_metadata(event.src_path)
         return super().on_closed(event)
 
@@ -203,61 +195,3 @@ class MeliesServer(FileSystemEventHandler):
 
     def delete_metadata(self, path: str):
         pass
-
-
-def main():
-    parser = argparse.ArgumentParser(
-        description="Starts the Melies server",
-        formatter_class=argparse.RawTextHelpFormatter
-    )
-    parser.add_argument(
-        "-p", "--port",
-        action=EnvDefault,
-        envvar="MELIES_PORT",
-        default=8000,
-        type=int,
-        help="Port on which the server listens"
-    )
-    parser.add_argument(
-        "--max-payload-size",
-        action=EnvDefault,
-        envvar="MELIES_MAX_PAYLOAD_SIZE",
-        default=1e6,
-        type=int,
-        help="Maximum POST payload size in bytes that the server accepts"
-    )
-    parser.add_argument(
-        "--to-convert-dir",
-        action=EnvDefault,
-        envvar="MELIES_TO_CONVERT_DIR",
-        default="to_convert",
-        help="Path to the directory containing medias to convert"
-    )
-    parser.add_argument(
-        "--converted-dir",
-        action=EnvDefault,
-        envvar="MELIES_CONVERTED_DIR",
-        default="converted",
-        help="Path to the directory containing converted medias"
-    )
-    parser.add_argument(
-        "--metadata-dir",
-        action=EnvDefault,
-        envvar="MELIES_METADATA_DIR",
-        default="metadata",
-        help="Path to the directory containing metadata files"
-    )
-    args = parser.parse_args()
-
-    server = MeliesServer(
-        args.port,
-        args.to_convert_dir,
-        args.converted_dir,
-        args.metadata_dir,
-        args.max_payload_size
-    )
-    server.start()
-
-
-if __name__ == "__main__":
-    main()
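Note: with logging.basicConfig and the argparse-based main() removed from src/server.py, configuring logging and choosing directories is now the caller's job (normally scripts/server.py). A minimal sketch of embedding the server directly, reusing the defaults that scripts/server.py declares:

import logging

from src.server import MeliesServer

# Handler/level setup now happens in the entry script, not inside MeliesServer
logging.basicConfig(level=logging.INFO)

# Constructor order as called by scripts/server.py:
# (port, to_convert_dir, converted_dir, metadata_dir, max_payload_size)
server = MeliesServer(8000, "to_convert", "converted", "metadata", 1_000_000)
server.start()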