Melies/get_metadata.py

183 lines
6.3 KiB
Python
Executable File

#!/usr/bin/env python3
import argparse
import os
import subprocess
import json
import sys
# Video file extensions this script accepts. Kept lower-case and as a tuple
# because callers test paths with ``path.lower().endswith(SUPPORTED_EXTENSIONS)``
# and str.endswith() takes a tuple of suffixes.
SUPPORTED_EXTENSIONS = (".mp4", ".mkv", ".mov", ".avi")
def get_video_metadata(file_path):
"""
Extract metadata from a video file using ffprobe.
Args:
file_path (str): Path to the video file
Returns:
dict: Metadata information
"""
# Get general file info
cmd = [
"ffprobe", "-v", "quiet", "-print_format", "json",
"-show_format", "-show_streams", file_path
]
try:
result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
if result.returncode != 0:
print(f"❌ Error processing {file_path}: {result.stderr}")
return None
data = json.loads(result.stdout)
# Extract filename and title
filename = os.path.basename(file_path)
title = data.get("format", {}).get("tags", {}).get("title", filename)
# Initialize metadata structure
metadata = {
"filename": filename,
"title": title,
"audio_tracks": [],
"subtitle_tracks": []
}
# Process streams
for stream in data.get("streams", []):
codec_type = stream.get("codec_type")
if codec_type == "audio":
track = {
"index": stream.get("index"),
"language": stream.get("tags", {}).get("language", "und"),
"name": stream.get("tags", {}).get("title", ""),
"channels": stream.get("channels", 0),
"flags": {
"default": stream.get("disposition", {}).get("default", 0) == 1,
"visual_impaired": stream.get("disposition", {}).get("visual_impaired", 0) == 1,
"original": stream.get("disposition", {}).get("original", 0) == 1,
"commentary": stream.get("disposition", {}).get("comment", 0) == 1
}
}
metadata["audio_tracks"].append(track)
elif codec_type == "subtitle":
track = {
"index": stream.get("index"),
"language": stream.get("tags", {}).get("language", "und"),
"name": stream.get("tags", {}).get("title", ""),
"flags": {
"default": stream.get("disposition", {}).get("default", 0) == 1,
"forced": stream.get("disposition", {}).get("forced", 0) == 1,
"hearing_impaired": stream.get("disposition", {}).get("hearing_impaired", 0) == 1,
"original": stream.get("disposition", {}).get("original", 0) == 1,
"commentary": stream.get("disposition", {}).get("comment", 0) == 1
}
}
metadata["subtitle_tracks"].append(track)
return metadata
except Exception as e:
print(f"❌ Error processing {file_path}: {str(e)}")
return None
def process_file(file_path, output_file=None):
    """
    Process a single video file and write metadata to JSON.

    Args:
        file_path (str): Path to the video file
        output_file (str, optional): Path to output JSON file. When omitted,
            the input path with its extension replaced by ``_metadata.json``
            is used.

    Returns:
        bool: True if metadata was extracted and written; False if the input
        is missing, has an unsupported extension, could not be probed, or the
        output file could not be written (a message is printed in each case).
    """
    if not os.path.isfile(file_path):
        print(f"❌ File not found: {file_path}")
        return False
    if not file_path.lower().endswith(SUPPORTED_EXTENSIONS):
        print(f"❌ Unsupported file format: {file_path}")
        return False

    print(f"📊 Extracting metadata from {os.path.basename(file_path)}")
    metadata = get_video_metadata(file_path)
    if not metadata:
        # get_video_metadata has already reported why it failed.
        return False

    if not output_file:
        # Generate output filename based on input file
        base_name = os.path.splitext(file_path)[0]
        output_file = f"{base_name}_metadata.json"

    # An unwritable destination is reported like every other failure here
    # instead of surfacing as an uncaught traceback.
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2, ensure_ascii=False)
    except OSError as e:
        print(f"❌ Could not write {output_file}: {e}")
        return False

    print(f"✅ Metadata saved to {output_file}")
    return True
def process_directory(directory_path, output_file=None):
    """
    Process all video files in a directory and write metadata to JSON.

    The directory is walked recursively. The metadata of every supported
    file is stored in one JSON object keyed by the file's path relative to
    ``directory_path``.

    Args:
        directory_path (str): Path to the directory
        output_file (str, optional): Path to output JSON file. When omitted,
            ``<directory name>_metadata.json`` (relative to the current
            working directory) is used.

    Returns:
        bool: True if metadata for at least one file was written; False if
        the directory is missing, contains no supported files, none of them
        could be probed, or the output file could not be written.
    """
    if not os.path.isdir(directory_path):
        print(f"❌ Directory not found: {directory_path}")
        return False

    all_metadata = {}
    candidates = 0
    for root, dirs, files in os.walk(directory_path):
        # Sort in place so os.walk descends in a stable order; together with
        # sorted(files) this makes the JSON key order reproducible.
        dirs.sort()
        for name in sorted(files):
            if not name.lower().endswith(SUPPORTED_EXTENSIONS):
                continue
            candidates += 1
            file_path = os.path.join(root, name)
            print(f"📊 Extracting metadata from {name}")
            metadata = get_video_metadata(file_path)
            if metadata:
                # Use relative path as key
                rel_path = os.path.relpath(file_path, directory_path)
                all_metadata[rel_path] = metadata

    if candidates == 0:
        print(f"❌ No supported video files found in {directory_path}")
        return False
    if not all_metadata:
        # Files were found but every probe failed; say so rather than
        # claiming the directory held no videos.
        print(f"❌ Could not extract metadata from any of the {candidates} video files in {directory_path}")
        return False

    if not output_file:
        # abspath (not normpath) so that "." resolves to the real directory
        # name instead of producing "._metadata.json".
        dir_name = os.path.basename(os.path.abspath(directory_path))
        output_file = f"{dir_name}_metadata.json"

    # Write all metadata to a single JSON file; report an unwritable
    # destination instead of letting the OSError escape.
    try:
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(all_metadata, f, indent=2, ensure_ascii=False)
    except OSError as e:
        print(f"❌ Could not write {output_file}: {e}")
        return False

    print(f"✅ Metadata for {len(all_metadata)} files saved to {output_file}")
    return True
def main():
    """Command-line entry point.

    Parses the input path and optional ``-o/--output`` path, dispatches to
    process_file() or process_directory() depending on what the input path
    is, and exits with status 1 if the path does not exist or processing
    reports failure.
    """
    parser = argparse.ArgumentParser(description="Extract metadata from video files and save as JSON.")
    parser.add_argument("input", help="Path to input video file or directory")
    parser.add_argument("-o", "--output", help="Path to output JSON file")
    args = parser.parse_args()

    input_path = args.input
    output_file = args.output

    if os.path.isfile(input_path):
        succeeded = process_file(input_path, output_file)
    elif os.path.isdir(input_path):
        succeeded = process_directory(input_path, output_file)
    else:
        print(f"❌ Path not found: {input_path}")
        succeeded = False

    # Propagate failure to the shell so scripts and CI can detect it; the
    # helpers have already printed the reason.
    if not succeeded:
        sys.exit(1)


if __name__ == "__main__":
    main()