-
Notifications
You must be signed in to change notification settings - Fork 0
/
chapterize.py
183 lines (142 loc) · 7.21 KB
/
chapterize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/python3
import argparse
import glob
import os
import shlex
import shutil
import subprocess
import uuid
from typing import NamedTuple
# Factor used to convert the duration ffprobe reports (seconds) into nanoseconds.
# Note this is a float literal, so products with it are floats until converted.
NANOSECONDS_IN_ONE_SECOND = 1e9
# Decimal megabyte (10**6 bytes); used when reporting the size of the output file.
BYTES_IN_ONE_MEGABYTE = 1e6
class FileMetadata(NamedTuple):
    """One input audio file and the chapter span it occupies in the merged output."""

    # Path of the input audio file, as returned by the input glob.
    filename: str
    # Start of this file's chapter, in nanoseconds from the start of the merged audio.
    chapter_start_time_ns: int
    # End of this file's chapter, in nanoseconds from the start of the merged audio.
    chapter_end_time_ns: int
def get_nanoseconds_for_file(file_name):
    """Return the duration of an audio file in whole nanoseconds.

    The duration is read from ``ffprobe -show_entries format=duration``, which
    prints a ``duration=<seconds>`` line.

    Args:
        file_name: Path of the audio file to probe.

    Returns:
        The duration in nanoseconds, rounded to the nearest integer.

    Raises:
        ValueError: If ffprobe printed no usable ``duration=`` line (for
            example the file is unreadable or the duration is ``N/A``).
        FileNotFoundError: If the ``ffprobe`` executable cannot be found.
    """
    # ``args`` only exists when this file runs as a script; when the function is
    # imported and called from elsewhere, behave as non-interactive instead of
    # failing with a NameError.
    cli_args = globals().get("args")
    interactive = bool(getattr(cli_args, "interactive", False))

    if interactive:
        print(f"Getting nanoseconds for {file_name}")

    # Pass an argument list (no shell) so quotes, `$` or backticks in the file
    # name can neither break the command nor be interpreted by a shell.
    with subprocess.Popen(
        ["ffprobe", "-i", file_name, "-show_entries", "format=duration"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    ) as ffprobe:
        raw_ffprobe_output = ffprobe.stdout.readlines()

    if interactive:
        print(f"Nanosecond output for {file_name}")
        print(raw_ffprobe_output)

    # Locate the duration line by key rather than by a fixed line index.
    for line in raw_ffprobe_output:
        key, separator, value = line.strip().partition("=")
        if separator and key == "duration":
            try:
                time_seconds = float(value)
            except ValueError:
                break  # e.g. "N/A": no usable duration
            # round() instead of int(): truncating float error could otherwise
            # leave the result one nanosecond short.
            return round(time_seconds * NANOSECONDS_IN_ONE_SECOND)

    raise ValueError(f"Could not determine the duration of {file_name!r} from ffprobe output")
def concat_using_ffmpeg_filters(input_audio_files, output_filename, temp_metadata, audio_encoder="aac"):
    """Build the shell command that concatenates the inputs into one audio file.

    The command feeds every input file to ffmpeg's ``concat`` filter (audio
    only), takes the global/chapter metadata from ``temp_metadata`` and encodes
    the result with ``audio_encoder``.

    Args:
        input_audio_files: Paths of the audio files, in playback order.
        output_filename: Path of the file ffmpeg should write.
        temp_metadata: Path of an ffmetadata file to take metadata from.
        audio_encoder: Name of the ffmpeg audio encoder to use.

    Returns:
        A single command string, with every argument shell-quoted, suitable
        for ``os.system`` or ``subprocess.run(..., shell=True)``.

    Raises:
        ValueError: If ``input_audio_files`` is empty.
    """
    if not input_audio_files:
        raise ValueError("at least one input audio file is required")

    input_count = len(input_audio_files)

    # We want -i file1.mp3 -i file2.mp3 ... -i fileN.mp3
    arguments = ["ffmpeg"]
    for input_audio_file in input_audio_files:
        arguments += ["-i", input_audio_file]

    # The metadata file is added as the last input, so its input index is
    # exactly input_count; -map_metadata below refers to it by that index.
    arguments += ["-i", temp_metadata]

    # We want [0:0][1:0][2:0]... for each input file.
    # In [x:y], x is the index of the input and y is the index of the stream
    # within that input.
    audio_streams = "".join(f"[{index}:0]" for index in range(input_count))

    arguments += [
        "-filter_complex", f"{audio_streams}concat=n={input_count}:v=0:a=1[out]",
        "-map", "[out]",
        "-map_metadata", str(input_count),
        "-c:a", audio_encoder,
        "-vn",
        "-aac_coder", "fast",
        output_filename,
    ]

    # Quote every argument so paths containing spaces, quotes, `$`, etc. reach
    # ffmpeg unchanged instead of being split or expanded by the shell.
    return " ".join(shlex.quote(argument) for argument in arguments)
def get_element_from_metadata(element_name, metadata_lines, override=None):
    """Return the value of a global tag from ffmetadata-formatted lines.

    Args:
        element_name: Exact tag key to look up, e.g. ``"title"``.
        metadata_lines: Lines of an ffmetadata dump (``key=value`` per line).
        override: If not ``None``, returned as-is without consulting the lines.

    Returns:
        ``override`` when given; otherwise the value of the first
        ``element_name=...`` line in the global section (everything before the
        first ``[SECTION]`` header), with trailing whitespace removed; or the
        string ``"unknown"`` when no such line exists.
    """
    if override is not None:
        return override

    # Match the whole key including "=", so looking up "title" does not pick up
    # a longer key such as "titlesort".
    prefix = element_name + "="
    for line in metadata_lines:
        if line.startswith("["):
            # A section header ([CHAPTER], [STREAM]) ends the global tags; tags
            # after it belong to that section, not to the file as a whole.
            break
        if line.startswith(prefix):
            # Keep everything after the first "=" so values that themselves
            # contain "=" are not truncated.
            return line[len(prefix):].rstrip()

    print(f"Cannot find element {element_name} in metadata, reading 'unknown' instead")
    return "unknown"
def get_metadata_lines_from_file(filename):
    """Return the ffmetadata dump of a media file as a list of lines.

    Runs ``ffmpeg -i <filename> -f ffmetadata -v quiet -`` and collects what it
    writes to standard output.

    Args:
        filename: Path of the media file to inspect.

    Returns:
        The output lines, each still carrying its line ending. The list is
        empty when ffmpeg produced no output.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
    """
    # Use an argument list (no shell) so quotes, `$` or backticks in the file
    # name can neither break the command nor be interpreted by a shell.
    with subprocess.Popen(
        ["ffmpeg", "-i", filename, "-f", "ffmetadata", "-v", "quiet", "-"],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    ) as ffmpeg:
        return ffmpeg.stdout.readlines()
def get_chapter_name(metadata: FileMetadata, i: int, keep_chapter_names: bool):
    """Pick the chapter title for the ``i``-th input file (zero-based).

    When ``keep_chapter_names`` is true, the title tag of the file is used if
    one can be read; in every other case the generic name ``Chapter <i + 1>``
    is returned.
    """
    generic_name = f"Chapter {i + 1}"
    if not keep_chapter_names:
        return generic_name

    title_from_file = get_element_from_metadata(
        "title",
        get_metadata_lines_from_file(metadata.filename),
    )
    # "unknown" is what the metadata lookup returns when no title was found.
    if title_from_file == "unknown":
        return generic_name
    return title_from_file
if __name__ == "__main__":
    # Script entry point: merge the audio files matched by a glob into a single
    # .m4b file with one chapter per input file, optionally embedding cover art.
    arg_parser = argparse.ArgumentParser(description="Turn a set of audio files into a .m4b audiobook file")
    arg_parser.add_argument("output_filename", help="filename of the final m4b file, extension included")
    arg_parser.add_argument("input_files", help="glob for the input audio files, i.e. './test/*.mp3'")
    arg_parser.add_argument("--encoder", help="override default (aac) encoder")
    arg_parser.add_argument("--author", help="override the author metadata")
    arg_parser.add_argument("--title", help="override the title metadata")
    arg_parser.add_argument("--cover_image", help="path to a jpg/png to embed as cover art")
    arg_parser.add_argument("--interactive", action="store_true",
                            help="more verbosity and requires user intervention")  # TODO: Implement, more.
    arg_parser.add_argument("--keep_chapter_names", action="store_true",
                            help="use title metadata from each file for chapter names")
    # NOTE: `args` must stay a module-level name; helper functions in this file
    # read it to decide whether to print interactive output.
    args = arg_parser.parse_args()

    def _escape_ffmetadata_value(value):
        """Backslash-escape the characters the ffmetadata format treats specially."""
        if value is None:
            return None
        return "".join("\\" + char if char in "=;#\\\n" else char for char in value)

    encoder = args.encoder if args.encoder is not None else "aac"

    input_audio_filenames = sorted(glob.glob(args.input_files))
    if not input_audio_filenames:
        print("No matching audio files were found")
        raise SystemExit(1)

    if args.interactive:
        print("Files to be merged:")
        for input_audio_filename in input_audio_filenames:
            print(input_audio_filename)
        input("Press any key to continue...")

    # Lay the files end to end: each chapter starts where the previous one ended.
    files_metadata = []
    cumulative_time = 0  # In nanoseconds
    for filename in input_audio_filenames:
        chapter_end_time = cumulative_time + get_nanoseconds_for_file(filename)
        files_metadata.append(FileMetadata(filename, cumulative_time, chapter_end_time))
        cumulative_time = chapter_end_time

    # Global tags default to those of the first input file.
    initial_metadata = get_metadata_lines_from_file(input_audio_filenames[0])

    # Values typed on the command line are raw text and must be escaped before
    # being written into the ffmetadata file; values read back from ffmpeg's own
    # ffmetadata dump are already in that escaped form.
    title_override = _escape_ffmetadata_value(args.title)
    author_override = _escape_ffmetadata_value(args.author)

    # Setup global metadata (the album tag deliberately follows --title too)
    metadata_parts = [
        ";FFMETADATA1\n",
        f"title={get_element_from_metadata('title', initial_metadata, title_override)}\n",
        f"album={get_element_from_metadata('album', initial_metadata, title_override)}\n",
        f"artist={get_element_from_metadata('artist', initial_metadata, author_override)}\n",
    ]

    # Add all chapter markers to metadata
    # Timebase isn't set, so is defaulted to nanoseconds
    for i, file_metadata in enumerate(files_metadata):
        chapter_title = get_chapter_name(file_metadata, i, args.keep_chapter_names)
        metadata_parts.append(
            "[CHAPTER]\n"
            f"START={file_metadata.chapter_start_time_ns}\n"
            f"END={file_metadata.chapter_end_time_ns}\n"
            f"title={chapter_title}\n"
        )
    metadata = "".join(metadata_parts)

    if args.interactive:
        print("=" * 10 + "METADATA" + "=" * 10)
        print(metadata)
        print("=" * 10 + "END METADATA" + "=" * 10)
        input("Press any key to continue...")

    # One unique id per run keeps temporary files from colliding with leftovers
    # of an earlier run or with a concurrent run in the same directory.
    run_id = str(uuid.uuid4())
    new_metadata_file_location = "metadata-" + run_id + ".txt"
    if args.cover_image:
        # With cover art the merge goes to a temporary file first, because the
        # cover is attached in a second ffmpeg pass.
        output_filename = "temp-merged-" + run_id + ".m4b"
    else:
        output_filename = args.output_filename

    try:
        # Write all the custom metadata to the temporary metadata file.
        # The default encoding is kept so text read from ffmpeg round-trips.
        with open(new_metadata_file_location, "w") as new_metadata_file:
            new_metadata_file.write(metadata)

        command = concat_using_ffmpeg_filters(
            input_audio_filenames, output_filename, new_metadata_file_location, encoder
        )
        if args.interactive:
            print("Command to be run:")
            print(command)
            input("Press any key to continue...")

        merge_status = os.system(command)
    finally:
        # Remove the temporary metadata file even if something above failed.
        if os.path.exists(new_metadata_file_location):
            os.remove(new_metadata_file_location)

    if merge_status != 0:
        print("ffmpeg failed to merge the input files")
        raise SystemExit(1)

    if args.cover_image:
        # Re-mux the merged file with the cover image attached as cover art.
        cover_output_filename = "cover-" + output_filename
        cover_command = [
            "ffmpeg",
            "-i", output_filename,
            "-i", args.cover_image,
            "-map", "0",
            "-map", "1",
            "-c", "copy",
            "-dn",
            "-disposition:v:0", "attached_pic",
            cover_output_filename,
        ]
        if args.interactive:
            print("Will attempt cover: " + args.cover_image)
            print("With command " + " ".join(shlex.quote(part) for part in cover_command))
            input("Press any key to continue...")

        # Argument list, no shell: paths are passed to ffmpeg verbatim.
        if subprocess.run(cover_command).returncode != 0:
            if os.path.exists(cover_output_filename):
                os.remove(cover_output_filename)
            # Keep the merged audio so a long encode is not thrown away.
            print(
                "ffmpeg failed to embed the cover image; "
                f"the merged audio without cover art was kept at {output_filename}"
            )
            raise SystemExit(1)

        os.remove(output_filename)
        # shutil.move also works when the destination is on another filesystem.
        shutil.move(cover_output_filename, args.output_filename)
        output_filename = args.output_filename

    output_file_size_megabytes = os.path.getsize(output_filename) / BYTES_IN_ONE_MEGABYTE
    print(
        f"Audiobook file successfully created at {output_filename}, " +
        f"with {len(input_audio_filenames)} chapters, " +
        f"with a size of {output_file_size_megabytes:.2f}MB"
    )