diff --git a/Makefile b/Makefile
index a60cf64..afaf8f0 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,7 @@ OBJ=\
 	$(BUILDDIR)/wave.o \
 	$(BUILDDIR)/ogg.o \
 	$(BUILDDIR)/mpg123.o \
+	$(BUILDDIR)/mp4.o \
 	$(BUILDDIR)/id3.o \
 	$(BUILDDIR)/midi.o \
 	$(BUILDDIR)/mod.o \
@@ -21,7 +22,7 @@ all: $(BIN)
 $(BIN): $(OBJ)
 	$(CC) $(CFLAGS) $(OBJ) -o $@
 
-$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpg123.h id3.h midi.h mod.h s3m.h it.h
+$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpg123.h mp4.h id3.h midi.h mod.h s3m.h it.h
 	$(CC) $(CFLAGS) $< -o $@ -c
 
 $(BUILDDIR)/wave.o: wave.c audioextract.h wave.h
@@ -33,6 +34,9 @@ $(BUILDDIR)/ogg.o: ogg.c audioextract.h ogg.h
 $(BUILDDIR)/mpg123.o: mpg123.c audioextract.h mpg123.h
 	$(CC) $(CFLAGS) $< -o $@ -c
 
+$(BUILDDIR)/mp4.o: mp4.c audioextract.h mp4.h
+	$(CC) $(CFLAGS) $< -o $@ -c
+
 $(BUILDDIR)/id3.o: id3.c audioextract.h id3.h
 	$(CC) $(CFLAGS) $< -o $@ -c
 
diff --git a/README.md b/README.md
index a571c19..c1af9c8 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,7 @@ small, so using the `--min-size` one can hopefully extract only real MPEG files.
     midi MIDI files
     mod FastTracker files
     mpg123 any MPEG layer 1/2/3 files (e.g. MP3)
+    mp4 MP4 files
     ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)
     riff little-endian (Windows) wave files
     s3m ScreamTracker III files
diff --git a/audioextract.c b/audioextract.c
index 98a3309..9a0d9bd 100644
--- a/audioextract.c
+++ b/audioextract.c
@@ -15,6 +15,7 @@
 #include "wave.h"
 #include "ogg.h"
 #include "mpg123.h"
+#include "mp4.h"
 #include "id3.h"
 #include "midi.h"
 #include "mod.h"
@@ -22,22 +23,23 @@
 #include "it.h"
 
 enum fileformat {
-	NONE = 0,
-	OGG = 1,
-	RIFF = 2,
-	AIFF = 4,
-	MPG123 = 8,
-	ID3v2 = 16,
-	MIDI = 32,
-	MOD = 64,
-	S3M = 128,
-	IT = 256,
+	NONE = 0,
+	OGG = 1,
+	RIFF = 2,
+	AIFF = 4,
+	MPG123 = 8,
+	ID3v2 = 16,
+	MP4 = 32,
+	MIDI = 64,
+	MOD = 128,
+	S3M = 256,
+	IT = 512,
 	// TODO:
-// XM = 512,
+// XM = 1024,
 };
 
-#define ALL_FORMATS (OGG | RIFF | AIFF | MPG123 | ID3v2 | MIDI | MOD | S3M | IT)
-#define DEFAULT_FORMATS (OGG | RIFF | AIFF | ID3v2 | MIDI | S3M | IT)
+#define ALL_FORMATS (OGG | RIFF | AIFF | MPG123 | MP4 | ID3v2 | MIDI | MOD | S3M | IT)
+#define DEFAULT_FORMATS (OGG | RIFF | AIFF | MP4 | ID3v2 | MIDI | S3M | IT)
 #define TRACKER_FORMATS (MOD | S3M | IT)
 
 int usage(int argc, char **argv)
@@ -62,6 +64,7 @@ int usage(int argc, char **argv)
 		" midi MIDI files\n"
 		" mod FastTracker files\n"
 		" mpg123 any MPEG layer 1/2/3 files (e.g. MP3)\n"
+		" mp4 MP4 files\n"
 		" ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)\n"
 		" riff little-endian (Windows) wave files\n"
 		" s3m ScreamTracker III files\n"
@@ -120,11 +123,7 @@ int probalby_mod_text(const unsigned char *str, size_t length)
 
 const unsigned char *findmagic(const unsigned char *start, const unsigned char *end, int formats, enum fileformat *format)
 {
-	if ((intptr_t)end < 4)
-		return NULL;
-	end -= 4;
-
-	for (; start < end; ++ start)
+	for (size_t length = end - start; length >= 4; ++ start, -- length)
 	{
 		uint32_t magic = MAGIC(start);
 
@@ -158,29 +157,29 @@ const unsigned char *findmagic(const unsigned char *start, const unsigned char *
 			*format = IT;
 			return start;
 		}
+		else if (formats & MP4 && length > MP4_HEADER_SIZE && MAGIC(start + MP4_MAGIC_OFFSET) == MP4_MAGIC)
+		{
+			*format = MP4;
+			return start;
+		}
 		else if (formats & MPG123 && IS_MPG123_MAGIC(start))
 		{
 			*format = MPG123;
 			return start;
 		}
-		else
+		else if (formats & S3M && length > S3M_MAGIC_OFFSET + 4 && MAGIC(start + S3M_MAGIC_OFFSET) == S3M_MAGIC)
 		{
-			size_t length = (size_t)(end - start);
-
-			if (formats & S3M && length >= S3M_MAGIC_OFFSET && MAGIC(start + S3M_MAGIC_OFFSET) == S3M_MAGIC)
+			*format = S3M;
+			return start;
+		}
+		else if (formats & MOD && length > MOD_MAGIC_OFFSET + 4)
+		{
+			const unsigned char *modmagic = start + MOD_MAGIC_OFFSET;
+			if (IS_MOD_MAGIC(modmagic))
 			{
-				*format = S3M;
+				*format = MOD;
 				return start;
 			}
-			else if (formats & MOD && length >= MOD_MAGIC_OFFSET)
-			{
-				const unsigned char *modmagic = start + MOD_MAGIC_OFFSET;
-				if (IS_MOD_MAGIC(modmagic))
-				{
-					*format = MOD;
-					return start;
-				}
-			}
 		}
 	}
 
@@ -254,6 +253,7 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
 	size_t namelen = strlen(outdir) + strlen(filename) + 24;
 
 	struct mpg123_info mpg123;
+	struct mp4_info mp4;
 	size_t count = 0; // e.g. for tracks count in midi
 	const unsigned char *audio_start = NULL;
 
@@ -382,6 +382,15 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
 				else ++ ptr;
 				break;
 
+			case MP4:
+				if (mp4_isfile(ptr, end, &mp4))
+				{
+					WRITE_FILE(ptr, mp4.length, mp4.ext);
+					ptr += mp4.length;
+				}
+				else ++ ptr;
+				break;
+
 			case MIDI:
 				if (midi_isheader(ptr, end, &length, &count))
 				{
@@ -495,6 +504,10 @@ int parse_formats(const char *formats)
 		{
 			mask = MPG123;
 		}
+		else if (strncasecmp("mp4", start, len) == 0)
+		{
+			mask = MP4;
+		}
 		else if (strncasecmp("id3v2", start, len) == 0)
 		{
 			mask = ID3v2;
diff --git a/mp4.c b/mp4.c
new file mode 100644
index 0000000..4dd21c0
--- /dev/null
+++ b/mp4.c
@@ -0,0 +1,193 @@
+#include "mp4.h"
+
+#include <string.h>
+
+/* Maps an "ftyp" brand to the extension used for the extracted file. */
+struct ftype {
+	const char *brand; /* exactly 4 characters, padded with spaces */
+	const char *ext;   /* 0 selects the generic "mp4" extension */
+};
+
+static const struct ftype mp4_ftypes[] = {
+	{ "3g2a", "3g2" },
+	{ "3g2b", "3g2" },
+	{ "3g2c", "3g2" },
+	{ "3ge6", "3gp" },
+	{ "3ge7", "3gp" },
+	{ "3gg6", "3gp" },
+	{ "3gp1", "3gp" },
+	{ "3gp2", "3gp" },
+	{ "3gp3", "3gp" },
+	{ "3gp4", "3gp" },
+	{ "3gp5", "3gp" },
+	{ "3gp6", "3gp" },
+	{ "3gs7", "3gp" },
+	{ "avc1", 0 },
+	{ "CAEP", 0 },
+	{ "caqv", 0 },
+	{ "CDes", 0 },
+	{ "da0a", 0 },
+	{ "da0b", 0 },
+	{ "da1a", 0 },
+	{ "da1b", 0 },
+	{ "da2a", 0 },
+	{ "da2b", 0 },
+	{ "da3a", 0 },
+	{ "da3b", 0 },
+	{ "dmb1", 0 },
+	{ "dmpf", 0 },
+	{ "drc1", "mp4" },
+	{ "dv1a", 0 },
+	{ "dv1b", 0 },
+	{ "dv2a", 0 },
+	{ "dv2b", 0 },
+	{ "dv3a", 0 },
+	{ "dv3b", 0 },
+	{ "dvr1", "dvb" },
+	{ "dvt1", "dvb" },
+	{ "F4V ", "f4v" },
+	{ "F4P ", "f4p" },
+	{ "F4A ", "f4a" },
+	{ "F4B ", "f4b" },
+	{ "isc2", 0 },
+	{ "iso2", "mp4" },
+	{ "isom", "mp4" },
+	{ "JP2 ", "jp2" },
+	{ "JP20", "jp2" },
+	{ "jpm ", "jpm" },
+	{ "jpx ", "jpx" },
+	{ "KDDI", "3gp" },
+	{ "M4A ", "m4a" },
+	{ "M4B ", "m4b" },
+	{ "M4P ", "m4p" },
+	{ "M4V ", "m4v" },
+	{ "M4VH", "m4v" },
+	{ "M4VP", "m4v" },
+	{ "mj2s", "jp2" },
+	{ "mjp2", "jp2" },
+	{ "mmp4", "mp4" },
+	{ "mp21", 0 },
+	{ "mp41", "mp4" },
+	{ "mp42", "mp4" },
+	{ "mp71", "mp4" },
+	{ "MPPI", 0 },
+	{ "mqt ", "mqv" },
+	{ "MSNV", "mp4" },
+	{ "NDAS", 0 },
+	{ "NDSC", "mp4" },
+	{ "NDSH", "mp4" },
+	{ "NDSM", "mp4" },
+	{ "NDSP", "mp4" },
+	{ "NDSS", "mp4" },
+	{ "NDXC", "mp4" },
+	{ "NDXH", "mp4" },
+	{ "NDXM", "mp4" },
+	{ "NDXP", "mp4" },
+	{ "NDXS", "mp4" },
+	{ "odcf", 0 },
+	{ "opf2", 0 },
+	{ "opx2", 0 },
+	{ "pana", 0 },
+	{ "qt  ", "mov" },
+	{ "ROSS", 0 },
+	{ "sdv ", 0 },
+	{ "ssc1", 0 },
+	{ "ssc2", 0 },
+	{ 0     , 0 }
+};
+
+/* Top-level atom types accepted after the "ftyp" atom. */
+static const char *const mp4_atom_types[] = {
+	"ftyp", "moov", "mdat", "free", "skip", "wide", "pnot", 0
+};
+
+enum {
+	MP4_ATOM_HEAD_SIZE = 8, /* 32 bit size followed by 32 bit type */
+	MP4_BRAND_OFFSET   = 8  /* major brand inside the "ftyp" atom */
+};
+
+/* Reads 32 bits without byte swapping. Atoms may start at any address,
+ * so memcpy is used instead of dereferencing a casted pointer. */
+static uint32_t mp4_load32(const unsigned char *ptr)
+{
+	uint32_t value;
+	memcpy(&value, ptr, sizeof value);
+	return value;
+}
+
+/* Returns the extension for a brand as loaded from the file, NULL if unknown. */
+static const char *mp4_find_ext(uint32_t brand)
+{
+	for (const struct ftype *ftype = mp4_ftypes; ftype->brand; ++ ftype)
+	{
+		if (MAGIC(ftype->brand) == brand)
+			return ftype->ext ? ftype->ext : "mp4";
+	}
+	return NULL;
+}
+
+/* Returns 1 if type (as loaded from the file) is a known top-level atom. */
+static int mp4_isatom_type(uint32_t type)
+{
+	for (size_t i = 0; mp4_atom_types[i]; ++ i)
+	{
+		if (MAGIC(mp4_atom_types[i]) == type)
+			return 1;
+	}
+	return 0;
+}
+
+int mp4_isfile(const unsigned char *start, const unsigned char *end, struct mp4_info *info)
+{
+	size_t input_len = (size_t)(end - start);
+
+	if (input_len < MP4_HEADER_SIZE || mp4_load32(start + MP4_MAGIC_OFFSET) != MP4_MAGIC)
+		return 0;
+
+	size_t length = be32toh(mp4_load32(start));
+
+	/* The complete "ftyp" atom has to be inside the input, otherwise the
+	 * brand scan and the reported length would run past end. */
+	if (length < MP4_HEADER_SIZE || length > input_len)
+		return 0;
+
+	const char *ext = mp4_find_ext(mp4_load32(start + MP4_BRAND_OFFSET));
+
+	/* unknown major brand: fall back to the first known compatible brand */
+	for (size_t offset = MP4_HEADER_SIZE; !ext && length - offset >= 4; offset += 4)
+	{
+		uint32_t brand = mp4_load32(start + offset);
+		if (brand)
+			ext = mp4_find_ext(brand);
+	}
+
+	if (!ext)
+		return 0;
+
+	/* Append every known atom that follows. 64 bit sizes (size == 1) and
+	 * "until end of file" atoms (size == 0) are not followed. */
+	while (input_len - length >= MP4_ATOM_HEAD_SIZE)
+	{
+		const unsigned char *head = start + length;
+		size_t size = be32toh(mp4_load32(head));
+
+		if (size < MP4_ATOM_HEAD_SIZE || !mp4_isatom_type(mp4_load32(head + 4)))
+			break;
+
+		if (size > input_len - length)
+		{
+			/* truncated atom: keep what is left of the input */
+			length = input_len;
+			break;
+		}
+		length += size;
+	}
+
+	if (info)
+	{
+		info->length = length;
+		info->ext = ext;
+	}
+
+	return 1;
+}
diff --git a/mp4.h b/mp4.h
new file mode 100644
index 0000000..29333d7
--- /dev/null
+++ b/mp4.h
@@ -0,0 +1,24 @@
+#ifndef AUDIOEXTRACT_MP4_H__
+#define AUDIOEXTRACT_MP4_H__
+
+#include "audioextract.h"
+
+/* "ftyp" is stored 4 bytes into the file, after the 32 bit atom size. */
+#define MP4_MAGIC MAGIC("ftyp")
+#define MP4_MAGIC_OFFSET 4
+/* size, type, major brand and minor version of the "ftyp" atom */
+#define MP4_HEADER_SIZE 16
+
+/* Result of mp4_isfile(). */
+struct mp4_info {
+	size_t length;   /* number of bytes, counted from start, to extract */
+	const char *ext; /* extension chosen from the brand, never NULL */
+};
+
+/* Checks whether an MP4 style file begins at start.
+ * Returns 1 and fills *info (if info is not NULL) when [start, end) begins
+ * with a complete "ftyp" atom naming a known brand, 0 otherwise.
+ * info->length never exceeds end - start. */
+int mp4_isfile(const unsigned char *start, const unsigned char *end, struct mp4_info *info);
+
+#endif /* AUDIOEXTRACT_MP4_H__ */