From b2f1bae729ae96aa1e0daa2ce51bc0ad33fa311f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mathias=20Panzenb=C3=B6ck?= Date: Fri, 28 Dec 2012 18:23:27 +0100 Subject: [PATCH] added midi support --- Makefile | 7 ++- README.md | 10 +++- audioextract.c | 138 +++++++++++++++++++++++++++---------------------- audioextract.h | 18 ++++++- midi.c | 44 ++++++++++++++++ midi.h | 28 ++++++++++ ogg.h | 2 +- wave.c | 16 ------ 8 files changed, 178 insertions(+), 85 deletions(-) create mode 100644 midi.c create mode 100644 midi.h diff --git a/Makefile b/Makefile index 37a12d3..6a4f990 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ PREFIX=/usr/local BUILDDIR=build -OBJ=$(BUILDDIR)/audioextract.o $(BUILDDIR)/wave.o $(BUILDDIR)/ogg.o $(BUILDDIR)/mpeg.o $(BUILDDIR)/id3.o +OBJ=$(BUILDDIR)/audioextract.o $(BUILDDIR)/wave.o $(BUILDDIR)/ogg.o $(BUILDDIR)/mpeg.o $(BUILDDIR)/id3.o $(BUILDDIR)/midi.o CC=gcc CFLAGS=-Wall -std=gnu99 -O2 -fmessage-length=0 -g BIN=$(BUILDDIR)/audioextract @@ -12,7 +12,7 @@ all: $(BIN) $(BIN): $(OBJ) $(CC) $(CFLAGS) $(OBJ) -o $@ -$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpeg.h id3.h +$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpeg.h id3.h midi.h $(CC) $(CFLAGS) $< -o $@ -c $(BUILDDIR)/wave.o: wave.c audioextract.h wave.h @@ -27,6 +27,9 @@ $(BUILDDIR)/mpeg.o: mpeg.c audioextract.h mpeg.h $(BUILDDIR)/id3.o: id3.c audioextract.h id3.h $(CC) $(CFLAGS) $< -o $@ -c +$(BUILDDIR)/midi.o: midi.c audioextract.h midi.h + $(CC) $(CFLAGS) $< -o $@ -c + install: all install -s -D $(BIN) "$(PREFIX)/bin/audioextract" diff --git a/README.md b/README.md index 6857642..a1ca29f 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,12 @@ Audioextract Extract audio files that are embedded within other files. +Setup +----- + + make + make install PREFIX=/usr + Usage ----- @@ -29,13 +35,13 @@ using the `--min-size` one can hopefully extract only real MPEG files. 
-f, --formats=FORMATS Comma separated list of formats (file magics) to extract. Supported formats: all all supported formats - default the default set of formats (AIFF, ID3v2, Ogg, RIFF) + default the default set of formats (AIFF, ID3v2, Ogg, RIFF, MIDI) aiff big-endian (Apple) wave files id3v2 MPEG files with ID3v2 tags at the start + midi MIDI files mpeg any MPEG files (e.g. MP3) ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.) riff little-endian (Windows) wave files - wav alias for riff wave both RIFF and AIFF wave files WARNING: Because MPEG files do not have a nice file magic, using diff --git a/audioextract.c b/audioextract.c index 2bf951e..db3430b 100644 --- a/audioextract.c +++ b/audioextract.c @@ -16,16 +16,21 @@ #include "ogg.h" #include "mpeg.h" #include "id3.h" +#include "midi.h" enum fileformat { - NONE = 0, - OGG = 1, - RIFF = 2, - AIFF = 4, - MPEG = 8, - ID3v2 = 16 - - /* TODO: AAC and MKV/WebM? */ + NONE = 0, + OGG = 1, + RIFF = 2, + AIFF = 4, + MPEG = 8, + ID3v2 = 16, + MIDI = 32, +// TODO: +// MOD = 64, +// S3M = 128, +// IT = 256, +// XM = 512, }; int usage(int argc, char **argv) @@ -43,13 +48,13 @@ int usage(int argc, char **argv) " -f, --formats=FORMATS Comma separated list of formats (file magics) to extract.\n" " Supported formats:\n" " all all supported formats\n" - " default the default set of formats (AIFF, ID3v2, Ogg, RIFF)\n" + " default the default set of formats (AIFF, ID3v2, Ogg, RIFF, MIDI)\n" " aiff big-endian (Apple) wave files\n" " id3v2 MPEG files with ID3v2 tags at the start\n" + " midi MIDI files\n" " mpeg any MPEG files (e.g. 
MP3)\n" " ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)\n" " riff little-endian (Windows) wave files\n" - " wav alias for riff\n" " wave both RIFF and AIFF wave files\n" "\n" " WARNING: Because MPEG files do not have a nice file magic, using\n" @@ -107,6 +112,11 @@ const unsigned char *findmagic(const unsigned char *start, const unsigned char * *format = AIFF; return start; } + else if (formats & MIDI && magic == MIDI_MAGIC) + { + *format = MIDI; + return start; + } else if (formats & ID3v2 && IS_ID3v2_MAGIC(start)) { *format = ID3v2; @@ -189,6 +199,7 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max size_t namelen = strlen(outdir) + strlen(filename) + 24; struct mpeg_info mpeg; + size_t count = 0; // e.g. for tracks count in midi const unsigned char *audio_start = NULL; if (!quiet) @@ -245,11 +256,11 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max do { ptr += length; - } while (ptr < end && ogg_ispage(ptr, end, &length)); + } while (ogg_ispage(ptr, end, &length)); WRITE_FILE(audio_start, ptr - audio_start, "ogg"); - continue; } + else ++ ptr; break; case RIFF: @@ -257,8 +268,8 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max { WRITE_FILE(ptr, length, "wav"); ptr += length; - continue; } + else ++ ptr; break; case AIFF: @@ -266,8 +277,8 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max { WRITE_FILE(ptr, length, "aif"); ptr += length; - continue; } + else ++ ptr; break; case ID3v2: @@ -275,7 +286,10 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max if (format == ID3v2) { if (!id3v2_istag(ptr, end, 0, &length)) + { + ++ ptr; break; + } } else length = 0; @@ -290,10 +304,9 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max do { ptr += mpeg.frame_size; - } while (ptr < end - && mpeg_isframe(ptr, end, &mpeg) + } while (mpeg_isframe(ptr, end, &mpeg) && mpeg.version 
== version - && mpeg.layer == layer); + && mpeg.layer == layer); if (id3v1_istag(ptr, end, &length)) { @@ -310,15 +323,32 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max layer == 2 ? "mp2" : layer == 3 ? "mp3" : "mpeg"); - continue; } + else ++ ptr; + break; + + case MIDI: /* one header chunk followed by up to `count` track chunks */ + if (midi_isheader(ptr, end, &length, &count)) + { + audio_start = ptr; + ptr += length; /* step over the MThd header chunk */ + for (; count > 0 && midi_istrack(ptr, end, &length); -- count) + ptr += length; + /* count now holds the number of announced tracks that were not found */ + if (count != 0 && !quiet) + { + fprintf(stderr, "warning: midi file misses %zu tracks\n", count); + } + + WRITE_FILE(audio_start, ptr - audio_start, "mid"); + } + else ++ ptr; break; case NONE: + ++ ptr; break; } - - ++ ptr; } goto cleanup; @@ -354,69 +384,50 @@ int parse_formats(const char *formats) end = formats + strlen(formats); size_t len = (size_t)(end - start); + unsigned int mask = NONE; + int remove = *start == '-'; + + if (remove) + { + ++ start; + -- len; + } + if (strncasecmp("ogg", start, len) == 0) { - parsed |= OGG; + mask = OGG; } - else if (strncasecmp("riff", start, len) == 0 || strncasecmp("wav", start, len) == 0) + else if (strncasecmp("riff", start, len) == 0) { - parsed |= RIFF; + mask = RIFF; } else if (strncasecmp("aiff", start, len) == 0) { - parsed |= AIFF; + mask = AIFF; } else if (strncasecmp("wave", start, len) == 0) { - parsed |= RIFF | AIFF; + mask = RIFF | AIFF; } else if (strncasecmp("mpeg", start, len) == 0) { - parsed |= MPEG; + mask = MPEG; } else if (strncasecmp("id3v2", start, len) == 0) { - parsed |= ID3v2; + mask = ID3v2; + } + else if (strncasecmp("midi", start, len) == 0) + { + mask = MIDI; } else if (strncasecmp("all", start, len) == 0) { - parsed = OGG | RIFF | AIFF | MPEG | ID3v2; + mask = OGG | RIFF | AIFF | MPEG | ID3v2 | MIDI; } else if (strncasecmp("default", start, len) == 0) { - parsed |= OGG | RIFF | AIFF | ID3v2; - } - else if (strncasecmp("-ogg", start, len) == 0) - { - parsed &= ~OGG; - } - else if (strncasecmp("-riff", start, len) == 0 ||
strncasecmp("-wav", start, len) == 0) - { - parsed &= ~RIFF; - } - else if (strncasecmp("-aiff", start, len) == 0) - { - parsed &= ~AIFF; - } - else if (strncasecmp("-wave", start, len) == 0) - { - parsed &= ~(RIFF | AIFF); - } - else if (strncasecmp("-mpeg", start, len) == 0) - { - parsed &= ~MPEG; - } - else if (strncasecmp("-id3v2", start, len) == 0) - { - parsed &= ~ID3v2; - } - else if (strncasecmp("-all", start, len) == 0) - { - parsed &= ~(OGG | RIFF | AIFF | MPEG | ID3v2); - } - else if (strncasecmp("-default", start, len) == 0) - { - parsed &= ~(OGG | RIFF | AIFF | ID3v2); + mask = OGG | RIFF | AIFF | ID3v2 | MIDI; } else if (len != 0) { @@ -426,6 +437,9 @@ int parse_formats(const char *formats) return -1; } + if (remove) parsed &= ~mask; + else parsed |= mask; + if (!*end) break; @@ -454,7 +468,7 @@ int main(int argc, char **argv) size_t numfiles = 0; size_t minsize = 0; size_t maxsize = (size_t)-1; - int formats = OGG | RIFF | AIFF | ID3v2; + int formats = OGG | RIFF | AIFF | ID3v2 | MIDI; const char *outdir = "."; long long tmp = 0; size_t size = 0; diff --git a/audioextract.h b/audioextract.h index 4339849..6155ec5 100644 --- a/audioextract.h +++ b/audioextract.h @@ -15,8 +15,22 @@ #define __WINDOWS__ #endif -#ifndef __WINDOWS__ -#include +#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) + +# include +# define le32toh letoh32 +# define be32toh betoh32 +# define le16toh letoh16 +# define be16toh betoh16 + +#elif defined(__OpenBSD__) + +# include + +#else + +# include + #endif #ifndef __BYTE_ORDER diff --git a/midi.c b/midi.c new file mode 100644 index 0000000..9596367 --- /dev/null +++ b/midi.c @@ -0,0 +1,44 @@ +#include "midi.h" + +int midi_isheader(const unsigned char *start, const unsigned char *end, size_t *lengthptr, size_t *tracksptr) +{ + if ((intptr_t)end <= MIDI_HEADER_SIZE || end - MIDI_HEADER_SIZE < start) + return 0; + + if (*(const int32_t *)start != MIDI_MAGIC) + return 0; + + uint32_t chunk_size = be32toh(*(const 
uint32_t *)(start + 4)); + uint16_t format_type = be16toh(*(const uint16_t *)(start + 8)); + uint16_t tracks = be16toh(*(const uint16_t *)(start + 10)); + + if (chunk_size != 6 + || (format_type != 0 && format_type != 1 && format_type != 2) + || tracks == 0 + || (format_type == 0 && tracks > 1)) + return 0; + + if (lengthptr) *lengthptr = MIDI_HEADER_SIZE; + if (tracksptr) *tracksptr = tracks; + + return 1; +} +/* Return 1 if [start, end) begins with a complete "MTrk" chunk and store its total size (chunk header + payload) in *lengthptr if that is non-NULL; return 0 otherwise. */ +int midi_istrack(const unsigned char *start, const unsigned char *end, size_t *lengthptr) +{ + if ((intptr_t)end <= MIDI_TRACK_HEADER_SIZE || end - MIDI_TRACK_HEADER_SIZE < start) + return 0; + + if (*(const int32_t *)start != MIDI_TRACK_MAGIC) + return 0; + + uint32_t chunk_size = be32toh(*(const uint32_t *)(start + 4)); + + /* compare the payload size with the bytes left after the chunk header: no size_t wrap-around and no pointer formed before start */ + if (chunk_size > (size_t)(end - start) - MIDI_TRACK_HEADER_SIZE) + return 0; + + if (lengthptr) *lengthptr = MIDI_TRACK_HEADER_SIZE + (size_t)chunk_size; + + return 1; +} diff --git a/midi.h b/midi.h new file mode 100644 index 0000000..4404c6f --- /dev/null +++ b/midi.h @@ -0,0 +1,28 @@ +#ifndef AUDIOEXTRACT_MIDI_H__ +#define AUDIOEXTRACT_MIDI_H__ + +#include "audioextract.h" + +#if __BYTE_ORDER == __LITTLE_ENDIAN + +# define MIDI_MAGIC 0x6468544D /* "dhTM" */ +# define MIDI_TRACK_MAGIC 0x6B72544D /* "krTM" */ + +#elif __BYTE_ORDER == __BIG_ENDIAN + +# define MIDI_MAGIC 0x4D546864 /* "MThd" */ +# define MIDI_TRACK_MAGIC 0x4D54726B /* "MTrk" */ + +#else + +# error unsupported endian + +#endif + +#define MIDI_HEADER_SIZE 14 +#define MIDI_TRACK_HEADER_SIZE 8 + +int midi_isheader(const unsigned char *start, const unsigned char *end, size_t *lengthptr, size_t *tracksptr); +int midi_istrack(const unsigned char *start, const unsigned char *end, size_t *lengthptr); + +#endif /* AUDIOEXTRACT_MIDI_H__ */ diff --git a/ogg.h b/ogg.h index bad249e..ac686ed 100644 --- a/ogg.h +++ b/ogg.h @@ -5,7 +5,7 @@ #if __BYTE_ORDER == __LITTLE_ENDIAN -# define OGG_MAGIC 0x5367674f /* "OggS" (reversed) */ +# define OGG_MAGIC 0x5367674f /* "SggO" */ #elif
__BYTE_ORDER == __BIG_ENDIAN diff --git a/wave.c b/wave.c index 191c07f..598d0a8 100644 --- a/wave.c +++ b/wave.c @@ -1,19 +1,3 @@ -#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) - -# include -# define le32toh letoh32 -# define be32toh betoh32 - -#elif defined(__OpenBSD__) - -# include - -#else - -# include - -#endif - #include "wave.h" int wave_ischunk(const unsigned char *start, const unsigned char *end, size_t *lengthptr)