added midi support

This commit is contained in:
Mathias Panzenböck 2012-12-28 18:23:27 +01:00
parent 4f42177275
commit b2f1bae729
8 changed files with 178 additions and 85 deletions

View File

@ -1,6 +1,6 @@
PREFIX=/usr/local
BUILDDIR=build
OBJ=$(BUILDDIR)/audioextract.o $(BUILDDIR)/wave.o $(BUILDDIR)/ogg.o $(BUILDDIR)/mpeg.o $(BUILDDIR)/id3.o
OBJ=$(BUILDDIR)/audioextract.o $(BUILDDIR)/wave.o $(BUILDDIR)/ogg.o $(BUILDDIR)/mpeg.o $(BUILDDIR)/id3.o $(BUILDDIR)/midi.o
CC=gcc
CFLAGS=-Wall -std=gnu99 -O2 -fmessage-length=0 -g
BIN=$(BUILDDIR)/audioextract
@ -12,7 +12,7 @@ all: $(BIN)
$(BIN): $(OBJ)
$(CC) $(CFLAGS) $(OBJ) -o $@
$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpeg.h id3.h
$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpeg.h id3.h midi.h
$(CC) $(CFLAGS) $< -o $@ -c
$(BUILDDIR)/wave.o: wave.c audioextract.h wave.h
@ -27,6 +27,9 @@ $(BUILDDIR)/mpeg.o: mpeg.c audioextract.h mpeg.h
$(BUILDDIR)/id3.o: id3.c audioextract.h id3.h
$(CC) $(CFLAGS) $< -o $@ -c
$(BUILDDIR)/midi.o: midi.c audioextract.h midi.h
$(CC) $(CFLAGS) $< -o $@ -c
install: all
install -s -D $(BIN) "$(PREFIX)/bin/audioextract"

View File

@ -3,6 +3,12 @@ Audioextract
Extract audio files that are embedded within other files.
Setup
-----
make
make install PREFIX=/usr
Usage
-----
@ -29,13 +35,13 @@ using the `--min-size` one can hopefully extract only real MPEG files.
-f, --formats=FORMATS Comma separated list of formats (file magics) to extract.
Supported formats:
all all supported formats
default the default set of formats (AIFF, ID3v2, Ogg, RIFF)
default the default set of formats (AIFF, ID3v2, Ogg, RIFF, MIDI)
aiff big-endian (Apple) wave files
id3v2 MPEG files with ID3v2 tags at the start
midi MIDI files
mpeg any MPEG files (e.g. MP3)
ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)
riff little-endian (Windows) wave files
wav alias for riff
wave both RIFF and AIFF wave files
WARNING: Because MPEG files do not have a nice file magic, using

View File

@ -16,16 +16,21 @@
#include "ogg.h"
#include "mpeg.h"
#include "id3.h"
#include "midi.h"
enum fileformat {
NONE = 0,
OGG = 1,
RIFF = 2,
AIFF = 4,
MPEG = 8,
ID3v2 = 16
/* TODO: AAC and MKV/WebM? */
NONE = 0,
OGG = 1,
RIFF = 2,
AIFF = 4,
MPEG = 8,
ID3v2 = 16,
MIDI = 32,
// TODO:
// MOD = 64,
// S3M = 128,
// IT = 256,
// XM = 512,
};
int usage(int argc, char **argv)
@ -43,13 +48,13 @@ int usage(int argc, char **argv)
" -f, --formats=FORMATS Comma separated list of formats (file magics) to extract.\n"
" Supported formats:\n"
" all all supported formats\n"
" default the default set of formats (AIFF, ID3v2, Ogg, RIFF)\n"
" default the default set of formats (AIFF, ID3v2, Ogg, RIFF, MIDI)\n"
" aiff big-endian (Apple) wave files\n"
" id3v2 MPEG files with ID3v2 tags at the start\n"
" midi MIDI files\n"
" mpeg any MPEG files (e.g. MP3)\n"
" ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)\n"
" riff little-endian (Windows) wave files\n"
" wav alias for riff\n"
" wave both RIFF and AIFF wave files\n"
"\n"
" WARNING: Because MPEG files do not have a nice file magic, using\n"
@ -107,6 +112,11 @@ const unsigned char *findmagic(const unsigned char *start, const unsigned char *
*format = AIFF;
return start;
}
else if (formats & MIDI && magic == MIDI_MAGIC)
{
*format = MIDI;
return start;
}
else if (formats & ID3v2 && IS_ID3v2_MAGIC(start))
{
*format = ID3v2;
@ -189,6 +199,7 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
size_t namelen = strlen(outdir) + strlen(filename) + 24;
struct mpeg_info mpeg;
size_t count = 0; // e.g. for tracks count in midi
const unsigned char *audio_start = NULL;
if (!quiet)
@ -245,11 +256,11 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
do {
ptr += length;
} while (ptr < end && ogg_ispage(ptr, end, &length));
} while (ogg_ispage(ptr, end, &length));
WRITE_FILE(audio_start, ptr - audio_start, "ogg");
continue;
}
else ++ ptr;
break;
case RIFF:
@ -257,8 +268,8 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
{
WRITE_FILE(ptr, length, "wav");
ptr += length;
continue;
}
else ++ ptr;
break;
case AIFF:
@ -266,8 +277,8 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
{
WRITE_FILE(ptr, length, "aif");
ptr += length;
continue;
}
else ++ ptr;
break;
case ID3v2:
@ -275,7 +286,10 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
if (format == ID3v2)
{
if (!id3v2_istag(ptr, end, 0, &length))
{
++ ptr;
break;
}
}
else
length = 0;
@ -290,10 +304,9 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
do {
ptr += mpeg.frame_size;
} while (ptr < end
&& mpeg_isframe(ptr, end, &mpeg)
} while (mpeg_isframe(ptr, end, &mpeg)
&& mpeg.version == version
&& mpeg.layer == layer);
&& mpeg.layer == layer);
if (id3v1_istag(ptr, end, &length))
{
@ -310,15 +323,32 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
layer == 2 ? "mp2" :
layer == 3 ? "mp3" :
"mpeg");
continue;
}
else ++ ptr;
break;
case MIDI:
if (midi_isheader(ptr, end, &length, &count))
{
audio_start = ptr;
do {
ptr += length;
} while (count-- > 0 && midi_istrack(ptr, end, &length));
if (count != 0 && !quiet)
{
fprintf(stderr, "warning: midi file misses %zu tracks\n", count);
}
WRITE_FILE(audio_start, ptr - audio_start, "mid");
}
else ++ ptr;
break;
case NONE:
++ ptr;
break;
}
++ ptr;
}
goto cleanup;
@ -354,69 +384,50 @@ int parse_formats(const char *formats)
end = formats + strlen(formats);
size_t len = (size_t)(end - start);
unsigned int mask = NONE;
int remove = *start == '-';
if (remove)
{
++ start;
-- len;
}
if (strncasecmp("ogg", start, len) == 0)
{
parsed |= OGG;
mask = OGG;
}
else if (strncasecmp("riff", start, len) == 0 || strncasecmp("wav", start, len) == 0)
else if (strncasecmp("riff", start, len) == 0)
{
parsed |= RIFF;
mask = RIFF;
}
else if (strncasecmp("aiff", start, len) == 0)
{
parsed |= AIFF;
mask = AIFF;
}
else if (strncasecmp("wave", start, len) == 0)
{
parsed |= RIFF | AIFF;
mask = RIFF | AIFF;
}
else if (strncasecmp("mpeg", start, len) == 0)
{
parsed |= MPEG;
mask = MPEG;
}
else if (strncasecmp("id3v2", start, len) == 0)
{
parsed |= ID3v2;
mask = ID3v2;
}
else if (strncasecmp("midi", start, len) == 0)
{
mask = MIDI;
}
else if (strncasecmp("all", start, len) == 0)
{
parsed = OGG | RIFF | AIFF | MPEG | ID3v2;
mask = OGG | RIFF | AIFF | MPEG | ID3v2 | MIDI;
}
else if (strncasecmp("default", start, len) == 0)
{
parsed |= OGG | RIFF | AIFF | ID3v2;
}
else if (strncasecmp("-ogg", start, len) == 0)
{
parsed &= ~OGG;
}
else if (strncasecmp("-riff", start, len) == 0 || strncasecmp("-wav", start, len) == 0)
{
parsed &= ~RIFF;
}
else if (strncasecmp("-aiff", start, len) == 0)
{
parsed &= ~AIFF;
}
else if (strncasecmp("-wave", start, len) == 0)
{
parsed &= ~(RIFF | AIFF);
}
else if (strncasecmp("-mpeg", start, len) == 0)
{
parsed &= ~MPEG;
}
else if (strncasecmp("-id3v2", start, len) == 0)
{
parsed &= ~ID3v2;
}
else if (strncasecmp("-all", start, len) == 0)
{
parsed &= ~(OGG | RIFF | AIFF | MPEG | ID3v2);
}
else if (strncasecmp("-default", start, len) == 0)
{
parsed &= ~(OGG | RIFF | AIFF | ID3v2);
mask = OGG | RIFF | AIFF | ID3v2 | MIDI;
}
else if (len != 0)
{
@ -426,6 +437,9 @@ int parse_formats(const char *formats)
return -1;
}
if (remove) parsed &= ~mask;
else parsed |= mask;
if (!*end)
break;
@ -454,7 +468,7 @@ int main(int argc, char **argv)
size_t numfiles = 0;
size_t minsize = 0;
size_t maxsize = (size_t)-1;
int formats = OGG | RIFF | AIFF | ID3v2;
int formats = OGG | RIFF | AIFF | ID3v2 | MIDI;
const char *outdir = ".";
long long tmp = 0;
size_t size = 0;

View File

@ -15,8 +15,22 @@
#define __WINDOWS__
#endif
#ifndef __WINDOWS__
#include <endian.h>
#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
# include <sys/endian.h>
# define le32toh letoh32
# define be32toh betoh32
# define le16toh letoh16
# define be16toh betoh16
#elif defined(__OpenBSD__)
# include <sys/endian.h>
#else
# include <endian.h>
#endif
#ifndef __BYTE_ORDER

44
midi.c Normal file
View File

@ -0,0 +1,44 @@
#include "midi.h"
/**
 * Test whether a MIDI header chunk ("MThd") begins at `start`.
 *
 * The header is accepted only when the buffer holds at least
 * MIDI_HEADER_SIZE bytes, the chunk tag matches, the declared chunk size
 * is exactly 6, the format type is 0, 1 or 2, at least one track is
 * declared, and a format 0 file declares no more than one track.
 *
 * @param start     first byte to inspect
 * @param end       one past the last readable byte of the buffer
 * @param lengthptr if non-NULL, receives MIDI_HEADER_SIZE on success
 * @param tracksptr if non-NULL, receives the declared track count on success
 * @return 1 if a valid header was found, 0 otherwise
 */
int midi_isheader(const unsigned char *start, const unsigned char *end, size_t *lengthptr, size_t *tracksptr)
{
	/* Compare the number of available bytes instead of forming
	 * `end - MIDI_HEADER_SIZE`, which may point before the buffer. */
	if (end < start || (size_t)(end - start) < MIDI_HEADER_SIZE)
		return 0;

	/* `start` can sit at any byte offset inside the scanned data, so the
	 * fields are decoded byte by byte rather than through casts to wider
	 * integer types (which would be misaligned, aliasing-unsafe reads). */

	/* chunk tag: 'M' 'T' 'h' 'd' */
	if (start[0] != 0x4D || start[1] != 0x54 || start[2] != 0x68 || start[3] != 0x64)
		return 0;

	/* all multi-byte fields are stored big-endian */
	uint32_t chunk_size = ((uint32_t)start[4] << 24)
	                    | ((uint32_t)start[5] << 16)
	                    | ((uint32_t)start[6] <<  8)
	                    |  (uint32_t)start[7];
	uint16_t format_type = (uint16_t)(((unsigned)start[8]  << 8) | start[9]);
	uint16_t tracks      = (uint16_t)(((unsigned)start[10] << 8) | start[11]);

	if (chunk_size != 6
		|| format_type > 2
		|| tracks == 0
		|| (format_type == 0 && tracks > 1))
		return 0;

	if (lengthptr) *lengthptr = MIDI_HEADER_SIZE;
	if (tracksptr) *tracksptr = tracks;

	return 1;
}
/**
 * Test whether a complete MIDI track chunk ("MTrk") begins at `start`.
 *
 * A track is accepted only when the chunk tag matches and the whole chunk
 * (the MIDI_TRACK_HEADER_SIZE byte chunk header plus the declared payload)
 * lies inside [start, end).
 *
 * @param start     first byte to inspect
 * @param end       one past the last readable byte of the buffer
 * @param lengthptr if non-NULL, receives the total chunk length
 *                  (header + payload) on success
 * @return 1 if a complete track chunk was found, 0 otherwise
 */
int midi_istrack(const unsigned char *start, const unsigned char *end, size_t *lengthptr)
{
	/* Work with the number of available bytes so that no pointer outside
	 * the buffer is ever formed. */
	if (end < start)
		return 0;

	size_t available = (size_t)(end - start);

	if (available < MIDI_TRACK_HEADER_SIZE)
		return 0;

	/* chunk tag: 'M' 'T' 'r' 'k' -- compared byte-wise because `start`
	 * has no alignment guarantee */
	if (start[0] != 0x4D || start[1] != 0x54 || start[2] != 0x72 || start[3] != 0x6B)
		return 0;

	/* payload size, stored big-endian */
	uint32_t chunk_size = ((uint32_t)start[4] << 24)
	                    | ((uint32_t)start[5] << 16)
	                    | ((uint32_t)start[6] <<  8)
	                    |  (uint32_t)start[7];

	/* Check the payload against the remaining bytes before adding the
	 * header size: the sum could wrap around where size_t is 32 bits wide. */
	if (chunk_size > available - MIDI_TRACK_HEADER_SIZE)
		return 0;

	if (lengthptr) *lengthptr = (size_t)MIDI_TRACK_HEADER_SIZE + chunk_size;

	return 1;
}

28
midi.h Normal file
View File

@ -0,0 +1,28 @@
/* Detection of MIDI header ("MThd") and track ("MTrk") chunks. */
#ifndef AUDIOEXTRACT_MIDI_H__
#define AUDIOEXTRACT_MIDI_H__
#include "audioextract.h"
/*
 * Chunk tags expressed as 32-bit integers in host byte order, i.e. the value
 * obtained when the four tag bytes are loaded as one native integer. The
 * quoted strings show the tag as it reads in the hexadecimal constant.
 */
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define MIDI_MAGIC 0x6468544D /* "dhTM" */
# define MIDI_TRACK_MAGIC 0x6B72544D /* "krTM" */
#elif __BYTE_ORDER == __BIG_ENDIAN
# define MIDI_MAGIC 0x4D546864 /* "MThd" */
# define MIDI_TRACK_MAGIC 0x4D54726B /* "MTrk" */
#else
# error unsupported endian
#endif
/* Total size in bytes of the header chunk: 4-byte tag, 4-byte chunk size and
 * the 6-byte payload that midi_isheader() requires. */
#define MIDI_HEADER_SIZE 14
/* Size in bytes of a track chunk's own header: 4-byte tag plus 4-byte
 * payload size. */
#define MIDI_TRACK_HEADER_SIZE 8
/*
 * Returns 1 if a valid MIDI header chunk starts at `start` and fits before
 * `end`, 0 otherwise. On success *lengthptr is set to MIDI_HEADER_SIZE and
 * *tracksptr to the track count declared in the header; either pointer may
 * be NULL.
 */
int midi_isheader(const unsigned char *start, const unsigned char *end, size_t *lengthptr, size_t *tracksptr);
/*
 * Returns 1 if a MIDI track chunk starts at `start` and lies completely
 * before `end`, 0 otherwise. On success *lengthptr (may be NULL) is set to
 * the chunk's total length, MIDI_TRACK_HEADER_SIZE plus its payload size.
 */
int midi_istrack(const unsigned char *start, const unsigned char *end, size_t *lengthptr);
#endif /* AUDIOEXTRACT_MIDI_H__ */

2
ogg.h
View File

@ -5,7 +5,7 @@
#if __BYTE_ORDER == __LITTLE_ENDIAN
# define OGG_MAGIC 0x5367674f /* "OggS" (reversed) */
# define OGG_MAGIC 0x5367674f /* "SggO" */
#elif __BYTE_ORDER == __BIG_ENDIAN

16
wave.c
View File

@ -1,19 +1,3 @@
#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
# include <sys/endian.h>
# define le32toh letoh32
# define be32toh betoh32
#elif defined(__OpenBSD__)
# include <sys/endian.h>
#else
# include <endian.h>
#endif
#include "wave.h"
int wave_ischunk(const unsigned char *start, const unsigned char *end, size_t *lengthptr)