added midi support
This commit is contained in:
parent
4f42177275
commit
b2f1bae729
7
Makefile
7
Makefile
|
@ -1,6 +1,6 @@
|
|||
PREFIX=/usr/local
|
||||
BUILDDIR=build
|
||||
OBJ=$(BUILDDIR)/audioextract.o $(BUILDDIR)/wave.o $(BUILDDIR)/ogg.o $(BUILDDIR)/mpeg.o $(BUILDDIR)/id3.o
|
||||
OBJ=$(BUILDDIR)/audioextract.o $(BUILDDIR)/wave.o $(BUILDDIR)/ogg.o $(BUILDDIR)/mpeg.o $(BUILDDIR)/id3.o $(BUILDDIR)/midi.o
|
||||
CC=gcc
|
||||
CFLAGS=-Wall -std=gnu99 -O2 -fmessage-length=0 -g
|
||||
BIN=$(BUILDDIR)/audioextract
|
||||
|
@ -12,7 +12,7 @@ all: $(BIN)
|
|||
$(BIN): $(OBJ)
|
||||
$(CC) $(CFLAGS) $(OBJ) -o $@
|
||||
|
||||
$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpeg.h id3.h
|
||||
$(BUILDDIR)/audioextract.o: audioextract.c audioextract.h ogg.h wave.h mpeg.h id3.h midi.h
|
||||
$(CC) $(CFLAGS) $< -o $@ -c
|
||||
|
||||
$(BUILDDIR)/wave.o: wave.c audioextract.h wave.h
|
||||
|
@ -27,6 +27,9 @@ $(BUILDDIR)/mpeg.o: mpeg.c audioextract.h mpeg.h
|
|||
$(BUILDDIR)/id3.o: id3.c audioextract.h id3.h
|
||||
$(CC) $(CFLAGS) $< -o $@ -c
|
||||
|
||||
$(BUILDDIR)/midi.o: midi.c audioextract.h midi.h
|
||||
$(CC) $(CFLAGS) $< -o $@ -c
|
||||
|
||||
install: all
|
||||
install -s -D $(BIN) "$(PREFIX)/bin/audioextract"
|
||||
|
||||
|
|
10
README.md
10
README.md
|
@ -3,6 +3,12 @@ Audioextract
|
|||
|
||||
Extract audio files that are embedded within other files.
|
||||
|
||||
Setup
|
||||
-----
|
||||
|
||||
make
|
||||
make install PREFIX=/usr
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
|
@ -29,13 +35,13 @@ using the `--min-size` one can hopefully extract only real MPEG files.
|
|||
-f, --formats=FORMATS Comma separated list of formats (file magics) to extract.
|
||||
Supported formats:
|
||||
all all supported formats
|
||||
default the default set of formats (AIFF, ID3v2, Ogg, RIFF)
|
||||
default the default set of formats (AIFF, ID3v2, Ogg, RIFF, MIDI)
|
||||
aiff big-endian (Apple) wave files
|
||||
id3v2 MPEG files with ID3v2 tags at the start
|
||||
midi MIDI files
|
||||
mpeg any MPEG files (e.g. MP3)
|
||||
ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)
|
||||
riff little-endian (Windows) wave files
|
||||
wav alias for riff
|
||||
wave both RIFF and AIFF wave files
|
||||
|
||||
WARNING: Because MPEG files do not have a nice file magic, using
|
||||
|
|
138
audioextract.c
138
audioextract.c
|
@ -16,16 +16,21 @@
|
|||
#include "ogg.h"
|
||||
#include "mpeg.h"
|
||||
#include "id3.h"
|
||||
#include "midi.h"
|
||||
|
||||
enum fileformat {
|
||||
NONE = 0,
|
||||
OGG = 1,
|
||||
RIFF = 2,
|
||||
AIFF = 4,
|
||||
MPEG = 8,
|
||||
ID3v2 = 16
|
||||
|
||||
/* TODO: AAC and MKV/WebM? */
|
||||
NONE = 0,
|
||||
OGG = 1,
|
||||
RIFF = 2,
|
||||
AIFF = 4,
|
||||
MPEG = 8,
|
||||
ID3v2 = 16,
|
||||
MIDI = 32,
|
||||
// TODO:
|
||||
// MOD = 64,
|
||||
// S3M = 128,
|
||||
// IT = 256,
|
||||
// XM = 512,
|
||||
};
|
||||
|
||||
int usage(int argc, char **argv)
|
||||
|
@ -43,13 +48,13 @@ int usage(int argc, char **argv)
|
|||
" -f, --formats=FORMATS Comma separated list of formats (file magics) to extract.\n"
|
||||
" Supported formats:\n"
|
||||
" all all supported formats\n"
|
||||
" default the default set of formats (AIFF, ID3v2, Ogg, RIFF)\n"
|
||||
" default the default set of formats (AIFF, ID3v2, Ogg, RIFF, MIDI)\n"
|
||||
" aiff big-endian (Apple) wave files\n"
|
||||
" id3v2 MPEG files with ID3v2 tags at the start\n"
|
||||
" midi MIDI files\n"
|
||||
" mpeg any MPEG files (e.g. MP3)\n"
|
||||
" ogg Ogg files (Vorbis, FLAC, Opus, Theora, etc.)\n"
|
||||
" riff little-endian (Windows) wave files\n"
|
||||
" wav alias for riff\n"
|
||||
" wave both RIFF and AIFF wave files\n"
|
||||
"\n"
|
||||
" WARNING: Because MPEG files do not have a nice file magic, using\n"
|
||||
|
@ -107,6 +112,11 @@ const unsigned char *findmagic(const unsigned char *start, const unsigned char *
|
|||
*format = AIFF;
|
||||
return start;
|
||||
}
|
||||
else if (formats & MIDI && magic == MIDI_MAGIC)
|
||||
{
|
||||
*format = MIDI;
|
||||
return start;
|
||||
}
|
||||
else if (formats & ID3v2 && IS_ID3v2_MAGIC(start))
|
||||
{
|
||||
*format = ID3v2;
|
||||
|
@ -189,6 +199,7 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
size_t namelen = strlen(outdir) + strlen(filename) + 24;
|
||||
|
||||
struct mpeg_info mpeg;
|
||||
size_t count = 0; // e.g. for tracks count in midi
|
||||
const unsigned char *audio_start = NULL;
|
||||
|
||||
if (!quiet)
|
||||
|
@ -245,11 +256,11 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
|
||||
do {
|
||||
ptr += length;
|
||||
} while (ptr < end && ogg_ispage(ptr, end, &length));
|
||||
} while (ogg_ispage(ptr, end, &length));
|
||||
|
||||
WRITE_FILE(audio_start, ptr - audio_start, "ogg");
|
||||
continue;
|
||||
}
|
||||
else ++ ptr;
|
||||
break;
|
||||
|
||||
case RIFF:
|
||||
|
@ -257,8 +268,8 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
{
|
||||
WRITE_FILE(ptr, length, "wav");
|
||||
ptr += length;
|
||||
continue;
|
||||
}
|
||||
else ++ ptr;
|
||||
break;
|
||||
|
||||
case AIFF:
|
||||
|
@ -266,8 +277,8 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
{
|
||||
WRITE_FILE(ptr, length, "aif");
|
||||
ptr += length;
|
||||
continue;
|
||||
}
|
||||
else ++ ptr;
|
||||
break;
|
||||
|
||||
case ID3v2:
|
||||
|
@ -275,7 +286,10 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
if (format == ID3v2)
|
||||
{
|
||||
if (!id3v2_istag(ptr, end, 0, &length))
|
||||
{
|
||||
++ ptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
length = 0;
|
||||
|
@ -290,10 +304,9 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
|
||||
do {
|
||||
ptr += mpeg.frame_size;
|
||||
} while (ptr < end
|
||||
&& mpeg_isframe(ptr, end, &mpeg)
|
||||
} while (mpeg_isframe(ptr, end, &mpeg)
|
||||
&& mpeg.version == version
|
||||
&& mpeg.layer == layer);
|
||||
&& mpeg.layer == layer);
|
||||
|
||||
if (id3v1_istag(ptr, end, &length))
|
||||
{
|
||||
|
@ -310,15 +323,32 @@ int extract(const char *filepath, const char *outdir, size_t minsize, size_t max
|
|||
layer == 2 ? "mp2" :
|
||||
layer == 3 ? "mp3" :
|
||||
"mpeg");
|
||||
continue;
|
||||
}
|
||||
else ++ ptr;
|
||||
break;
|
||||
|
||||
case MIDI:
|
||||
if (midi_isheader(ptr, end, &length, &count))
|
||||
{
|
||||
audio_start = ptr;
|
||||
do {
|
||||
ptr += length;
|
||||
} while (count-- > 0 && midi_istrack(ptr, end, &length));
|
||||
|
||||
if (count != 0 && !quiet)
|
||||
{
|
||||
fprintf(stderr, "warning: midi file misses %zu tracks\n", count);
|
||||
}
|
||||
|
||||
WRITE_FILE(audio_start, ptr - audio_start, "mid");
|
||||
}
|
||||
else ++ ptr;
|
||||
break;
|
||||
|
||||
case NONE:
|
||||
++ ptr;
|
||||
break;
|
||||
}
|
||||
|
||||
++ ptr;
|
||||
}
|
||||
|
||||
goto cleanup;
|
||||
|
@ -354,69 +384,50 @@ int parse_formats(const char *formats)
|
|||
end = formats + strlen(formats);
|
||||
|
||||
size_t len = (size_t)(end - start);
|
||||
unsigned int mask = NONE;
|
||||
int remove = *start == '-';
|
||||
|
||||
if (remove)
|
||||
{
|
||||
++ start;
|
||||
-- len;
|
||||
}
|
||||
|
||||
if (strncasecmp("ogg", start, len) == 0)
|
||||
{
|
||||
parsed |= OGG;
|
||||
mask = OGG;
|
||||
}
|
||||
else if (strncasecmp("riff", start, len) == 0 || strncasecmp("wav", start, len) == 0)
|
||||
else if (strncasecmp("riff", start, len) == 0)
|
||||
{
|
||||
parsed |= RIFF;
|
||||
mask = RIFF;
|
||||
}
|
||||
else if (strncasecmp("aiff", start, len) == 0)
|
||||
{
|
||||
parsed |= AIFF;
|
||||
mask = AIFF;
|
||||
}
|
||||
else if (strncasecmp("wave", start, len) == 0)
|
||||
{
|
||||
parsed |= RIFF | AIFF;
|
||||
mask = RIFF | AIFF;
|
||||
}
|
||||
else if (strncasecmp("mpeg", start, len) == 0)
|
||||
{
|
||||
parsed |= MPEG;
|
||||
mask = MPEG;
|
||||
}
|
||||
else if (strncasecmp("id3v2", start, len) == 0)
|
||||
{
|
||||
parsed |= ID3v2;
|
||||
mask = ID3v2;
|
||||
}
|
||||
else if (strncasecmp("midi", start, len) == 0)
|
||||
{
|
||||
mask = MIDI;
|
||||
}
|
||||
else if (strncasecmp("all", start, len) == 0)
|
||||
{
|
||||
parsed = OGG | RIFF | AIFF | MPEG | ID3v2;
|
||||
mask = OGG | RIFF | AIFF | MPEG | ID3v2 | MIDI;
|
||||
}
|
||||
else if (strncasecmp("default", start, len) == 0)
|
||||
{
|
||||
parsed |= OGG | RIFF | AIFF | ID3v2;
|
||||
}
|
||||
else if (strncasecmp("-ogg", start, len) == 0)
|
||||
{
|
||||
parsed &= ~OGG;
|
||||
}
|
||||
else if (strncasecmp("-riff", start, len) == 0 || strncasecmp("-wav", start, len) == 0)
|
||||
{
|
||||
parsed &= ~RIFF;
|
||||
}
|
||||
else if (strncasecmp("-aiff", start, len) == 0)
|
||||
{
|
||||
parsed &= ~AIFF;
|
||||
}
|
||||
else if (strncasecmp("-wave", start, len) == 0)
|
||||
{
|
||||
parsed &= ~(RIFF | AIFF);
|
||||
}
|
||||
else if (strncasecmp("-mpeg", start, len) == 0)
|
||||
{
|
||||
parsed &= ~MPEG;
|
||||
}
|
||||
else if (strncasecmp("-id3v2", start, len) == 0)
|
||||
{
|
||||
parsed &= ~ID3v2;
|
||||
}
|
||||
else if (strncasecmp("-all", start, len) == 0)
|
||||
{
|
||||
parsed &= ~(OGG | RIFF | AIFF | MPEG | ID3v2);
|
||||
}
|
||||
else if (strncasecmp("-default", start, len) == 0)
|
||||
{
|
||||
parsed &= ~(OGG | RIFF | AIFF | ID3v2);
|
||||
mask = OGG | RIFF | AIFF | ID3v2 | MIDI;
|
||||
}
|
||||
else if (len != 0)
|
||||
{
|
||||
|
@ -426,6 +437,9 @@ int parse_formats(const char *formats)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (remove) parsed &= ~mask;
|
||||
else parsed |= mask;
|
||||
|
||||
if (!*end)
|
||||
break;
|
||||
|
||||
|
@ -454,7 +468,7 @@ int main(int argc, char **argv)
|
|||
size_t numfiles = 0;
|
||||
size_t minsize = 0;
|
||||
size_t maxsize = (size_t)-1;
|
||||
int formats = OGG | RIFF | AIFF | ID3v2;
|
||||
int formats = OGG | RIFF | AIFF | ID3v2 | MIDI;
|
||||
const char *outdir = ".";
|
||||
long long tmp = 0;
|
||||
size_t size = 0;
|
||||
|
|
|
@ -15,8 +15,22 @@
|
|||
#define __WINDOWS__
|
||||
#endif
|
||||
|
||||
#ifndef __WINDOWS__
|
||||
#include <endian.h>
|
||||
#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
|
||||
|
||||
# include <sys/endian.h>
|
||||
# define le32toh letoh32
|
||||
# define be32toh betoh32
|
||||
# define le16toh letoh16
|
||||
# define be16toh betoh16
|
||||
|
||||
#elif defined(__OpenBSD__)
|
||||
|
||||
# include <sys/endian.h>
|
||||
|
||||
#else
|
||||
|
||||
# include <endian.h>
|
||||
|
||||
#endif
|
||||
|
||||
#ifndef __BYTE_ORDER
|
||||
|
|
|
@ -0,0 +1,44 @@
|
|||
#include "midi.h"
|
||||
|
||||
int midi_isheader(const unsigned char *start, const unsigned char *end, size_t *lengthptr, size_t *tracksptr)
|
||||
{
|
||||
if ((intptr_t)end <= MIDI_HEADER_SIZE || end - MIDI_HEADER_SIZE < start)
|
||||
return 0;
|
||||
|
||||
if (*(const int32_t *)start != MIDI_MAGIC)
|
||||
return 0;
|
||||
|
||||
uint32_t chunk_size = be32toh(*(const uint32_t *)(start + 4));
|
||||
uint16_t format_type = be16toh(*(const uint16_t *)(start + 8));
|
||||
uint16_t tracks = be16toh(*(const uint16_t *)(start + 10));
|
||||
|
||||
if (chunk_size != 6
|
||||
|| (format_type != 0 && format_type != 1 && format_type != 2)
|
||||
|| tracks == 0
|
||||
|| (format_type == 0 && tracks > 1))
|
||||
return 0;
|
||||
|
||||
if (lengthptr) *lengthptr = MIDI_HEADER_SIZE;
|
||||
if (tracksptr) *tracksptr = tracks;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
int midi_istrack(const unsigned char *start, const unsigned char *end, size_t *lengthptr)
|
||||
{
|
||||
if ((intptr_t)end <= MIDI_TRACK_HEADER_SIZE || end - MIDI_TRACK_HEADER_SIZE < start)
|
||||
return 0;
|
||||
|
||||
if (*(const int32_t *)start != MIDI_TRACK_MAGIC)
|
||||
return 0;
|
||||
|
||||
uint32_t chunk_size = be32toh(*(const uint32_t *)(start + 4));
|
||||
size_t length = MIDI_TRACK_HEADER_SIZE + chunk_size;
|
||||
|
||||
if ((intptr_t)end <= length || end - length < start)
|
||||
return 0;
|
||||
|
||||
if (lengthptr) *lengthptr = length;
|
||||
|
||||
return 1;
|
||||
}
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef AUDIOEXTRACT_MIDI_H__
|
||||
#define AUDIOEXTRACT_MIDI_H__
|
||||
|
||||
#include "audioextract.h"
|
||||
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
|
||||
# define MIDI_MAGIC 0x6468544D /* "dhTM" */
|
||||
# define MIDI_TRACK_MAGIC 0x6B72544D /* "krTM" */
|
||||
|
||||
#elif __BYTE_ORDER == __BIG_ENDIAN
|
||||
|
||||
# define MIDI_MAGIC 0x4D546864 /* "MThd" */
|
||||
# define MIDI_TRACK_MAGIC 0x4D54726B /* "MTrk" */
|
||||
|
||||
#else
|
||||
|
||||
# error unsupported endian
|
||||
|
||||
#endif
|
||||
|
||||
#define MIDI_HEADER_SIZE 14
|
||||
#define MIDI_TRACK_HEADER_SIZE 8
|
||||
|
||||
int midi_isheader(const unsigned char *start, const unsigned char *end, size_t *lengthptr, size_t *tracksptr);
|
||||
int midi_istrack(const unsigned char *start, const unsigned char *end, size_t *lengthptr);
|
||||
|
||||
#endif /* AUDIOEXTRACT_MIDI_H__ */
|
2
ogg.h
2
ogg.h
|
@ -5,7 +5,7 @@
|
|||
|
||||
#if __BYTE_ORDER == __LITTLE_ENDIAN
|
||||
|
||||
# define OGG_MAGIC 0x5367674f /* "OggS" (reversed) */
|
||||
# define OGG_MAGIC 0x5367674f /* "SggO" */
|
||||
|
||||
#elif __BYTE_ORDER == __BIG_ENDIAN
|
||||
|
||||
|
|
16
wave.c
16
wave.c
|
@ -1,19 +1,3 @@
|
|||
#if defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
|
||||
|
||||
# include <sys/endian.h>
|
||||
# define le32toh letoh32
|
||||
# define be32toh betoh32
|
||||
|
||||
#elif defined(__OpenBSD__)
|
||||
|
||||
# include <sys/endian.h>
|
||||
|
||||
#else
|
||||
|
||||
# include <endian.h>
|
||||
|
||||
#endif
|
||||
|
||||
#include "wave.h"
|
||||
|
||||
int wave_ischunk(const unsigned char *start, const unsigned char *end, size_t *lengthptr)
|
||||
|
|
Loading…
Reference in New Issue