mediaextract/audioextract.c

379 lines
7.1 KiB
C
Raw Normal View History

2012-12-26 21:08:18 +00:00
/*
* audioextract
2012-12-26 21:08:18 +00:00
*
* Author: Mathias Panzenböck
* This is derived from oggextract:
* http://ner.mine.nu/oggextract/
*
* Original author of oggextract: Adrian Keet
2012-12-26 21:08:18 +00:00
*/
#include <stdio.h>
2012-12-27 03:58:43 +00:00
#include <stdint.h>
#include <stdlib.h>
2012-12-26 21:08:18 +00:00
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
2012-12-27 03:58:43 +00:00
#include <arpa/inet.h>
#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)
#define __WINDOWS__
#endif
#ifndef __WINDOWS__
#include <endian.h>
#endif
2012-12-26 21:08:18 +00:00
2012-12-27 01:05:02 +00:00
/* Number of bytes in an Ogg page header up to and including the segment count byte. */
#define OGG_HEADER_SIZE 27

/* Non-zero when bit 0x02 of the header-type byte (offset 5) is set,
 * i.e. the page is flagged as the first page of a logical stream. */
#define ogg_isinitial(data) ((data)[5] & 2)

/* Number of bytes in a RIFF/IFF chunk header: 4-byte id followed by a 32-bit size. */
#define WAVE_HEADER_SIZE 8

/*
 * Four-character codes expressed as the value obtained when the four bytes
 * are loaded as one native 32-bit integer. On little-endian hosts the bytes
 * therefore appear reversed in the constant; on big-endian hosts they appear
 * in reading order.
 */
#if defined(__WINDOWS__) || __BYTE_ORDER == __LITTLE_ENDIAN

#	define OGG_MAGIC  0x5367674f /* "OggS" (reversed) */
#	define RIFF_MAGIC 0x46464952 /* "RIFF" (reversed) */
#	define WAVE_MAGIC 0x45564157 /* "WAVE" (reversed) */
#	define FORM_MAGIC 0x4d524f46 /* "FORM" (reversed) */
#	define AIFF_MAGIC 0x46464941 /* "AIFF" (reversed) */
#	define AIFC_MAGIC 0x43464941 /* "AIFC" (reversed) */

#elif __BYTE_ORDER == __BIG_ENDIAN

#	define OGG_MAGIC  0x4f676753 /* "OggS" */
#	define RIFF_MAGIC 0x52494646 /* "RIFF" */
#	define WAVE_MAGIC 0x57415645 /* "WAVE" */
#	define FORM_MAGIC 0x464f524d /* "FORM" */
#	define AIFF_MAGIC 0x41494646 /* "AIFF" */
#	define AIFC_MAGIC 0x41494643 /* "AIFC" */

#else

#error unsupported endian

#endif
/*
 * Container formats reported by findmagic().
 * NONE (0) is the "nothing recognized" value used as the initial state.
 */
enum fileformat {
	NONE = 0, /* no format */
	OGG,      /* 1: "OggS" capture pattern */
	RIFF,     /* 2: "RIFF" chunk */
	AIFF      /* 3: "FORM" chunk */

	/* TODO: MP3, AAC and MKV? */
};
/*
 * Print a one-line usage message to stderr.
 *
 * argc/argv are the values received by main(). The program name is taken
 * from argv[0]; when it is unavailable (argc <= 0, or a NULL argv/argv[0])
 * the literal "audioextract" is printed instead, so that NULL is never
 * passed to the %s conversion.
 *
 * Always returns 255, so the caller can write `return usage(argc, argv);`.
 */
int usage(int argc, char **argv)
{
	const char *progname = "audioextract";

	if (argc > 0 && argv != NULL && argv[0] != NULL)
		progname = argv[0];

	fprintf(stderr, "Usage: %s <filename> [<filename> ...]\n", progname);

	return 255;
}
2012-12-27 03:58:43 +00:00
const unsigned char *findmagic(const unsigned char *start, const unsigned char *end, enum fileformat *format)
2012-12-26 21:08:18 +00:00
{
if (end < (unsigned char *)4)
2012-12-27 03:58:43 +00:00
return NULL;
2012-12-27 01:05:02 +00:00
end -= 4;
for (; start < end; ++ start)
{
2012-12-27 03:58:43 +00:00
switch (*(const int32_t *)start)
{
case OGG_MAGIC:
*format = OGG;
return start;
case RIFF_MAGIC:
*format = RIFF;
return start;
case FORM_MAGIC:
*format = AIFF;
return start;
}
2012-12-27 01:05:02 +00:00
}
2012-12-26 21:08:18 +00:00
2012-12-27 01:05:02 +00:00
return NULL;
2012-12-26 21:08:18 +00:00
}
2012-12-27 01:05:02 +00:00
int ogg_ispage(const unsigned char *start, const unsigned char *end, size_t *lengthptr)
2012-12-26 21:08:18 +00:00
{
2012-12-27 01:05:02 +00:00
unsigned char nsegs;
size_t length, i;
const unsigned char *segs = start + OGG_HEADER_SIZE;
/* full header available? */
if (end <= (unsigned char*)OGG_HEADER_SIZE || end - OGG_HEADER_SIZE < start)
return 0;
2012-12-26 21:08:18 +00:00
/* capture pattern */
2012-12-27 03:58:43 +00:00
if (*(const int32_t *)start != OGG_MAGIC)
2012-12-26 21:08:18 +00:00
return 0;
/* stream structure version */
2012-12-27 01:05:02 +00:00
if (start[4] != 0x00)
2012-12-26 21:08:18 +00:00
return 0;
/* header type flag */
2012-12-27 01:05:02 +00:00
if ((start[5] & ~7) != 0x00)
2012-12-26 21:08:18 +00:00
return 0;
2012-12-27 01:05:02 +00:00
nsegs = start[26];
length = OGG_HEADER_SIZE + nsegs;
/* segment sizes fully available? */
if (end <= (unsigned char*)length || end - length < start)
return 0;
for (i = 0; i < nsegs; ++ i)
{
length += segs[i];
}
/* segments fully available? */
if (end <= (unsigned char*)length || end - length < start)
return 0;
if (lengthptr)
*lengthptr = length;
2012-12-26 21:08:18 +00:00
/* I think we can reasonably assume it is a real page now */
return 1;
}
2012-12-27 03:58:43 +00:00
/*
 * Check whether a complete RIFF/WAVE file starts at `start`.
 *
 * start, end  bounds of the readable data; `end` is one past the last byte.
 * lengthptr   optional; on success receives the total size in bytes
 *             (value of the size field plus the 8-byte chunk header).
 *
 * Returns 1 when the data begins with "RIFF", the form type at offset 8 is
 * "WAVE", and the whole chunk lies inside [start, end); returns 0 otherwise.
 *
 * The size field is decoded bytewise as little-endian into a 64-bit value,
 * so the result is independent of host byte order and alignment and cannot
 * wrap around for size fields close to 2^32.
 */
int wave_ischunk(const unsigned char *start, const unsigned char *end, size_t *lengthptr)
{
	enum {
		CHUNK_HEADER_SIZE = 8, /* 4-byte id + 32-bit size */
		MIN_FILE_SIZE = 12     /* chunk header + 4-byte form type */
	};
	size_t available;
	uint64_t length;

	if (end < start)
		return 0;

	available = (size_t)(end - start);

	/* the form type at offset 8..11 must be readable as well */
	if (available < MIN_FILE_SIZE)
		return 0;

	if (memcmp(start, "RIFF", 4) != 0)
		return 0;

	length = (uint64_t)start[4]
	       | (uint64_t)start[5] << 8
	       | (uint64_t)start[6] << 16
	       | (uint64_t)start[7] << 24;
	length += CHUNK_HEADER_SIZE;

	/* the chunk must contain its form type and must not run past `end` */
	if (length < MIN_FILE_SIZE || length > available)
		return 0;

	if (memcmp(start + CHUNK_HEADER_SIZE, "WAVE", 4) != 0)
		return 0;

	if (lengthptr)
		*lengthptr = (size_t)length;

	return 1;
}
/*
 * Check whether a complete AIFF or AIFF-C file starts at `start`.
 *
 * start, end  bounds of the readable data; `end` is one past the last byte.
 * lengthptr   optional; on success receives the total size in bytes
 *             (value of the size field plus the 8-byte chunk header).
 *
 * Returns 1 when the data begins with "FORM", the form type at offset 8 is
 * "AIFF" or "AIFC", and the whole chunk lies inside [start, end); returns 0
 * otherwise.
 *
 * The size field is decoded bytewise as big-endian into a 64-bit value, so
 * the result is independent of host byte order and alignment and cannot wrap
 * around for size fields close to 2^32. The form type is compared as four
 * bytes rather than being squeezed into a narrower integer.
 */
int aiff_ischunk(const unsigned char *start, const unsigned char *end, size_t *lengthptr)
{
	enum {
		CHUNK_HEADER_SIZE = 8, /* 4-byte id + 32-bit size */
		MIN_FILE_SIZE = 12     /* chunk header + 4-byte form type */
	};
	size_t available;
	uint64_t length;
	const unsigned char *formtype;

	if (end < start)
		return 0;

	available = (size_t)(end - start);

	/* the form type at offset 8..11 must be readable as well */
	if (available < MIN_FILE_SIZE)
		return 0;

	if (memcmp(start, "FORM", 4) != 0)
		return 0;

	length = (uint64_t)start[4] << 24
	       | (uint64_t)start[5] << 16
	       | (uint64_t)start[6] << 8
	       | (uint64_t)start[7];
	length += CHUNK_HEADER_SIZE;

	/* the chunk must contain its form type and must not run past `end` */
	if (length < MIN_FILE_SIZE || length > available)
		return 0;

	formtype = start + CHUNK_HEADER_SIZE;
	if (memcmp(formtype, "AIFF", 4) != 0 && memcmp(formtype, "AIFC", 4) != 0)
		return 0;

	if (lengthptr)
		*lengthptr = (size_t)length;

	return 1;
}
2012-12-27 01:05:02 +00:00
/*
 * Return the part of `path` after the last directory separator.
 *
 * The result points into `path` itself (nothing is allocated). When `path`
 * contains no separator the whole string is returned; when it ends with a
 * separator the result is the empty string at its end.
 * `path` must be a valid NUL-terminated string.
 */
const char *basename(const char *path)
{
	const char *sep = strrchr(path, '/');

#ifdef __WINDOWS__
	/* Windows supports both / and \ */
	const char *backslash = strrchr(path, '\\');

	/* only compare the two positions when both separators were found */
	if (backslash != NULL && (sep == NULL || backslash > sep))
		sep = backslash;
#endif

	return sep != NULL ? sep + 1 : path;
}
2012-12-27 01:05:02 +00:00
int extract(const char *filepath, size_t *numfilesptr)
2012-12-26 21:08:18 +00:00
{
2012-12-27 01:05:02 +00:00
int fd = -1;
2012-12-26 21:08:18 +00:00
struct stat statdata;
2012-12-27 01:05:02 +00:00
size_t filesize = 0;
unsigned char *filedata = NULL;
const unsigned char *ptr = NULL, *end = NULL;
2012-12-27 03:58:43 +00:00
enum fileformat format = NONE;
2012-12-26 21:08:18 +00:00
2012-12-27 03:58:43 +00:00
size_t length = 0;
2012-12-26 21:08:18 +00:00
int outfd = -1;
2012-12-27 01:05:02 +00:00
int success = 1;
char *outfilename = NULL;
size_t numfiles = 0;
const char *filename = basename(filepath);
size_t namelen = strlen(filename) + 22;
2012-12-26 21:08:18 +00:00
2012-12-27 01:05:02 +00:00
printf("Extracting %s\n", filepath);
2012-12-26 21:08:18 +00:00
2012-12-27 01:05:02 +00:00
fd = open(filepath, O_RDONLY);
if (fd < 0)
2012-12-26 21:08:18 +00:00
{
2012-12-27 01:05:02 +00:00
perror("open");
success = 0;
goto exit_numfiles;
2012-12-26 21:08:18 +00:00
}
2012-12-27 01:05:02 +00:00
if (fstat(fd, &statdata) < 0)
2012-12-26 21:08:18 +00:00
{
2012-12-27 01:05:02 +00:00
perror("stat");
success = 0;
goto exit_fd;
2012-12-26 21:08:18 +00:00
}
2012-12-27 01:05:02 +00:00
if (S_ISDIR(statdata.st_mode))
2012-12-26 21:08:18 +00:00
{
2012-12-27 01:05:02 +00:00
fprintf(stderr, "error: Is a directory: %s\n", filepath);
success = 0;
goto exit_fd;
2012-12-26 21:08:18 +00:00
}
2012-12-27 01:05:02 +00:00
filesize = statdata.st_size;
2012-12-26 21:08:18 +00:00
filedata = mmap(0, filesize, PROT_READ, MAP_PRIVATE, fd, 0);
2012-12-27 03:58:43 +00:00
if (filedata == MAP_FAILED)
{
2012-12-27 01:05:02 +00:00
perror("mmap");
success = 0;
goto exit_fd;
}
2012-12-26 21:08:18 +00:00
2012-12-27 01:05:02 +00:00
outfilename = malloc(namelen);
2012-12-27 03:58:43 +00:00
if (outfilename == NULL)
{
2012-12-27 01:05:02 +00:00
perror("malloc");
success = 0;
goto exit_munmap;
}
2012-12-26 21:08:18 +00:00
2012-12-27 03:58:43 +00:00
#define OPEN_OUTFD(ext) \
snprintf(outfilename, namelen, "%s_%08zx.%s", filename, (size_t)(ptr - filedata), ext); \
2012-12-27 03:58:43 +00:00
outfd = creat(outfilename, -1); \
if (outfd < 0) \
{ \
perror("creat"); \
success = 0; \
goto exit_free; \
} \
++ numfiles; \
2012-12-27 03:58:43 +00:00
printf("Writing: %s\n", outfilename)
2012-12-27 01:05:02 +00:00
ptr = filedata;
2012-12-27 03:58:43 +00:00
for (end = filedata + filesize; (ptr = findmagic(ptr, end, &format));)
2012-12-26 21:08:18 +00:00
{
2012-12-27 03:58:43 +00:00
switch (format)
2012-12-26 21:08:18 +00:00
{
2012-12-27 03:58:43 +00:00
case OGG:
if (ogg_ispage(ptr, end, &length) && ogg_isinitial(ptr))
{
OPEN_OUTFD("ogg");
do {
write(outfd, ptr, length);
ptr += length;
} while (ptr < end && ogg_ispage(ptr, end, &length));
close(outfd);
continue;
}
break;
2012-12-26 21:08:18 +00:00
2012-12-27 03:58:43 +00:00
case RIFF:
if (wave_ischunk(ptr, end, &length))
2012-12-27 01:05:02 +00:00
{
2012-12-27 03:58:43 +00:00
OPEN_OUTFD("wav");
write(outfd, ptr, length);
ptr += length;
close(outfd);
continue;
2012-12-27 01:05:02 +00:00
}
2012-12-27 03:58:43 +00:00
break;
case AIFF:
if (aiff_ischunk(ptr, end, &length))
{
OPEN_OUTFD("aif");
write(outfd, ptr, length);
ptr += length;
close(outfd);
continue;
}
break;
case NONE:
break;
2012-12-26 21:08:18 +00:00
}
2012-12-27 03:58:43 +00:00
ptr += 4;
}
2012-12-27 01:05:02 +00:00
exit_free:
2012-12-26 21:08:18 +00:00
free(outfilename);
2012-12-27 01:05:02 +00:00
exit_munmap:
2012-12-26 21:08:18 +00:00
munmap(filedata, filesize);
2012-12-27 01:05:02 +00:00
exit_fd:
2012-12-26 21:08:18 +00:00
close(fd);
2012-12-27 01:05:02 +00:00
exit_numfiles:
if (numfilesptr)
*numfilesptr = numfiles;
2012-12-26 21:08:18 +00:00
2012-12-27 01:05:02 +00:00
return success;
2012-12-26 21:08:18 +00:00
}
/*
 * Entry point: run extract() on every file named on the command line.
 *
 * Prints the total number of files extracted from the inputs that were
 * processed successfully, and the number of failed inputs if there were any.
 *
 * Returns the value of usage() when no file name is given, 1 when at least
 * one input failed, and 0 otherwise.
 */
int main(int argc, char **argv)
{
	int i = 0;
	size_t failures = 0;
	size_t sumnumfiles = 0;

	if (argc < 2)
		return usage(argc, argv);

	for (i = 1; i < argc; ++i)
	{
		size_t numfiles = 0;

		if (extract(argv[i], &numfiles))
		{
			sumnumfiles += numfiles;
		}
		else {
			fprintf(stderr, "Error processing file: %s\n", argv[i]);
			failures += 1;
		}
	}

	/* report the sum over all inputs, not the count of the last one */
	printf("Extracted %zu file(s).\n", sumnumfiles);

	if (failures > 0)
	{
		fprintf(stderr, "%zu error(s) during extraction.\n", failures);
		return 1;
	}

	return 0;
}