Parallel tar extractor: a C++ program to read a tar file and extract the files from it.
ID: 3749781 • Letter: P
Question
Parallel tar extractor
A C++ program to read a tar file and extract the files from it. The program will fork off a copy
of itself to handle each file in the archive.
We will
limit the archive to hold only normal files (no links or special files). The format for the
header of a tar file is described as:
The header record for an old-style tar archive consists of the following:
/*
 * Header record of an old-style tar archive.
 *
 * Every member is a plain character array, so the record has no internal
 * padding; the field widths add up to 512 bytes.
 */
struct header_old_tar {
    char name[100];      /* bytes   0..99  */
    char mode[8];        /* bytes 100..107 */
    char uid[8];         /* bytes 108..115 */
    char gid[8];         /* bytes 116..123 */
    char size[12];       /* bytes 124..135 */
    char mtime[12];      /* bytes 136..147 */
    char checksum[8];    /* bytes 148..155 */
    char linkflag[1];    /* byte  156      */
    char linkname[100];  /* bytes 157..256 */
    char pad[255];       /* bytes 257..511, brings the record up to 512 bytes */
};
All unused bytes in the header record are filled with nulls.
Explanation / Answer
#define ASCII_TO_NUMBER(num) ((num)-48)
/**
 * Decodes an ASCII octal number stored in a fixed-width tar header field.
 *
 * Leading spaces are skipped. Decoding stops at the first byte that is not an
 * octal digit (NUL, space, or anything else) or after `siz` bytes, whichever
 * comes first. No byte outside [info, info + siz) is ever read, so fields
 * that fill their whole width without a terminator are decoded correctly.
 *
 * @param info pointer to the first byte of the field
 * @param siz  width of the field in bytes (defaults to 12)
 * @return the decoded value, or 0 when the field contains no octal digits
 */
static uint64_t decodeTarOctal(char* info, size_t siz = 12) {
    const unsigned char* field = reinterpret_cast<const unsigned char*>(info);

    // Some writers right-align the digits and pad on the left with spaces.
    size_t pos = 0;
    while (pos < siz && field[pos] == ' ') {
        ++pos;
    }

    // Accumulate most-significant digit first; stop at the terminator.
    uint64_t total = 0;
    for (; pos < siz; ++pos) {
        const unsigned char digit = field[pos];
        if (digit < '0' || digit > '7') {
            break;
        }
        total = total * 8 + static_cast<uint64_t>(ASCII_TO_NUMBER(digit));
    }
    return total;
}
Header info structure
/**
 * In-memory image of one tar header block.
 *
 * All members are character arrays (or a single char), so the struct has no
 * internal padding and the member widths add up to exactly 512 bytes; a raw
 * 512-byte block can therefore be read directly into an instance.
 */
struct TARFileHeader {
    char filename[100];
    char mod[8];
    char u_id[8];
    char gid[8];
    char file_siz[12];          // octal ASCII, decoded by getFileSize()
    char lastModification[12];
    char check_sum[8];          // octal ASCII, verified by check_total()
    char typeFlag;
    char linked_filename[100];
    char ustarIndicator[6];     // compared against "ustar" by isUSTAR()
    char u_star_Version[2];
    char owner_userName[32];
    char owner_groupname[32];
    char deviceMajor_Number[8];
    char deviceMinor_Number[8];
    char filenamePrefix[155];
    char pading[12];

    /** @return true when the indicator field begins with "ustar". */
    bool isUSTAR() const {
        return (memcmp("ustar", ustarIndicator, 5) == 0);
    }

    /**
     * @return the value of the octal size field.
     * Not const because decodeTarOctal takes a mutable char pointer.
     */
    size_t getFileSize() {
        return decodeTarOctal(file_siz);
    }

    /**
     * Verifies the header checksum.
     *
     * The checksum is the sum of all header bytes with the eight bytes of the
     * checksum field itself counted as spaces. Both the unsigned-byte and the
     * signed-byte sums are accepted. The header is not modified.
     *
     * @return true when the stored checksum matches either sum
     */
    bool check_total() const {
        // Decode from a private copy that is one byte longer than the field
        // and NUL-terminated, so the decoder always finds a terminator inside
        // the buffer.
        char stored[sizeof(check_sum) + 1];
        memcpy(stored, check_sum, sizeof(check_sum));
        stored[sizeof(check_sum)] = '\0';
        const uint64_t referenceChecksum = decodeTarOctal(stored, sizeof(check_sum));

        const unsigned char* asUnsigned = reinterpret_cast<const unsigned char*>(this);
        const signed char* asSigned = reinterpret_cast<const signed char*>(this);
        const size_t fieldBegin =
            static_cast<size_t>(check_sum - reinterpret_cast<const char*>(this));
        const size_t fieldEnd = fieldBegin + sizeof(check_sum);

        uint64_t unsigned_sum = 0;
        int64_t signedSum = 0;
        for (size_t i = 0; i < sizeof(TARFileHeader); ++i) {
            if (i >= fieldBegin && i < fieldEnd) {
                // The checksum field counts as if it were filled with spaces.
                unsigned_sum += ' ';
                signedSum += ' ';
            } else {
                unsigned_sum += asUnsigned[i];
                signedSum += asSigned[i];
            }
        }

        // A negative signed sum can never equal the (unsigned) stored value.
        return referenceChecksum == unsigned_sum
            || (signedSum >= 0 && referenceChecksum == static_cast<uint64_t>(signedSum));
    }
};
// The header is read from the archive as one raw block, so its size is fixed.
static_assert(sizeof(TARFileHeader) == 512, "TARFileHeader must be exactly 512 bytes");
A pure ANSI-C implementation might be possible, but decompression isn’t nearly as plug-and-play there as it is with boost::iostreams.
Complete source code:
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <string>
#include <boost/algorithm/string.hpp>
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <boost/iostreams/filtering_stream.hpp>
using namespace std;
using namespace boost::iostreams;
#define ASCII_TO_NUMBER(num) ((num)-48)
/**
 * Decodes an ASCII octal number stored in a fixed-width tar header field.
 *
 * Leading spaces are skipped. Decoding stops at the first byte that is not an
 * octal digit (NUL, space, or anything else) or after `siz` bytes, whichever
 * comes first. No byte outside [info, info + siz) is ever read, so fields
 * that fill their whole width without a terminator are decoded correctly.
 *
 * @param info pointer to the first byte of the field
 * @param siz  width of the field in bytes (defaults to 12)
 * @return the decoded value, or 0 when the field contains no octal digits
 */
static uint64_t decodeTarOctal(char* info, size_t siz = 12) {
    const unsigned char* field = reinterpret_cast<const unsigned char*>(info);

    // Some writers right-align the digits and pad on the left with spaces.
    size_t pos = 0;
    while (pos < siz && field[pos] == ' ') {
        ++pos;
    }

    // Accumulate most-significant digit first; stop at the terminator.
    uint64_t total = 0;
    for (; pos < siz; ++pos) {
        const unsigned char digit = field[pos];
        if (digit < '0' || digit > '7') {
            break;
        }
        total = total * 8 + static_cast<uint64_t>(ASCII_TO_NUMBER(digit));
    }
    return total;
}
/**
 * In-memory image of one tar header block.
 *
 * All members are character arrays (or a single char), so the struct has no
 * internal padding and the member widths add up to exactly 512 bytes; a raw
 * 512-byte block can therefore be read directly into an instance.
 */
struct TARFileHeader {
    char filename[100];
    char mod[8];
    char u_id[8];
    char gid[8];
    char file_siz[12];          // octal ASCII, decoded by getFileSize()
    char lastModification[12];
    char check_sum[8];          // octal ASCII, verified by check_total()
    char typeFlag;
    char linked_filename[100];
    char ustarIndicator[6];     // compared against "ustar" by isUSTAR()
    char u_star_Version[2];
    char owner_userName[32];
    char owner_groupname[32];
    char deviceMajor_Number[8];
    char deviceMinor_Number[8];
    char filenamePrefix[155];
    char pading[12];

    /** @return true when the indicator field begins with "ustar". */
    bool isUSTAR() const {
        return (memcmp("ustar", ustarIndicator, 5) == 0);
    }

    /**
     * @return the value of the octal size field.
     * Not const because decodeTarOctal takes a mutable char pointer.
     */
    size_t getFileSize() {
        return decodeTarOctal(file_siz);
    }

    /**
     * Verifies the header checksum.
     *
     * The checksum is the sum of all header bytes with the eight bytes of the
     * checksum field itself counted as spaces. Both the unsigned-byte and the
     * signed-byte sums are accepted. The header is not modified.
     *
     * @return true when the stored checksum matches either sum
     */
    bool check_total() const {
        // Decode from a private copy that is one byte longer than the field
        // and NUL-terminated, so the decoder always finds a terminator inside
        // the buffer.
        char stored[sizeof(check_sum) + 1];
        memcpy(stored, check_sum, sizeof(check_sum));
        stored[sizeof(check_sum)] = '\0';
        const uint64_t referenceChecksum = decodeTarOctal(stored, sizeof(check_sum));

        const unsigned char* asUnsigned = reinterpret_cast<const unsigned char*>(this);
        const signed char* asSigned = reinterpret_cast<const signed char*>(this);
        const size_t fieldBegin =
            static_cast<size_t>(check_sum - reinterpret_cast<const char*>(this));
        const size_t fieldEnd = fieldBegin + sizeof(check_sum);

        uint64_t unsigned_sum = 0;
        int64_t signedSum = 0;
        for (size_t i = 0; i < sizeof(TARFileHeader); ++i) {
            if (i >= fieldBegin && i < fieldEnd) {
                // The checksum field counts as if it were filled with spaces.
                unsigned_sum += ' ';
                signedSum += ' ';
            } else {
                unsigned_sum += asUnsigned[i];
                signedSum += asSigned[i];
            }
        }

        // A negative signed sum can never equal the (unsigned) stored value.
        return referenceChecksum == unsigned_sum
            || (signedSum >= 0 && referenceChecksum == static_cast<uint64_t>(signedSum));
    }
};
// The header is read from the archive as one raw block, so its size is fixed.
static_assert(sizeof(TARFileHeader) == 512, "TARFileHeader must be exactly 512 bytes");
int main(int argc, char** argv) {
if (argc < 2) {
cerr << "Usage: " << argv[0] << " <TAR archive>" << endl;
return 1;
}
ifstream fin(argv[1], ios_base::in | ios_base::binary);
filtering_istream in;
string filename(argv[1]);
if (boost::algorithm::iends_with(filename, ".gz")) {
in.push(gzip_decompressor());
} else if (boost::algorithm::iends_with(filename, ".bz2")) {
in.push(bzip2_decompressor());
} else if (boost::algorithm::iends_with(filename, ".tar")) {
} else {
cerr << "Unknown file suffix: " << filename << endl;
return 1;
}
in.push(fin);
char zeroblock[512];
memset(zeroblock, 0, 512);
bool nextEntryHasLongName = false;
while (in) {
TARFileHeader currentFileHeader;
in.read((char*) ¤tFileHeader, 512);
if(memcmp(¤tFileHeader, zeroblock, 512) == 0) {
cout << "Found TAR end ";
break;
}
string filename(currentFileHeader.filename, min((size_t)100, strlen(currentFileHeader.filename)));
size_t prefixLength = strlen(currentFileHeader.filenamePrefix);
if(prefixLength > 0) {
filename = string(currentFileHeader.filenamePrefix, min((size_t)155, prefixLength)) + "/" + filename;
}
if (currentFileHeader.typeFlag == '0' || currentFileHeader.typeFlag == 0) {
if(nextEntryHasLongName) {
filename = string(currentFileHeader.filename);
in.read((char*) ¤tFileHeader, 512);
nextEntryHasLongName = false;
}
size_t siz = currentFileHeader.getFileSize();
cout << "Found file '" << filename << "' (" << siz << " bytes) ";
char* fileData = new char[siz + 1];
in.read(fileData, siz);
delete[] fileData;
size_t paddingBytes = (512 - (siz % 512)) % 512;
in.ignore(paddingBytes);
} else if (currentFileHeader.typeFlag == '5') {
cout << "Found directory '" << filename << "' ";
} else if(currentFileHeader.typeFlag == 'L') {
nextEntryHasLongName = true;
} else {
cout << "Found unhandled TAR Entry type " << currentFileHeader.typeFlag << " ";
}
}
fin.close();
}
Related Questions
Navigate
Integrity-first tutoring: explanations and feedback only — we do not complete graded work. Learn more.