As an example I’m using the hexdump executable from the REC homepage. The program named hexdump reads the files from the command line and outputs the content in hex and ascii. A sample output looks like
00000000: 7F 45 4C 46 01 01 01 00 00 00 00 00 00 00 00 00.ELF............|
00000010: 02 00 03 00 01 00 00 00 A0 85 04 08 34 00 00 00............4...|
00000020: 40 23 00 00 00 00 00 00 34 00 20 00 05 00 28 00@#......4. ...(.|
00000030: 19 00 16 00 06 00 00 00 34 00 00 00 34 80 04 08........4...4...|
00000040: 34 80 04 08 A0 00 00 00 A0 00 00 00 05 00 00 004...............|
00000050: 04 00 00 00 03 00 00 00 D4 00 00 00 D4 80 04 08................|
...
The original source code is the following:
#include <stdio.h>
#include <sys/stat.h>
void dumpline(unsigned char *p, unsigned long offset, int cnt)
{
char buff[80];
int c;
int len;
sprintf(buff, "%08lX:", offset);
if(cnt > 16)
cnt = 16;
for(c = 0; c < cnt; ++c)
sprintf(buff + c * 3 + 9, " %02lX", p[c]);
while(c++ < 16)
strcat(buff, " ");
len = strlen(buff);
strcpy(buff + len, " |");
for(c = 0; c < cnt; ++c)
buff[len + c] = (p[c] >= ' ' && p[c] <= 0x7e) ? p[c] : '.';
while(c < 16)
buff[len + c++] = ' ';
strcpy(buff + len + c, "|");
printf("%s\n", buff);
}
int hexdump(char *fname)
{
unsigned char buff[16];
unsigned long offset;
FILE *fp;
struct stat st;
int cnt;
if(!stat(fname, &st)) {
perror(fname);
return(1);
}
if(!(fp = fopen(fname, "rb"))) {
perror(fname);
return(1);
}
offset = 0;
while(offset < st.st_size) {
cnt = fread(buff, 1, sizeof(buff), fp);
if(!cnt)
break;
dumpline(buff, offset, cnt);
offset += cnt;
}
fclose(fp);
return(0);
}
int main(int argc, char *argv[])
{
int i;
int errs;
errs = 0;
for(i = 1; i < argc; ++i)
errs += hexdump(argv[i]);
return(errs);
}
As you can see three user functions and some library calls.
Using the current version of holdec the decompiled output looks like:
// address: 08048660.0
// full-signature: func(dumpline, return=[], parameter=[<ptr(int(unsigned, 1)),p,unknown>, <int(unsigned, 4),offset,unknown>, <int(undef, 4),cnt,unknown>], varargs=false)
void dumpline(u1* p, u4 offset, d4 cnt)
{
(void) sprintf(&buffer, "%08lX:", offset);
if(cnt > 16) {
(void) STORE(&cnt, 16);
}
reg_var1 = 0;
while(reg_var1 < cnt) {
(void) sprintf(&buffer + reg_var1 * 3 + 9, " %02lX", UNSIGNED_EXTEND(LOAD(p + reg_var1)));
reg_var1++;
}
while(1) {
reg_var1++;
if(reg_var1 + -1 > 15) {
break;
}
(void) strcat(&buffer, " ");
}
reg_f = strlen(&buffer);
(void) strcpy(&buffer + reg_f, " |");
reg_var2 = 0;
while(reg_var2 < cnt) {
reg_l = LOAD(p + reg_f + reg_var2) < 32 || LOAD(p + reg_f + reg_var2) > 126 ? 46 : LOAD(p + reg_f + reg_var2);
(void) STORE(&buffer + reg_f + reg_var2, reg_l);
reg_var2 += reg_f + 1;
}
for(...) {
// state = PASSIVE_USED_IN_MULTIPLE_JUMPS
while(reg_var2 < 16) {
(void) STORE(&buffer + reg_f + reg_var2, 32);
reg_var2++;
}
}
(void) strcpy(&buffer + reg_f + reg_var2, "|");
(void) printf("%s\n", &buffer);
}
// address: 080487f0.0
// full-signature: func(hexdump, return=[<int(undef, 4),null,unknown>], parameter=[<ptr(int(undef, 1)),fname,unknown>], varargs=false)
d4 hexdump(d1* fname)
{
reg_a = stat(fname, &stat_buffer);
if(reg_a == 0) {
(void) perror(fname);
reg_result = 1;
} else {
reg_c = fopen(fname, "rb");
if(reg_c == 0) {
(void) perror(fname);
reg_result = 1;
} else {
reg_var1 = 0;
while(reg_var1 < LOAD(&stat_buffer + 20)) {
reg_g = fread(&read_buffer, 1, 16, reg_c);
if(reg_g == 0) {
break;
}
(void) dumpline(&read_buffer, reg_var1, reg_g);
reg_var1 += reg_g;
}
(void) fclose(reg_c);
reg_result = 0;
}
}
return reg_result;
}
// address: 080488e0.0
// full-signature: func(main, return=[<int(undef, 4),null,unknown>], parameter=[<int(undef, 4),argc,unknown>, <ptr(ptr(int(undef, 1))),argv,unknown>], varargs=false)
d4 main(d4 argc, d1** argv)
{
reg_result = 0;
reg_var1 = 1;
while(reg_var1 < argc) {
reg_e = hexdump(LOAD(argv + reg_var1 * 4));
reg_result += reg_e;
reg_var1++;
}
return reg_result;
}
This is still work in progress since obviously some aspects are still open:
- The type system is not running again. With it in place the LOAD and STORE would be replaced by pointer or
struct
accesses. The function signatures also depend on the type system.