Extra Project 1

Memory Tracing and Semantic Recovery using Intel Pin

Due: 9/28 Fri 11:59 PM

Late Policy: For each late day (24 hours), -1 point out of 15 points

The project is not difficult, but it requires time. Please start early. : )

Submission

Please upload your project files to Google Drive (or another file-sharing service) and share them with me (yongkwon@virginia.edu).

Then send me an email with the *link*. Please don't send the files themselves, as they may be filtered or flagged. : )

Resource

0. Announcement [Description]

1. Pin intro. [slides]

2. Useful primitives in Pin [Note: Tips for the projects]

3. Pintool template: [Project Template (See the note)]

4. Target Program Sample [C Source, Asm (objdump)]

5. Code Samples [Note]

6. Virtual Machine (Linux) [1, 2, 3, 4, 5]

(1) Download VirtualBox and the 5 virtual machine files linked above.

(2) Unzip the file (if you are using Mac, KeKa can extract the 7z files. 7zip is available for other OSes).

(3) Import the OVA file (extracted from the 7z files) into VirtualBox.

Target Program Sample

This will give you an idea of what program you will trace and what you will be asked to do.

#include <stdio.h>

#include <stdlib.h>


// Sample trace target. Fills a double array, a float array, and a char buffer
// (all locals of main), then prints one element of each. When the program is
// run with at least one command-line argument (argc > 1), it additionally
// overwrites the beginning of dblData with string bytes through a char* alias.
// NOTE(review): main is declared void but ends with `return 0;`.
void main(int argc, char** argv)

{

double dblData[20];

// NOTE(review): nCntDbl, nCntFl, and nStrLen are incremented below but are
// never assigned an initial value anywhere in this function.
int nCntDbl;

float flData[20];

int nCntFl;

char* strTestString = "String Test\n";

char strData[255];

int nStrLen;

// Non-zero when any command-line argument is supplied; enables the
// buffer-overwrite path further down.
int bGoWrong = (argc > 1);

int i;

// Fill dblData. The expression parses as ((double)rand() / i) + 1, so the
// first iteration (i == 0) divides by zero.
for( i = 0; i < 20; i++ ){

dblData[i] = (double)rand()/i+1;

nCntDbl++;

}

// Same computation in single precision for flData; i == 0 again divides by zero.
for( i = 0; i < 20; i++ ){

flData[i] = (float)rand()/i+1;

nCntFl++;

}

// Byte-by-byte copy of the string literal into strData. strlen is re-evaluated
// on every iteration.
// NOTE(review): strlen is used, but only <stdio.h> and <stdlib.h> are included
// in the listing above; <string.h> does not appear.
for( i = 0; i < strlen(strTestString ); i++ ) {

strData[i] = strTestString [i];

nStrLen++;

}

// i now equals strlen(strTestString); write the terminating NUL byte.
strData[i] = 0;

// access violation

// Only taken when bGoWrong is non-zero: the double array is reinterpreted as
// a char buffer and its first strlen(strTestString) bytes are overwritten
// with the characters of the string.
if( bGoWrong ) {

printf("gowrong\n");

char* pbuf = (char*)dblData;

for( i = 0; i < strlen(strTestString); i++ ) {

pbuf[i] = strTestString[i];

}

}

// Prints dblData[1] with %f, flData[2] with %lf, and strData with %s.
printf("%f %lf %s\n", dblData[1], flData[2], strData);

return 0;

}

Assembly Code

This will give you an idea of how you can infer things from the assembly code and which hints to look for.

0000000000400666 <main>:

#include <stdio.h>

#include <stdlib.h>


void main(int argc, char** argv)

{

400666: 55 push %rbp

400667: 48 89 e5 mov %rsp,%rbp

40066a: 53 push %rbx

40066b: 48 81 ec 58 02 00 00 sub $0x258,%rsp

400672: 89 bd bc fd ff ff mov %edi,-0x244(%rbp)

400678: 48 89 b5 b0 fd ff ff mov %rsi,-0x250(%rbp)

40067f: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax

400686: 00 00

400688: 48 89 45 e8 mov %rax,-0x18(%rbp)

40068c: 31 c0 xor %eax,%eax

double dblData[20];

int nCntDbl;

float flData[20];

int nCntFl;

char* strTestString = "String Test\n";

40068e: 48 c7 85 e0 fd ff ff movq $0x400938,-0x220(%rbp)

////////////////////////////////////////////////////////////////

Hint: Statically, memory locations addressed relative to %rbp (here, -0x220(%rbp)) are local variables.

////////////////////////////////////////////////////////////////

400695: 38 09 40 00

char strData[255];

int nStrLen;

int bGoWrong = (argc > 1);

400699: 83 bd bc fd ff ff 01 cmpl $0x1,-0x244(%rbp)

////////////////////////////////////////////////////////////////

Hint: Statically, memory locations addressed relative to %rbp (here, -0x244(%rbp)) are local variables.

////////////////////////////////////////////////////////////////

4006a0: 0f 9f c0 setg %al

4006a3: 0f b6 c0 movzbl %al,%eax

4006a6: 89 85 dc fd ff ff mov %eax,-0x224(%rbp)

////////////////////////////////////////////////////////////////

Hint: Statically, memory locations addressed relative to %rbp (here, -0x224(%rbp)) are local variables.

////////////////////////////////////////////////////////////////

int i;

for( i = 0; i < 20; i++ ){

4006ac: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)

4006b3: 00 00 00

4006b6: eb 48 jmp 400700 <main+0x9a>

dblData[i] = (double)rand()/i+1;

4006b8: e8 93 fe ff ff callq 400550 <rand@plt>

4006bd: 66 0f ef c0 pxor %xmm0,%xmm0

4006c1: f2 0f 2a c0 cvtsi2sd %eax,%xmm0

4006c5: 66 0f ef c9 pxor %xmm1,%xmm1

4006c9: f2 0f 2a 8d d8 fd ff cvtsi2sdl -0x228(%rbp),%xmm1

4006d0: ff

4006d1: f2 0f 5e c1 divsd %xmm1,%xmm0

4006d5: f2 0f 10 0d 7b 02 00 movsd 0x27b(%rip),%xmm1 # 400958 <_IO_stdin_used+0x28>

4006dc: 00

4006dd: f2 0f 58 c1 addsd %xmm1,%xmm0

4006e1: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

4006e7: 48 98 cltq

4006e9: f2 0f 11 84 c5 40 fe movsd %xmm0,-0x1c0(%rbp,%rax,8)

////////////////////////////////////////////////////////////////

Hint: Statically, memory locations addressed relative to %rbp (here, -0x1c0(%rbp,%rax,8)) are local variables.

%xmm0 is a floating-point register, and movsd stores a double-precision value. With these hints, you can infer that this variable holds floating-point data.

////////////////////////////////////////////////////////////////

4006f0: ff ff

nCntDbl++;

4006f2: 83 85 cc fd ff ff 01 addl $0x1,-0x234(%rbp)

int nStrLen;

int bGoWrong = (argc > 1);

int i;

for( i = 0; i < 20; i++ ){

4006f9: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)

400700: 83 bd d8 fd ff ff 13 cmpl $0x13,-0x228(%rbp)

400707: 7e af jle 4006b8 <main+0x52>

dblData[i] = (double)rand()/i+1;

nCntDbl++;

}

for( i = 0; i < 20; i++ ){

400709: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)

400710: 00 00 00

400713: eb 48 jmp 40075d <main+0xf7>

flData[i] = (float)rand()/i+1;

400715: e8 36 fe ff ff callq 400550 <rand@plt>

40071a: 66 0f ef c0 pxor %xmm0,%xmm0

40071e: f3 0f 2a c0 cvtsi2ss %eax,%xmm0

400722: 66 0f ef c9 pxor %xmm1,%xmm1

400726: f3 0f 2a 8d d8 fd ff cvtsi2ssl -0x228(%rbp),%xmm1

40072d: ff

40072e: f3 0f 5e c1 divss %xmm1,%xmm0

400732: f3 0f 10 0d 26 02 00 movss 0x226(%rip),%xmm1 # 400960 <_IO_stdin_used+0x30>

400739: 00

40073a: f3 0f 58 c1 addss %xmm1,%xmm0

40073e: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

400744: 48 98 cltq

400746: f3 0f 11 84 85 f0 fd movss %xmm0,-0x210(%rbp,%rax,4)

40074d: ff ff

nCntFl++;

40074f: 83 85 d0 fd ff ff 01 addl $0x1,-0x230(%rbp)

for( i = 0; i < 20; i++ ){

dblData[i] = (double)rand()/i+1;

nCntDbl++;

}

for( i = 0; i < 20; i++ ){

400756: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)

40075d: 83 bd d8 fd ff ff 13 cmpl $0x13,-0x228(%rbp)

400764: 7e af jle 400715 <main+0xaf>

flData[i] = (float)rand()/i+1;

nCntFl++;

}

for( i = 0; i < strlen(strTestString ); i++ ) {

400766: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)

40076d: 00 00 00

400770: eb 33 jmp 4007a5 <main+0x13f>

strData[i] = strTestString [i];

400772: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

400778: 48 63 d0 movslq %eax,%rdx

40077b: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax

400782: 48 01 d0 add %rdx,%rax

400785: 0f b6 10 movzbl (%rax),%edx

400788: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

40078e: 48 98 cltq

400790: 88 94 05 e0 fe ff ff mov %dl,-0x120(%rbp,%rax,1)

////////////////////////////////////////////////////////////////

Hint: We can infer the element type from the size of the operand being stored: %dl is a 1-byte register, so this buffer holds byte-sized (char) elements.

////////////////////////////////////////////////////////////////

nStrLen++;

400797: 83 85 d4 fd ff ff 01 addl $0x1,-0x22c(%rbp)

for( i = 0; i < 20; i++ ){

flData[i] = (float)rand()/i+1;

nCntFl++;

}

for( i = 0; i < strlen(strTestString ); i++ ) {

40079e: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)

4007a5: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

4007ab: 48 63 d8 movslq %eax,%rbx

4007ae: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax

4007b5: 48 89 c7 mov %rax,%rdi

4007b8: e8 53 fd ff ff callq 400510 <strlen@plt>

////////////////////////////////////////////////////////////////

Hint: The call to strlen tells us that the pointer passed in %rdi (loaded from -0x220(%rbp)) refers to a string.

////////////////////////////////////////////////////////////////

4007bd: 48 39 c3 cmp %rax,%rbx

4007c0: 72 b0 jb 400772 <main+0x10c>

strData[i] = strTestString [i];

nStrLen++;

}

strData[i] = 0;

4007c2: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

4007c8: 48 98 cltq

4007ca: c6 84 05 e0 fe ff ff movb $0x0,-0x120(%rbp,%rax,1)

////////////////////////////////////////////////////////////////

Hint: A constant is being copied: movb stores the single byte 0x0 into the buffer (the string's terminating NUL).

////////////////////////////////////////////////////////////////

4007d1: 00

// access violation

if( bGoWrong ) {

4007d2: 83 bd dc fd ff ff 00 cmpl $0x0,-0x224(%rbp)

4007d9: 74 73 je 40084e <main+0x1e8>

printf("gowrong\n");

4007db: bf 45 09 40 00 mov $0x400945,%edi

4007e0: e8 1b fd ff ff callq 400500 <puts@plt>

char* pbuf = (char*)dblData;

4007e5: 48 8d 85 40 fe ff ff lea -0x1c0(%rbp),%rax

4007ec: 48 89 85 e8 fd ff ff mov %rax,-0x218(%rbp)

for( i = 0; i < strlen(strTestString); i++ ) {

4007f3: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)

4007fa: 00 00 00

4007fd: eb 32 jmp 400831 <main+0x1cb>

pbuf[i] = strTestString[i];

4007ff: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

400805: 48 63 d0 movslq %eax,%rdx

400808: 48 8b 85 e8 fd ff ff mov -0x218(%rbp),%rax

40080f: 48 01 c2 add %rax,%rdx

400812: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

400818: 48 63 c8 movslq %eax,%rcx

40081b: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax

400822: 48 01 c8 add %rcx,%rax

400825: 0f b6 00 movzbl (%rax),%eax

400828: 88 02 mov %al,(%rdx)

// access violation

if( bGoWrong ) {

printf("gowrong\n");

char* pbuf = (char*)dblData;

for( i = 0; i < strlen(strTestString); i++ ) {

40082a: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)

400831: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax

400837: 48 63 d8 movslq %eax,%rbx

40083a: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax

400841: 48 89 c7 mov %rax,%rdi

400844: e8 c7 fc ff ff callq 400510 <strlen@plt>

400849: 48 39 c3 cmp %rax,%rbx

40084c: 72 b1 jb 4007ff <main+0x199>

pbuf[i] = strTestString[i];

}

}

printf("%f %lf %s\n", dblData[1], flData[2], strData);

40084e: f3 0f 10 85 f8 fd ff movss -0x208(%rbp),%xmm0

400855: ff

400856: f3 0f 5a c0 cvtss2sd %xmm0,%xmm0

40085a: 48 8b 85 48 fe ff ff mov -0x1b8(%rbp),%rax

400861: 48 8d 95 e0 fe ff ff lea -0x120(%rbp),%rdx

400868: 48 89 d6 mov %rdx,%rsi

40086b: 66 0f 28 c8 movapd %xmm0,%xmm1

40086f: 48 89 85 a8 fd ff ff mov %rax,-0x258(%rbp)

400876: f2 0f 10 85 a8 fd ff movsd -0x258(%rbp),%xmm0

40087d: ff

40087e: bf 4d 09 40 00 mov $0x40094d,%edi

400883: b8 02 00 00 00 mov $0x2,%eax

400888: e8 a3 fc ff ff callq 400530 <printf@plt>

return 0;

40088d: 90 nop

}

40088e: 48 8b 45 e8 mov -0x18(%rbp),%rax

400892: 64 48 33 04 25 28 00 xor %fs:0x28,%rax

400899: 00 00

40089b: 74 05 je 4008a2 <main+0x23c>

40089d: e8 7e fc ff ff callq 400520 <__stack_chk_fail@plt>

4008a2: 48 81 c4 58 02 00 00 add $0x258,%rsp

4008a9: 5b pop %rbx

4008aa: 5d pop %rbp

4008ab: c3 retq

4008ac: 0f 1f 40 00 nopl 0x0(%rax)


00000000004008b0 <__libc_csu_init>:

4008b0: 41 57 push %r15

4008b2: 41 56 push %r14

4008b4: 41 89 ff mov %edi,%r15d

4008b7: 41 55 push %r13

4008b9: 41 54 push %r12

4008bb: 4c 8d 25 4e 05 20 00 lea 0x20054e(%rip),%r12 # 600e10 <__frame_dummy_init_array_entry>

4008c2: 55 push %rbp

4008c3: 48 8d 2d 4e 05 20 00 lea 0x20054e(%rip),%rbp # 600e18 <__init_array_end>

4008ca: 53 push %rbx

4008cb: 49 89 f6 mov %rsi,%r14

4008ce: 49 89 d5 mov %rdx,%r13

4008d1: 4c 29 e5 sub %r12,%rbp

4008d4: 48 83 ec 08 sub $0x8,%rsp

4008d8: 48 c1 fd 03 sar $0x3,%rbp

4008dc: e8 ef fb ff ff callq 4004d0 <_init>

4008e1: 48 85 ed test %rbp,%rbp

4008e4: 74 20 je 400906 <__libc_csu_init+0x56>

4008e6: 31 db xor %ebx,%ebx

4008e8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)

4008ef: 00

4008f0: 4c 89 ea mov %r13,%rdx

4008f3: 4c 89 f6 mov %r14,%rsi

4008f6: 44 89 ff mov %r15d,%edi

4008f9: 41 ff 14 dc callq *(%r12,%rbx,8)

4008fd: 48 83 c3 01 add $0x1,%rbx

400901: 48 39 eb cmp %rbp,%rbx

400904: 75 ea jne 4008f0 <__libc_csu_init+0x40>

400906: 48 83 c4 08 add $0x8,%rsp

40090a: 5b pop %rbx

40090b: 5d pop %rbp

40090c: 41 5c pop %r12

40090e: 41 5d pop %r13

400910: 41 5e pop %r14

400912: 41 5f pop %r15

400914: c3 retq

400915: 90 nop

400916: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)

40091d: 00 00 00