Extra Project 1
Memory Tracing and Semantic Recovery using Intel Pin
Due: 9/28 Fri 11:59 PM
Late Policy: For each late day (24 hours), -1 point out of 15 points
The project is not difficult, but it takes time. Please start early. :)
Submission
Please upload your project files to Google Drive (or another file-sharing service) and share them with me (yongkwon@virginia.edu).
Then send me an email with the *link*. Please don't send the files themselves, as they may be filtered and flagged. :)
Resource
0. Announcement [Description]
1. Pin intro. [slides]
2. Useful primitives in Pin [Note: Tips for the projects]
3. Pintool template: [Project Template (See the note)]
5. Code Samples [Note]
(1) Download VirtualBox and 5 files.
(2) Unzip the file (if you are using a Mac, Keka can extract 7z files; 7-Zip is available for other OSes).
(3) Import the OVA file (extracted from the 7z archive) into VirtualBox.
Target Program Sample
This will give you an idea of what program you will trace and what you will be asked to do.
#include <stdio.h>
#include <stdlib.h>
/*
 * Sample target program for the tracing project.
 *
 * Fills a double array, a float array and a char buffer on the stack, then
 * prints dblData[1], flData[2] and strData. When at least one command-line
 * argument is given (argc > 1), it also overwrites the beginning of dblData
 * with the bytes of the test string before printing.
 *
 * NOTE(review): main is declared void but ends with `return 0;`.
 * NOTE(review): strlen is used, but only <stdio.h> and <stdlib.h> are
 * included in the visible source; strlen is declared in <string.h>.
 */
void main(int argc, char** argv)
{
double dblData[20];
// NOTE(review): nCntDbl, nCntFl and nStrLen are incremented below without
// ever being initialized, and none of them is read anywhere else.
int nCntDbl;
float flData[20];
int nCntFl;
char* strTestString = "String Test\n";
char strData[255];
int nStrLen;
// Non-zero when the program is started with at least one argument.
int bGoWrong = (argc > 1);
int i;
// Fill dblData. The expression is (rand()/i) + 1, so the first iteration
// (i == 0) performs a floating-point division by zero.
for( i = 0; i < 20; i++ ){
dblData[i] = (double)rand()/i+1;
nCntDbl++;
}
// Same pattern in single precision; i == 0 again divides by zero.
for( i = 0; i < 20; i++ ){
flData[i] = (float)rand()/i+1;
nCntFl++;
}
// Byte-by-byte copy of the test string into strData; strlen is re-evaluated
// on every loop iteration.
for( i = 0; i < strlen(strTestString ); i++ ) {
strData[i] = strTestString [i];
nStrLen++;
}
// i now equals the string length, so this writes the terminating NUL.
strData[i] = 0;
// access violation
// The branch below writes characters through a char* that aliases dblData,
// i.e. a buffer used as double above is now written as bytes. The copy
// stays inside dblData (the string is 12 characters, the array is 20
// doubles) and does not copy a terminator.
if( bGoWrong ) {
printf("gowrong\n");
char* pbuf = (char*)dblData;
for( i = 0; i < strlen(strTestString); i++ ) {
pbuf[i] = strTestString[i];
}
}
// flData[2] is a float and is promoted to double when passed to printf.
// In the bGoWrong case the leading bytes of dblData[1] have been overwritten
// with string characters.
printf("%f %lf %s\n", dblData[1], flData[2], strData);
return 0;
}
Assembly Code
This will give you an idea of how you can infer things from the assembly code and what the hints look like.
0000000000400666 <main>:
#include <stdio.h>
#include <stdlib.h>
void main(int argc, char** argv)
{
400666: 55 push %rbp
400667: 48 89 e5 mov %rsp,%rbp
40066a: 53 push %rbx
40066b: 48 81 ec 58 02 00 00 sub $0x258,%rsp
400672: 89 bd bc fd ff ff mov %edi,-0x244(%rbp)
400678: 48 89 b5 b0 fd ff ff mov %rsi,-0x250(%rbp)
40067f: 64 48 8b 04 25 28 00 mov %fs:0x28,%rax
400686: 00 00
400688: 48 89 45 e8 mov %rax,-0x18(%rbp)
40068c: 31 c0 xor %eax,%eax
double dblData[20];
int nCntDbl;
float flData[20];
int nCntFl;
char* strTestString = "String Test\n";
40068e: 48 c7 85 e0 fd ff ff movq $0x400938,-0x220(%rbp)
////////////////////////////////////////////////////////////////
Hint: Statically, memory operands addressed relative to %rbp are local variables.
////////////////////////////////////////////////////////////////
400695: 38 09 40 00
char strData[255];
int nStrLen;
int bGoWrong = (argc > 1);
400699: 83 bd bc fd ff ff 01 cmpl $0x1,-0x244(%rbp)
////////////////////////////////////////////////////////////////
Hint: Statically, memory operands addressed relative to %rbp are local variables.
////////////////////////////////////////////////////////////////
4006a0: 0f 9f c0 setg %al
4006a3: 0f b6 c0 movzbl %al,%eax
4006a6: 89 85 dc fd ff ff mov %eax,-0x224(%rbp)
////////////////////////////////////////////////////////////////
Hint: Statically, memory operands addressed relative to %rbp are local variables.
////////////////////////////////////////////////////////////////
int i;
for( i = 0; i < 20; i++ ){
4006ac: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)
4006b3: 00 00 00
4006b6: eb 48 jmp 400700 <main+0x9a>
dblData[i] = (double)rand()/i+1;
4006b8: e8 93 fe ff ff callq 400550 <rand@plt>
4006bd: 66 0f ef c0 pxor %xmm0,%xmm0
4006c1: f2 0f 2a c0 cvtsi2sd %eax,%xmm0
4006c5: 66 0f ef c9 pxor %xmm1,%xmm1
4006c9: f2 0f 2a 8d d8 fd ff cvtsi2sdl -0x228(%rbp),%xmm1
4006d0: ff
4006d1: f2 0f 5e c1 divsd %xmm1,%xmm0
4006d5: f2 0f 10 0d 7b 02 00 movsd 0x27b(%rip),%xmm1 # 400958 <_IO_stdin_used+0x28>
4006dc: 00
4006dd: f2 0f 58 c1 addsd %xmm1,%xmm0
4006e1: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
4006e7: 48 98 cltq
4006e9: f2 0f 11 84 c5 40 fe movsd %xmm0,-0x1c0(%rbp,%rax,8)
////////////////////////////////////////////////////////////////
Hint: Statically, memory operands addressed relative to %rbp are local variables.
%xmm0 is a floating-point register, and movsd stores a scalar double-precision value. From these hints you can tell that this variable holds a double.
////////////////////////////////////////////////////////////////
4006f0: ff ff
nCntDbl++;
4006f2: 83 85 cc fd ff ff 01 addl $0x1,-0x234(%rbp)
int nStrLen;
int bGoWrong = (argc > 1);
int i;
for( i = 0; i < 20; i++ ){
4006f9: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)
400700: 83 bd d8 fd ff ff 13 cmpl $0x13,-0x228(%rbp)
400707: 7e af jle 4006b8 <main+0x52>
dblData[i] = (double)rand()/i+1;
nCntDbl++;
}
for( i = 0; i < 20; i++ ){
400709: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)
400710: 00 00 00
400713: eb 48 jmp 40075d <main+0xf7>
flData[i] = (float)rand()/i+1;
400715: e8 36 fe ff ff callq 400550 <rand@plt>
40071a: 66 0f ef c0 pxor %xmm0,%xmm0
40071e: f3 0f 2a c0 cvtsi2ss %eax,%xmm0
400722: 66 0f ef c9 pxor %xmm1,%xmm1
400726: f3 0f 2a 8d d8 fd ff cvtsi2ssl -0x228(%rbp),%xmm1
40072d: ff
40072e: f3 0f 5e c1 divss %xmm1,%xmm0
400732: f3 0f 10 0d 26 02 00 movss 0x226(%rip),%xmm1 # 400960 <_IO_stdin_used+0x30>
400739: 00
40073a: f3 0f 58 c1 addss %xmm1,%xmm0
40073e: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
400744: 48 98 cltq
400746: f3 0f 11 84 85 f0 fd movss %xmm0,-0x210(%rbp,%rax,4)
40074d: ff ff
nCntFl++;
40074f: 83 85 d0 fd ff ff 01 addl $0x1,-0x230(%rbp)
for( i = 0; i < 20; i++ ){
dblData[i] = (double)rand()/i+1;
nCntDbl++;
}
for( i = 0; i < 20; i++ ){
400756: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)
40075d: 83 bd d8 fd ff ff 13 cmpl $0x13,-0x228(%rbp)
400764: 7e af jle 400715 <main+0xaf>
flData[i] = (float)rand()/i+1;
nCntFl++;
}
for( i = 0; i < strlen(strTestString ); i++ ) {
400766: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)
40076d: 00 00 00
400770: eb 33 jmp 4007a5 <main+0x13f>
strData[i] = strTestString [i];
400772: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
400778: 48 63 d0 movslq %eax,%rdx
40077b: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax
400782: 48 01 d0 add %rdx,%rax
400785: 0f b6 10 movzbl (%rax),%edx
400788: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
40078e: 48 98 cltq
400790: 88 94 05 e0 fe ff ff mov %dl,-0x120(%rbp,%rax,1)
////////////////////////////////////////////////////////////////
Hint: We can infer the type from the operand size: %dl is an 8-bit register, so this store writes a single byte (a char).
////////////////////////////////////////////////////////////////
nStrLen++;
400797: 83 85 d4 fd ff ff 01 addl $0x1,-0x22c(%rbp)
for( i = 0; i < 20; i++ ){
flData[i] = (float)rand()/i+1;
nCntFl++;
}
for( i = 0; i < strlen(strTestString ); i++ ) {
40079e: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)
4007a5: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
4007ab: 48 63 d8 movslq %eax,%rbx
4007ae: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax
4007b5: 48 89 c7 mov %rax,%rdi
4007b8: e8 53 fd ff ff callq 400510 <strlen@plt>
////////////////////////////////////////////////////////////////
Hint: The call to strlen tells us that its argument (the pointer loaded from -0x220(%rbp) into %rdi) is a string.
////////////////////////////////////////////////////////////////
4007bd: 48 39 c3 cmp %rax,%rbx
4007c0: 72 b0 jb 400772 <main+0x10c>
strData[i] = strTestString [i];
nStrLen++;
}
strData[i] = 0;
4007c2: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
4007c8: 48 98 cltq
4007ca: c6 84 05 e0 fe ff ff movb $0x0,-0x120(%rbp,%rax,1)
////////////////////////////////////////////////////////////////
Hint: A constant is being stored: movb writes the immediate $0x0 as a single byte (the string terminator).
////////////////////////////////////////////////////////////////
4007d1: 00
// access violation
if( bGoWrong ) {
4007d2: 83 bd dc fd ff ff 00 cmpl $0x0,-0x224(%rbp)
4007d9: 74 73 je 40084e <main+0x1e8>
printf("gowrong\n");
4007db: bf 45 09 40 00 mov $0x400945,%edi
4007e0: e8 1b fd ff ff callq 400500 <puts@plt>
char* pbuf = (char*)dblData;
4007e5: 48 8d 85 40 fe ff ff lea -0x1c0(%rbp),%rax
4007ec: 48 89 85 e8 fd ff ff mov %rax,-0x218(%rbp)
for( i = 0; i < strlen(strTestString); i++ ) {
4007f3: c7 85 d8 fd ff ff 00 movl $0x0,-0x228(%rbp)
4007fa: 00 00 00
4007fd: eb 32 jmp 400831 <main+0x1cb>
pbuf[i] = strTestString[i];
4007ff: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
400805: 48 63 d0 movslq %eax,%rdx
400808: 48 8b 85 e8 fd ff ff mov -0x218(%rbp),%rax
40080f: 48 01 c2 add %rax,%rdx
400812: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
400818: 48 63 c8 movslq %eax,%rcx
40081b: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax
400822: 48 01 c8 add %rcx,%rax
400825: 0f b6 00 movzbl (%rax),%eax
400828: 88 02 mov %al,(%rdx)
// access violation
if( bGoWrong ) {
printf("gowrong\n");
char* pbuf = (char*)dblData;
for( i = 0; i < strlen(strTestString); i++ ) {
40082a: 83 85 d8 fd ff ff 01 addl $0x1,-0x228(%rbp)
400831: 8b 85 d8 fd ff ff mov -0x228(%rbp),%eax
400837: 48 63 d8 movslq %eax,%rbx
40083a: 48 8b 85 e0 fd ff ff mov -0x220(%rbp),%rax
400841: 48 89 c7 mov %rax,%rdi
400844: e8 c7 fc ff ff callq 400510 <strlen@plt>
400849: 48 39 c3 cmp %rax,%rbx
40084c: 72 b1 jb 4007ff <main+0x199>
pbuf[i] = strTestString[i];
}
}
printf("%f %lf %s\n", dblData[1], flData[2], strData);
40084e: f3 0f 10 85 f8 fd ff movss -0x208(%rbp),%xmm0
400855: ff
400856: f3 0f 5a c0 cvtss2sd %xmm0,%xmm0
40085a: 48 8b 85 48 fe ff ff mov -0x1b8(%rbp),%rax
400861: 48 8d 95 e0 fe ff ff lea -0x120(%rbp),%rdx
400868: 48 89 d6 mov %rdx,%rsi
40086b: 66 0f 28 c8 movapd %xmm0,%xmm1
40086f: 48 89 85 a8 fd ff ff mov %rax,-0x258(%rbp)
400876: f2 0f 10 85 a8 fd ff movsd -0x258(%rbp),%xmm0
40087d: ff
40087e: bf 4d 09 40 00 mov $0x40094d,%edi
400883: b8 02 00 00 00 mov $0x2,%eax
400888: e8 a3 fc ff ff callq 400530 <printf@plt>
return 0;
40088d: 90 nop
}
40088e: 48 8b 45 e8 mov -0x18(%rbp),%rax
400892: 64 48 33 04 25 28 00 xor %fs:0x28,%rax
400899: 00 00
40089b: 74 05 je 4008a2 <main+0x23c>
40089d: e8 7e fc ff ff callq 400520 <__stack_chk_fail@plt>
4008a2: 48 81 c4 58 02 00 00 add $0x258,%rsp
4008a9: 5b pop %rbx
4008aa: 5d pop %rbp
4008ab: c3 retq
4008ac: 0f 1f 40 00 nopl 0x0(%rax)
00000000004008b0 <__libc_csu_init>:
4008b0: 41 57 push %r15
4008b2: 41 56 push %r14
4008b4: 41 89 ff mov %edi,%r15d
4008b7: 41 55 push %r13
4008b9: 41 54 push %r12
4008bb: 4c 8d 25 4e 05 20 00 lea 0x20054e(%rip),%r12 # 600e10 <__frame_dummy_init_array_entry>
4008c2: 55 push %rbp
4008c3: 48 8d 2d 4e 05 20 00 lea 0x20054e(%rip),%rbp # 600e18 <__init_array_end>
4008ca: 53 push %rbx
4008cb: 49 89 f6 mov %rsi,%r14
4008ce: 49 89 d5 mov %rdx,%r13
4008d1: 4c 29 e5 sub %r12,%rbp
4008d4: 48 83 ec 08 sub $0x8,%rsp
4008d8: 48 c1 fd 03 sar $0x3,%rbp
4008dc: e8 ef fb ff ff callq 4004d0 <_init>
4008e1: 48 85 ed test %rbp,%rbp
4008e4: 74 20 je 400906 <__libc_csu_init+0x56>
4008e6: 31 db xor %ebx,%ebx
4008e8: 0f 1f 84 00 00 00 00 nopl 0x0(%rax,%rax,1)
4008ef: 00
4008f0: 4c 89 ea mov %r13,%rdx
4008f3: 4c 89 f6 mov %r14,%rsi
4008f6: 44 89 ff mov %r15d,%edi
4008f9: 41 ff 14 dc callq *(%r12,%rbx,8)
4008fd: 48 83 c3 01 add $0x1,%rbx
400901: 48 39 eb cmp %rbp,%rbx
400904: 75 ea jne 4008f0 <__libc_csu_init+0x40>
400906: 48 83 c4 08 add $0x8,%rsp
40090a: 5b pop %rbx
40090b: 5d pop %rbp
40090c: 41 5c pop %r12
40090e: 41 5d pop %r13
400910: 41 5e pop %r14
400912: 41 5f pop %r15
400914: c3 retq
400915: 90 nop
400916: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
40091d: 00 00 00