From 8341628b1f97f3c2d71d43617102b63c054dd72f Mon Sep 17 00:00:00 2001
From: Mark McKinnon
Date: Tue, 12 Apr 2016 09:18:29 -0400
Subject: [PATCH] Added files via upload

---
 sqlparse_v1.3.py | 435 ++++++++++++++++++++++++-----------------
 1 file changed, 218 insertions(+), 217 deletions(-)

diff --git a/sqlparse_v1.3.py b/sqlparse_v1.3.py
index 314de48..5b61a5e 100644
--- a/sqlparse_v1.3.py
+++ b/sqlparse_v1.3.py
@@ -1,217 +1,218 @@
-#sqlparse.py
-#
-#This program parses an SQLite3 database for deleted entires and
-#places the output into either and TSV file, or text file
-#
-#The SQLite file format, offsets etc is described at
-#sqlite.org/fileformat.html
-#
-#
-# Copyright (C) 2015 Mari DeGrazia (arizona4n6@gmail.com)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You can view the GNU General Public License at
-#
-# Version History:
-# v1.1 2013-11-05
-#
-# v1.2 2015-06-20
-#support added in to print out non b-tree pages
-#
-# v.1.3 2015-06-21
-#minor changes / comments etc.
-#
-#
-#Find a bug???? Please let me know and I'll try to fix it (if you ask nicely....)
-#
-
-import struct
-from optparse import OptionParser
-import sys
-
-#function to remove the non-printable characters, tabs and white spaces
-def remove_ascii_non_printable(chunk):
-    chunk = ' '.join(chunk .split())
-    return ''.join([ch for ch in chunk if ord(ch) > 31 and ord(ch) < 126 or ord(ch) ==9])
-
-
-usage = "Parse deleted records from an SQLite file into a TSV File or text file \n\
-Examples:\n\
--f /home/sanforensics/smsmms.db -o report.tsv\n\
--f /home/sanforensics/smssms.db -r -o report.txt \n"
-
-parser = OptionParser(usage=usage)
-
-parser.add_option("-f", "--file", dest = "infile", help = "sqlite database file", metavar = "smsmms.db")
-parser.add_option("-o", "--output", dest = "outfile", help = "Output to a tsv file. Strips white space, tabs and non-printable characters from data field", metavar = "output.tsv")
-parser.add_option("-r", "--raw", action ="store_true", dest = "raw", help = "Optional. Will out put data field in a raw format and text file.", metavar = "output.tsv")
-parser.add_option("-p", "--printpages", action ="store_true", dest = "printpages", help = "Optional. Will print any printable non-whitespace chars from all non-leaf b-tree pages (in case page has been re-purposed). WARNING: May output a lot of string data.")
-
-(options,args)=parser.parse_args()
-
-#no arugments given by user,exit
-if len(sys.argv) == 1:
-    parser.print_help()
-    sys.exit(0)
-
-#if input of output file missing, exit
-if (options.infile == None) or (options.outfile == None):
-    parser.print_help()
-    print "Filename or Output file not given"
-    sys.exit(0)
-
-#open file, confirm it is an SQLite DB
-try:
-    f=open(options.infile,"rb")
-except:
-    print ("File not Found")
-    sys.exit(0)
-
-try:
-    output = open(options.outfile, 'w')
-except:
-    print "Error opening output file"
-    sys.exit(0)
-
-
-#write the column header if not outputting to text file
-if options.raw !=True:
-    output.write("Type\tOffset\tLength\tData\n")
-
-#get the file size, we'll need this later
-#filesize = len(f.read())
-# Cheeky suggestion ... so it doesnt read the whole file unecessarily
-import os
-stats = os.stat(options.infile)
-filesize = stats.st_size
-
-#be kind, rewind (to the beginning of the file, that is)
-f.seek(0)
-
-#verify the file is an sqlite db; read the first 16 bytes for the header
-header = f.read(16)
-
-if "SQLite" not in header:
-    print ("File does not appear to be an SQLite File")
-    sys.exit(0)
-
-
-#OK, lets get started. The SQLite database is made up of multiple Pages. We need to get the size of each page.
-#The pagesize this is stored at offset 16 at is 2 bytes long
-
-pagesize = struct.unpack('>H', f.read(2))[0]
-
-#According to SQLite.org/fileformat.html, all the data is contained in the table-b-trees leaves.
-#Let's go to each Page, read the B-Tree Header, and see if it is a table b-tree, which is designated by the flag 13
-
-#set the offset to 0, so we can also process any strings in the first page
-offset = 0
-
-#while the offset is less then the filesize, keep processing the pages
-
-while offset < filesize:
-
-    #move to the beginning of the page and read the b-tree flag, if it's 13, its a leaf table b tree and we want to process it
-    f.seek(offset)
-    flag = struct.unpack('>b',f.read(1))[0]
-
-    if flag == 13:
-
-        #this is a table_b_tree - get the header information which is contained in the first 8 bytes
-
-        freeblock_offset = struct.unpack('>h',f.read(2))[0]
-        num_cells = struct.unpack('>h',f.read(2))[0]
-        cell_offset = struct.unpack('>h',f.read(2))[0]
-        num_free_bytes = struct.unpack('>b',f.read(1))[0]
-
-
-        #unallocated is the space after the header information and before the first cell starts
-
-        #start after the header (8 bytes) and after the cell pointer array. The cell pointer array will be the number of cells x 2 bytes per cell
-        start = 8 + (num_cells * 2)
-
-        # the length of the unallocated space will be the difference between the start and the cell offset
-        length = cell_offset-start
-
-        #move to start of unallocated, then read the data (if any) in unallocated - remember, we already read in the first 8 bytes, so now we just need to move past the cell pointer array
-        f.read(num_cells*2)
-        unallocated = f.read(length)
-
-        if options.raw == True:
-            output.write("Unallocated, Offset " + str(offset+start) + " Length " + str(length) + "\n")
-            output.write("Data:\n")
-            output.write((unallocated))
-            output.write("\n\n")
-
-        else:
-            #lets clean this up so its mainly the strings - remove white spaces and tabs too
-
-            unallocated = remove_ascii_non_printable(unallocated )
-            if unallocated != "":
-                output.write("Unallocated" + "\t" + str(offset+start) + "\t" + str(length) + "\t" + str(unallocated) + "\n" )
-
-        #if there are freeblocks, lets pull the data
-
-        while freeblock_offset != 0:
-
-            #move to the freeblock offset
-            f.seek(offset+freeblock_offset)
-
-            #get next freeblock chain
-            next_fb_offset = struct.unpack('>h',f.read(2))[0]
-
-            #get the size of this freeblock
-            free_block_size = struct.unpack('>hh',f.read(4))[0]
-
-            #move to the offset so we can read the free block data
-            f.seek(offset+freeblock_offset)
-
-            #read in this freeblock
-            free_block = f.read(free_block_size)
-
-            if options.raw == True:
-                output.write("Free Block, Offset " + str(offset+freeblock_offset) + ", Length " + str(free_block_size) + "\n")
-                output.write("Data:\n")
-                output.write((free_block))
-                output.write( "\n\n")
-
-            else:
-                #lets clean this up so its mainly the strings - remove white spaces and tabs too
-                free_block = remove_ascii_non_printable(free_block)
-                if unallocated != "":
-                    output.write("Free Block" + "\t" + str(offset+freeblock_offset) + "\t" + str(free_block_size) + "\t" + str(free_block) + "\n" )
-
-            freeblock_offset = next_fb_offset
-
-    # Cheeky's Change: Extract strings from non-Leaf-Table B-tree pages to handle re-purposed/re-used pages
-    # According to docs, valid flag values are 2, 5, 10, 13 BUT pages containing string data have also been observed with flag = 0
-    # So just print strings from all non flag = 13 pages.
-    elif (options.printpages):
-        # read block into one big string, filter unprintables, then print
-        pagestring = f.read(pagesize-1) # we've already read the flag byte
-        printable_pagestring = remove_ascii_non_printable(pagestring)
-
-        if options.raw == True:
-            output.write("Non-Leaf-Table-Btree-Type_"+ str(flag) + ", Offset " + str(offset) + ", Length " + str(pagesize) + "\n")
-            output.write("Data: (ONLY PRINTABLE STRINGS ARE SHOWN HERE. FOR RAW DATA, CHECK FILE IN HEX VIEWER AT ABOVE LISTED OFFSET):\n\n")
-            output.write(printable_pagestring)
-            output.write( "\n\n")
-        else:
-            output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + "\t" + str(offset) + "\t" + str(pagesize) + "\t" + printable_pagestring + "\n" )
-
-    #increase the offset by one pagesize and loop
-    offset = offset + pagesize
-
-output.close()
-
-#end
-
+#sqlparse.py
+#
+#This program parses an SQLite3 database for deleted entires and
+#places the output into either and TSV file, or text file
+#
+#The SQLite file format, offsets etc is described at
+#sqlite.org/fileformat.html
+#
+#
+# Copyright (C) 2015 Mari DeGrazia (arizona4n6@gmail.com)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You can view the GNU General Public License at
+#
+# Version History:
+# v1.1 2013-11-05
+#
+# v1.2 2015-06-20
+#support added in to print out non b-tree pages
+#
+# v.1.3 2015-06-21
+#minor changes / comments etc.
+#
+#
+#Find a bug???? Please let me know and I'll try to fix it (if you ask nicely....)
+#
+
+import struct
+from optparse import OptionParser
+import sys
+import re
+
+#function to remove the non-printable characters, tabs and white spaces
+def remove_ascii_non_printable(chunk):
+    chunk = ''.join(map(chr, chunk))
+    return ''.join([ch for ch in chunk if ord(ch) > 31 and ord(ch) < 126 or ord(ch) ==9])
+
+
+usage = "Parse deleted records from an SQLite file into a TSV File or text file \n\
+Examples:\n\
+-f /home/sanforensics/smsmms.db -o report.tsv\n\
+-f /home/sanforensics/smssms.db -r -o report.txt \n"
+
+parser = OptionParser(usage=usage)
+
+parser.add_option("-f", "--file", dest = "infile", help = "sqlite database file", metavar = "smsmms.db")
+parser.add_option("-o", "--output", dest = "outfile", help = "Output to a tsv file. Strips white space, tabs and non-printable characters from data field", metavar = "output.tsv")
+parser.add_option("-r", "--raw", action ="store_true", dest = "raw", help = "Optional. Will out put data field in a raw format and text file.", metavar = "output.tsv")
+parser.add_option("-p", "--printpages", action ="store_true", dest = "printpages", help = "Optional. Will print any printable non-whitespace chars from all non-leaf b-tree pages (in case page has been re-purposed). WARNING: May output a lot of string data.")
+
+(options,args)=parser.parse_args()
+
+#no arugments given by user,exit
+if len(sys.argv) == 1:
+    parser.print_help()
+    sys.exit(0)
+
+#if input of output file missing, exit
+if (options.infile == None) or (options.outfile == None):
+    parser.print_help()
+    print ("Filename or Output file not given")
+    sys.exit(0)
+
+#open file, confirm it is an SQLite DB
+try:
+    f=open(options.infile,"rb")
+except:
+    print ("File not Found")
+    sys.exit(0)
+
+try:
+    output = open(options.outfile, 'w')
+except:
+    print ("Error opening output file")
+    sys.exit(0)
+
+
+#write the column header if not outputting to text file
+if options.raw !=True:
+    output.write("Type\tOffset\tLength\tData\n")
+
+#get the file size, we'll need this later
+#filesize = len(f.read())
+# Cheeky suggestion ... so it doesnt read the whole file unecessarily
+import os
+stats = os.stat(options.infile)
+filesize = stats.st_size
+
+#be kind, rewind (to the beginning of the file, that is)
+f.seek(0)
+
+#verify the file is an sqlite db; read the first 16 bytes for the header
+header = f.read(16)
+
+if b"SQLite" not in header:
+    print ("File does not appear to be an SQLite File")
+    sys.exit(0)
+
+
+#OK, lets get started. The SQLite database is made up of multiple Pages. We need to get the size of each page.
+#The pagesize this is stored at offset 16 at is 2 bytes long
+
+pagesize = struct.unpack('>H', f.read(2))[0]
+
+#According to SQLite.org/fileformat.html, all the data is contained in the table-b-trees leaves.
+#Let's go to each Page, read the B-Tree Header, and see if it is a table b-tree, which is designated by the flag 13
+
+#set the offset to 0, so we can also process any strings in the first page
+offset = 0
+
+#while the offset is less then the filesize, keep processing the pages
+
+while offset < filesize:
+
+    #move to the beginning of the page and read the b-tree flag, if it's 13, its a leaf table b tree and we want to process it
+    f.seek(offset)
+    flag = struct.unpack('>b',f.read(1))[0]
+
+    if flag == 13:
+
+        #this is a table_b_tree - get the header information which is contained in the first 8 bytes
+
+        freeblock_offset = struct.unpack('>h',f.read(2))[0]
+        num_cells = struct.unpack('>h',f.read(2))[0]
+        cell_offset = struct.unpack('>h',f.read(2))[0]
+        num_free_bytes = struct.unpack('>b',f.read(1))[0]
+
+
+        #unallocated is the space after the header information and before the first cell starts
+
+        #start after the header (8 bytes) and after the cell pointer array. The cell pointer array will be the number of cells x 2 bytes per cell
+        start = 8 + (num_cells * 2)
+
+        # the length of the unallocated space will be the difference between the start and the cell offset
+        length = cell_offset-start
+
+        #move to start of unallocated, then read the data (if any) in unallocated - remember, we already read in the first 8 bytes, so now we just need to move past the cell pointer array
+        f.read(num_cells*2)
+        unallocated = f.read(length)
+
+        if options.raw == True:
+            output.write("Unallocated, Offset " + str(offset+start) + " Length " + str(length) + "\n")
+            output.write("Data:\n")
+            output.write((unallocated))
+            output.write("\n\n")
+
+        else:
+            #lets clean this up so its mainly the strings - remove white spaces and tabs too
+
+            unallocated = remove_ascii_non_printable(unallocated )
+            if unallocated != "":
+                output.write("Unallocated" + "\t" + str(offset+start) + "\t" + str(length) + "\t" + re.sub('\s+',' ', str(unallocated)) + "\n" )
+
+        #if there are freeblocks, lets pull the data
+
+        while freeblock_offset != 0:
+
+            #move to the freeblock offset
+            f.seek(offset+freeblock_offset)
+
+            #get next freeblock chain
+            next_fb_offset = struct.unpack('>h',f.read(2))[0]
+
+            #get the size of this freeblock
+            free_block_size = struct.unpack('>hh',f.read(4))[0]
+
+            #move to the offset so we can read the free block data
+            f.seek(offset+freeblock_offset)
+
+            #read in this freeblock
+            free_block = f.read(free_block_size)
+
+            if options.raw == True:
+                output.write("Free Block, Offset " + str(offset+freeblock_offset) + ", Length " + str(free_block_size) + "\n")
+                output.write("Data:\n")
+                output.write((free_block))
+                output.write( "\n\n")
+
+            else:
+                #lets clean this up so its mainly the strings - remove white spaces and tabs too
+                free_block = remove_ascii_non_printable(free_block)
+                if unallocated != "":
+                    output.write("Free Block" + "\t" + str(offset+freeblock_offset) + "\t" + str(free_block_size) + "\t" + re.sub('\s+',' ', str(free_block)) + "\n" )
+
+            freeblock_offset = next_fb_offset
+
+    # Cheeky's Change: Extract strings from non-Leaf-Table B-tree pages to handle re-purposed/re-used pages
+    # According to docs, valid flag values are 2, 5, 10, 13 BUT pages containing string data have also been observed with flag = 0
+    # So just print strings from all non flag = 13 pages.
+    elif (options.printpages):
+        # read block into one big string, filter unprintables, then print
+        pagestring = f.read(pagesize-1) # we've already read the flag byte
+        printable_pagestring = remove_ascii_non_printable(pagestring)
+
+        if options.raw == True:
+            output.write("Non-Leaf-Table-Btree-Type_"+ str(flag) + ", Offset " + str(offset) + ", Length " + str(pagesize) + "\n")
+            output.write("Data: (ONLY PRINTABLE STRINGS ARE SHOWN HERE. FOR RAW DATA, CHECK FILE IN HEX VIEWER AT ABOVE LISTED OFFSET):\n\n")
+            output.write(printable_pagestring)
+            output.write( "\n\n")
+        else:
+            output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + "\t" + str(offset) + "\t" + str(pagesize) + "\t" + re.sub('\s+',' ', printable_pagestring) + "\n" )
+
+    #increase the offset by one pagesize and loop
+    offset = offset + pagesize
+
+output.close()
+
+#end
+