From 8341628b1f97f3c2d71d43617102b63c054dd72f Mon Sep 17 00:00:00 2001
From: Mark McKinnon
Date: Tue, 12 Apr 2016 09:18:29 -0400
Subject: [PATCH] Added files via upload

---
 sqlparse_v1.3.py | 435 ++++++++++++++++++++++++-----------------
 1 file changed, 218 insertions(+), 217 deletions(-)

diff --git a/sqlparse_v1.3.py b/sqlparse_v1.3.py
index 314de48..5b61a5e 100644
--- a/sqlparse_v1.3.py
+++ b/sqlparse_v1.3.py
@@ -1,217 +1,218 @@
-#sqlparse.py
-#
-#This program parses an SQLite3 database for deleted entires and
-#places the output into either and TSV file, or text file
-#
-#The SQLite file format, offsets etc is described at
-#sqlite.org/fileformat.html
-#
-#
-# Copyright (C) 2015 Mari DeGrazia (arizona4n6@gmail.com)
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You can view the GNU General Public License at
-#
-# Version History:
-# v1.1 2013-11-05
-#
-# v1.2 2015-06-20
-#support added in to print out non b-tree pages
-#
-# v.1.3 2015-06-21
-#minor changes / comments etc.
-#
-#
-#Find a bug???? Please let me know and I'll try to fix it (if you ask nicely....)
-#
-
-import struct
-from optparse import OptionParser
-import sys
-
-#function to remove the non-printable characters, tabs and white spaces
-def remove_ascii_non_printable(chunk):
-    chunk = ' '.join(chunk .split())
-    return ''.join([ch for ch in chunk if ord(ch) > 31 and ord(ch) < 126 or ord(ch) ==9])
-
-
-usage = "Parse deleted records from an SQLite file into a TSV File or text file \n\
-Examples:\n\
--f /home/sanforensics/smsmms.db -o report.tsv\n\
--f /home/sanforensics/smssms.db -r -o report.txt \n"
-
-parser = OptionParser(usage=usage)
-
-parser.add_option("-f", "--file", dest = "infile", help = "sqlite database file", metavar = "smsmms.db")
-parser.add_option("-o", "--output", dest = "outfile", help = "Output to a tsv file. Strips white space, tabs and non-printable characters from data field", metavar = "output.tsv")
-parser.add_option("-r", "--raw", action ="store_true", dest = "raw", help = "Optional. Will out put data field in a raw format and text file.", metavar = "output.tsv")
-parser.add_option("-p", "--printpages", action ="store_true", dest = "printpages", help = "Optional. Will print any printable non-whitespace chars from all non-leaf b-tree pages (in case page has been re-purposed). WARNING: May output a lot of string data.")
-
-(options,args)=parser.parse_args()
-
-#no arugments given by user,exit
-if len(sys.argv) == 1:
-    parser.print_help()
-    sys.exit(0)
-
-#if input of output file missing, exit
-if (options.infile == None) or (options.outfile == None):
-    parser.print_help()
-    print "Filename or Output file not given"
-    sys.exit(0)
-
-#open file, confirm it is an SQLite DB
-try:
-    f=open(options.infile,"rb")
-except:
-    print ("File not Found")
-    sys.exit(0)
-
-try:
-    output = open(options.outfile, 'w')
-except:
-    print "Error opening output file"
-    sys.exit(0)
-
-
-#write the column header if not outputting to text file
-if options.raw !=True:
-    output.write("Type\tOffset\tLength\tData\n")
-
-#get the file size, we'll need this later
-#filesize = len(f.read())
-# Cheeky suggestion ... so it doesnt read the whole file unecessarily
-import os
-stats = os.stat(options.infile)
-filesize = stats.st_size
-
-#be kind, rewind (to the beginning of the file, that is)
-f.seek(0)
-
-#verify the file is an sqlite db; read the first 16 bytes for the header
-header = f.read(16)
-
-if "SQLite" not in header:
-    print ("File does not appear to be an SQLite File")
-    sys.exit(0)
-
-
-#OK, lets get started. The SQLite database is made up of multiple Pages. We need to get the size of each page.
-#The pagesize this is stored at offset 16 at is 2 bytes long
-
-pagesize = struct.unpack('>H', f.read(2))[0]
-
-#According to SQLite.org/fileformat.html, all the data is contained in the table-b-trees leaves.
-#Let's go to each Page, read the B-Tree Header, and see if it is a table b-tree, which is designated by the flag 13
-
-#set the offset to 0, so we can also process any strings in the first page
-offset = 0
-
-#while the offset is less then the filesize, keep processing the pages
-
-while offset < filesize:
-
-    #move to the beginning of the page and read the b-tree flag, if it's 13, its a leaf table b tree and we want to process it
-    f.seek(offset)
-    flag = struct.unpack('>b',f.read(1))[0]
-
-    if flag == 13:
-
-        #this is a table_b_tree - get the header information which is contained in the first 8 bytes
-
-        freeblock_offset = struct.unpack('>h',f.read(2))[0]
-        num_cells = struct.unpack('>h',f.read(2))[0]
-        cell_offset = struct.unpack('>h',f.read(2))[0]
-        num_free_bytes = struct.unpack('>b',f.read(1))[0]
-
-
-        #unallocated is the space after the header information and before the first cell starts
-
-        #start after the header (8 bytes) and after the cell pointer array. The cell pointer array will be the number of cells x 2 bytes per cell
-        start = 8 + (num_cells * 2)
-
-        # the length of the unallocated space will be the difference between the start and the cell offset
-        length = cell_offset-start
-
-        #move to start of unallocated, then read the data (if any) in unallocated - remember, we already read in the first 8 bytes, so now we just need to move past the cell pointer array
-        f.read(num_cells*2)
-        unallocated = f.read(length)
-
-        if options.raw == True:
-            output.write("Unallocated, Offset " + str(offset+start) + " Length " + str(length) + "\n")
-            output.write("Data:\n")
-            output.write((unallocated))
-            output.write("\n\n")
-
-        else:
-            #lets clean this up so its mainly the strings - remove white spaces and tabs too
-
-            unallocated = remove_ascii_non_printable(unallocated )
-            if unallocated != "":
-                output.write("Unallocated" + "\t" + str(offset+start) + "\t" + str(length) + "\t" + str(unallocated) + "\n" )
-
-        #if there are freeblocks, lets pull the data
-
-        while freeblock_offset != 0:
-
-            #move to the freeblock offset
-            f.seek(offset+freeblock_offset)
-
-            #get next freeblock chain
-            next_fb_offset = struct.unpack('>h',f.read(2))[0]
-
-            #get the size of this freeblock
-            free_block_size = struct.unpack('>hh',f.read(4))[0]
-
-            #move to the offset so we can read the free block data
-            f.seek(offset+freeblock_offset)
-
-            #read in this freeblock
-            free_block = f.read(free_block_size)
-
-            if options.raw == True:
-                output.write("Free Block, Offset " + str(offset+freeblock_offset) + ", Length " + str(free_block_size) + "\n")
-                output.write("Data:\n")
-                output.write((free_block))
-                output.write( "\n\n")
-
-            else:
-                #lets clean this up so its mainly the strings - remove white spaces and tabs too
-                free_block = remove_ascii_non_printable(free_block)
-                if unallocated != "":
-                    output.write("Free Block" + "\t" + str(offset+freeblock_offset) + "\t" + str(free_block_size) + "\t" + str(free_block) + "\n" )
-
-            freeblock_offset = next_fb_offset
-
-    # Cheeky's Change: Extract strings from non-Leaf-Table B-tree pages to handle re-purposed/re-used pages
-    # According to docs, valid flag values are 2, 5, 10, 13 BUT pages containing string data have also been observed with flag = 0
-    # So just print strings from all non flag = 13 pages.
-    elif (options.printpages):
-        # read block into one big string, filter unprintables, then print
-        pagestring = f.read(pagesize-1) # we've already read the flag byte
-        printable_pagestring = remove_ascii_non_printable(pagestring)
-
-        if options.raw == True:
-            output.write("Non-Leaf-Table-Btree-Type_"+ str(flag) + ", Offset " + str(offset) + ", Length " + str(pagesize) + "\n")
-            output.write("Data: (ONLY PRINTABLE STRINGS ARE SHOWN HERE. FOR RAW DATA, CHECK FILE IN HEX VIEWER AT ABOVE LISTED OFFSET):\n\n")
-            output.write(printable_pagestring)
-            output.write( "\n\n")
-        else:
-            output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + "\t" + str(offset) + "\t" + str(pagesize) + "\t" + printable_pagestring + "\n" )
-
-    #increase the offset by one pagesize and loop
-    offset = offset + pagesize
-
-output.close()
-
-#end
-
+#sqlparse.py
+#
+#This program parses an SQLite3 database for deleted entires and
+#places the output into either and TSV file, or text file
+#
+#The SQLite file format, offsets etc is described at
+#sqlite.org/fileformat.html
+#
+#
+# Copyright (C) 2015 Mari DeGrazia (arizona4n6@gmail.com)
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You can view the GNU General Public License at
+#
+# Version History:
+# v1.1 2013-11-05
+#
+# v1.2 2015-06-20
+#support added in to print out non b-tree pages
+#
+# v.1.3 2015-06-21
+#minor changes / comments etc.
+#
+#
+#Find a bug???? Please let me know and I'll try to fix it (if you ask nicely....)
+#
+
+import struct
+from optparse import OptionParser
+import sys
+import re
+
+#function to remove the non-printable characters, tabs and white spaces
+def remove_ascii_non_printable(chunk):
+    chunk = ''.join(map(chr, chunk))
+    return ''.join([ch for ch in chunk if ord(ch) > 31 and ord(ch) < 126 or ord(ch) ==9])
+
+
+usage = "Parse deleted records from an SQLite file into a TSV File or text file \n\
+Examples:\n\
+-f /home/sanforensics/smsmms.db -o report.tsv\n\
+-f /home/sanforensics/smssms.db -r -o report.txt \n"
+
+parser = OptionParser(usage=usage)
+
+parser.add_option("-f", "--file", dest = "infile", help = "sqlite database file", metavar = "smsmms.db")
+parser.add_option("-o", "--output", dest = "outfile", help = "Output to a tsv file. Strips white space, tabs and non-printable characters from data field", metavar = "output.tsv")
+parser.add_option("-r", "--raw", action ="store_true", dest = "raw", help = "Optional. Will out put data field in a raw format and text file.", metavar = "output.tsv")
+parser.add_option("-p", "--printpages", action ="store_true", dest = "printpages", help = "Optional. Will print any printable non-whitespace chars from all non-leaf b-tree pages (in case page has been re-purposed). WARNING: May output a lot of string data.")
+
+(options,args)=parser.parse_args()
+
+#no arugments given by user,exit
+if len(sys.argv) == 1:
+    parser.print_help()
+    sys.exit(0)
+
+#if input of output file missing, exit
+if (options.infile == None) or (options.outfile == None):
+    parser.print_help()
+    print ("Filename or Output file not given")
+    sys.exit(0)
+
+#open file, confirm it is an SQLite DB
+try:
+    f=open(options.infile,"rb")
+except:
+    print ("File not Found")
+    sys.exit(0)
+
+try:
+    output = open(options.outfile, 'w')
+except:
+    print ("Error opening output file")
+    sys.exit(0)
+
+
+#write the column header if not outputting to text file
+if options.raw !=True:
+    output.write("Type\tOffset\tLength\tData\n")
+
+#get the file size, we'll need this later
+#filesize = len(f.read())
+# Cheeky suggestion ... so it doesnt read the whole file unecessarily
+import os
+stats = os.stat(options.infile)
+filesize = stats.st_size
+
+#be kind, rewind (to the beginning of the file, that is)
+f.seek(0)
+
+#verify the file is an sqlite db; read the first 16 bytes for the header
+header = f.read(16)
+
+if b"SQLite" not in header:
+    print ("File does not appear to be an SQLite File")
+    sys.exit(0)
+
+
+#OK, lets get started. The SQLite database is made up of multiple Pages. We need to get the size of each page.
+#The pagesize this is stored at offset 16 at is 2 bytes long
+
+pagesize = struct.unpack('>H', f.read(2))[0]
+
+#According to SQLite.org/fileformat.html, all the data is contained in the table-b-trees leaves.
+#Let's go to each Page, read the B-Tree Header, and see if it is a table b-tree, which is designated by the flag 13
+
+#set the offset to 0, so we can also process any strings in the first page
+offset = 0
+
+#while the offset is less then the filesize, keep processing the pages
+
+while offset < filesize:
+
+    #move to the beginning of the page and read the b-tree flag, if it's 13, its a leaf table b tree and we want to process it
+    f.seek(offset)
+    flag = struct.unpack('>b',f.read(1))[0]
+
+    if flag == 13:
+
+        #this is a table_b_tree - get the header information which is contained in the first 8 bytes
+
+        freeblock_offset = struct.unpack('>h',f.read(2))[0]
+        num_cells = struct.unpack('>h',f.read(2))[0]
+        cell_offset = struct.unpack('>h',f.read(2))[0]
+        num_free_bytes = struct.unpack('>b',f.read(1))[0]
+
+
+        #unallocated is the space after the header information and before the first cell starts
+
+        #start after the header (8 bytes) and after the cell pointer array. The cell pointer array will be the number of cells x 2 bytes per cell
+        start = 8 + (num_cells * 2)
+
+        # the length of the unallocated space will be the difference between the start and the cell offset
+        length = cell_offset-start
+
+        #move to start of unallocated, then read the data (if any) in unallocated - remember, we already read in the first 8 bytes, so now we just need to move past the cell pointer array
+        f.read(num_cells*2)
+        unallocated = f.read(length)
+
+        if options.raw == True:
+            output.write("Unallocated, Offset " + str(offset+start) + " Length " + str(length) + "\n")
+            output.write("Data:\n")
+            output.write((unallocated))
+            output.write("\n\n")
+
+        else:
+            #lets clean this up so its mainly the strings - remove white spaces and tabs too
+
+            unallocated = remove_ascii_non_printable(unallocated )
+            if unallocated != "":
+                output.write("Unallocated" + "\t" + str(offset+start) + "\t" + str(length) + "\t" + re.sub('\s+',' ', str(unallocated)) + "\n" )
+
+        #if there are freeblocks, lets pull the data
+
+        while freeblock_offset != 0:
+
+            #move to the freeblock offset
+            f.seek(offset+freeblock_offset)
+
+            #get next freeblock chain
+            next_fb_offset = struct.unpack('>h',f.read(2))[0]
+
+            #get the size of this freeblock
+            free_block_size = struct.unpack('>hh',f.read(4))[0]
+
+            #move to the offset so we can read the free block data
+            f.seek(offset+freeblock_offset)
+
+            #read in this freeblock
+            free_block = f.read(free_block_size)
+
+            if options.raw == True:
+                output.write("Free Block, Offset " + str(offset+freeblock_offset) + ", Length " + str(free_block_size) + "\n")
+                output.write("Data:\n")
+                output.write((free_block))
+                output.write( "\n\n")
+
+            else:
+                #lets clean this up so its mainly the strings - remove white spaces and tabs too
+                free_block = remove_ascii_non_printable(free_block)
+                if unallocated != "":
+                    output.write("Free Block" + "\t" + str(offset+freeblock_offset) + "\t" + str(free_block_size) + "\t" + re.sub('\s+',' ', str(free_block)) + "\n" )
+
+            freeblock_offset = next_fb_offset
+
+    # Cheeky's Change: Extract strings from non-Leaf-Table B-tree pages to handle re-purposed/re-used pages
+    # According to docs, valid flag values are 2, 5, 10, 13 BUT pages containing string data have also been observed with flag = 0
+    # So just print strings from all non flag = 13 pages.
+    elif (options.printpages):
+        # read block into one big string, filter unprintables, then print
+        pagestring = f.read(pagesize-1) # we've already read the flag byte
+        printable_pagestring = remove_ascii_non_printable(pagestring)
+
+        if options.raw == True:
+            output.write("Non-Leaf-Table-Btree-Type_"+ str(flag) + ", Offset " + str(offset) + ", Length " + str(pagesize) + "\n")
+            output.write("Data: (ONLY PRINTABLE STRINGS ARE SHOWN HERE. FOR RAW DATA, CHECK FILE IN HEX VIEWER AT ABOVE LISTED OFFSET):\n\n")
+            output.write(printable_pagestring)
+            output.write( "\n\n")
+        else:
+            output.write("Non-Leaf-Table-Btree-Type_" + str(flag) + "\t" + str(offset) + "\t" + str(pagesize) + "\t" + re.sub('\s+',' ', printable_pagestring) + "\n" )
+
+    #increase the offset by one pagesize and loop
+    offset = offset + pagesize
+
+output.close()
+
+#end
+