IIS Logs Scripts

While working with some IIS logs, I decided to start practicing my Python. I put together some handy Python functions to work with IIS Log files. These will come in handy. On a 3GB, 2.5GHz, running WinXP machine, these functions take about 3 seconds to process a 180MB Text file. Python code could be optimized to be faster if you’re dealing with larger sized files.

#!/usr/bin/env python
 
# An IIS log file can have various log properties. Everytime you add new columns to log for
# in IIS, it creates a new row full of columns.
import re
import os
 
MainLogDelimiter = "#Software: Microsoft Internet Information Services 6.0"
TestFile         = "C:\\Dan\\IIS-Log-Import\\Logs\\not-the-same.txt"
BigTestFile      = "C:\\Dan\\IIS-Log-Import\\Logs\\ex090914\\ex090914.log"
LogsDir          = "C:\\Dan\\IIS-Log-Import\\Logs"
 
def SearchForFile( rootpath, searchfor, includepath = 0 ):
 
  # Search for a file recursively from a root directory.
  #  rootpath  = root directory to start searching from.
  #  searchfor = regexp to search for, e.g.:
  #                 search for *.jpg : \.exe$                     
  #  includepath = appends the full path to the file
  #                this attribute is optional
  # Returns a list of filenames that can be used to loop
  # through.
  #
  # TODO: Use the glob module instead. Could be faster.  
  names = []
  append = ""
  for root, dirs, files in os.walk( rootpath ): 
    for name in files:
      if re.search( searchfor, name ):
        if includepath == 0:
          root = ""          
        else:          
          append = "\\"
        names.append( root + append + name )        
  return names  
 
 
def isSameLogProperties( FILE ):
  # Tests to see if a log file has the same number of columns throughout
  # This is in case new column properties were added/subtracted in the course
  # of the log file.
  FILE.seek( 0, 0 )
  SubLogs = FILE.read().split( MainLogDelimiter )
 
  # SubLogs[0] Stores the number of different log variations in the log file  
  SubLogs[0] = len( SubLogs ) - 1    
 
  # Grab the column names from the log file, separated by space
  columns = re.search( "^#Fields:\s([\w\-()\s]+)$", SubLogs[1], re.IGNORECASE | re.MULTILINE ).group(1)   
  LogSameProperties = True
 
  for i in range( 2, SubLogs[0] + 1 ):
    # If there are columns
    if ( len( columns ) > 0 ):    
      if ( columns != re.search( "^#Fields:\s([\w\-()\s]+)$", SubLogs[i], re.IGNORECASE | re.MULTILINE ).group(1) ):        
        LogSameProperties = False
        break  
 
  return LogSameProperties
 
 
def getFirstColumn( FILE ):
  # This gets the columns from a log file. It returns only the first columns, and ignores another column
  # row that may exist in case new columns were added/subtracted in IIS. 
  # input: FILE
  # output: 1 single element List
  FILE.seek( 0, 0 )
  names = []
  # Grab the column names from the log file, separated by space
  names.append( re.search( "^#Fields:\s([\w\-()\s]+)$", FILE.read().split( MainLogDelimiter )[1], re.IGNORECASE | re.MULTILINE ).group(1).strip() )
  return names
 
 
def getAllColumns( FILE ):
  # This gets all the columns from a log file. 
  # input: FILE
  # output: List
  FILE.seek( 0, 0 )  
  names = []
  SubLogs = FILE.read().split( MainLogDelimiter )    
  # SubLogs[0] Stores the number of different log variations in the log file  
  SubLogs[0] = len( SubLogs ) - 1        
  for i in range( 1, SubLogs[0] + 1 ):        
    names.append( re.search( "^#Fields:\s([\w\-()\s]+)$", SubLogs[i], re.IGNORECASE | re.MULTILINE ).group(1).strip() )  
  return names  
 
 
# EXAMPLE:
# Loop through all the IIS log files in the directory
# for file in SearchForFile( LogsDir, "\.txt$", 1 ):  
LogFile = open( file, "r" )
if ( isSameLogProperties( LogFile ) ):
  print file, "the same"
else:
  print file, "not the same"
LogFile.close()

Leave a Reply