Wednesday, March 20, 2013

Finding URLs in Text Files with PowerShell

A lot of our connection string information is stored in our web.config files.  For a project I am working on, I was asked to create a list of all of these.  This will help us determine which third parties we interface with so a new firewall can be set up accurately.  Below is the PowerShell script I came up with, with the help of others' posts on the web.  I haven't gotten the syntax highlighter working on my blog yet, but hopefully soon (I will come back and edit this post).

########################################################### 
# AUTHOR  : Mark Rainey  
# DATE    : 2013-03-13  
# COMMENT : Reads in a list of servers and searches a
# folder on each one (originally E:\Live; see $path in
# getStringMatch) for config files.  When it finds them it
# searches for anything starting with 3 or more letters
# (tcp or http) followed by a colon and two slashes (://)
###########################################################

# Turn on the strictest checking PowerShell offers
Set-StrictMode -Version Latest

# Folder that contains this script; every other path is built relative to it
$scriptpath = Split-Path -Parent $MyInvocation.MyCommand.Definition

# Workbook the results are saved to (a friend at work helped with the Excel export)
$outputFile = "$scriptpath\output.xls"

# Text file holding one server FQDN per line
$serverList = "$scriptpath\serverlist.txt"

# Server names read from the list file
$servers = Get-Content $serverList

# Hashtable collecting the results for every server
$Results = @{}

# Searches the config files on every server for URL-like strings and exports
# the unique matches to an Excel workbook, one column per server (server name
# in row 1, one URL per row below it).
#
# Reads from the calling scope : $servers, $Results, $outputFile
# Calls                        : Release-Ref (defined elsewhere in this file)
# Output                       : the string "Finished" once the function ends
Function getStringMatch
{
 # COM handles are declared before the Try so the Finally block can always
 # release them, even when the export fails part-way through.
 $excel    = $null
 $workbook = $null
 $ws       = $null

 # Regular expression for a URL format: 3+ letters, "://", dotted host name.
 # NOTE(review): the trailing path group is lazy, so a match normally stops
 # after the host name - confirm that is the intended capture.
 $regex = '([a-zA-Z]{3,})://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)*?'

 Try {
  # Loop through all servers
  Foreach ($server In $servers) {
   # UNC path to search on this server.
   # The path can be anywhere on your servers you want to search.
   $path = "\\$server\c$\temp\test\"

   # All *.config files below $path (directories filtered out). Wrapped in
   # @() so zero or one result still behaves like a collection.
   $files = @(Get-ChildItem $path *.config -Recurse | Where-Object { !($_.PSIsContainer) })

   # Not named $matches: that would shadow PowerShell's automatic $Matches.
   $found = New-Object System.Collections.ArrayList

   Foreach ($file In $files) {
    # Collect the text of every regex hit in the current file.
    Select-String -Path $file.FullName -Pattern $regex -AllMatches |
     ForEach-Object { $_.Matches } |
     ForEach-Object {
      # ArrayList.Add returns the new index; discard it so it does not
      # leak into this function's output.
      [void]$found.Add($_.Value)
     }
   }

   # Remove duplicates and always store an array (possibly empty).
   # Index assignment instead of .Add() so a server listed twice does not throw.
   $Results[$server] = @($found | Select-Object -Unique)
  }

  # Export to Excel workbook
  $excel = New-Object -ComObject Excel.Application
  $excel.Visible = $True
  $workbook = $excel.Workbooks.Add()
  $ws = $workbook.Worksheets.Item(1)
  $column = 1

  Foreach ($Result In $Results.GetEnumerator()) {
   # Row 1 holds the server name, the URLs follow underneath.
   $row = 1
   $ws.Cells.Item($row,$column) = $Result.Key
   $row++

   # @() makes a scalar, an array and $null all enumerable, which replaces
   # the index loop that previously ran one element past the end.
   Foreach ($url In @($Result.Value)) {
    if ($null -ne $url) {
     $ws.Cells.Item($row,$column) = $url
     $row++
    }
   }
   $column++
  }
  $workbook.SaveAs($outputFile)
 }
 Catch [System.Management.Automation.ActionPreferenceStopException] {
  # Was "$output", an undefined variable that itself fails under strict mode.
  Write-Host "$outputFile is locked see error:`n $_"
 }
 Catch {
  Write-Host "Something failed $_"
 }
 Finally {
  # Tear down Excel on every path. Release-Ref output is discarded so stray
  # reference counts never end up in this function's output.
  if ($null -ne $ws) {
   Release-Ref $ws | Out-Null
  }
  if ($null -ne $workbook) {
   # $false = do not save again; avoids a save prompt after a failed export.
   $workbook.Close($false)
   Release-Ref $workbook | Out-Null
  }
  if ($null -ne $excel) {
   $excel.Quit()
   Release-Ref $excel | Out-Null
  }
  "Finished"
 }
}

# Releases a COM object reference (used for the Office/Excel objects) and
# forces a garbage collection so the underlying COM server can shut down.
#
# $ref : the COM object to release; $null is ignored.
# Produces no pipeline output.
Function Release-Ref ($ref) {
 if ($null -ne $ref) {
  # ReleaseComObject returns the remaining reference count; discard it so it
  # does not leak into the caller's output. Passing $null would throw.
  [void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($ref)
 }
 [System.GC]::Collect()
 [System.GC]::WaitForPendingFinalizers()
}
 
# Script entry point: run the function defined above.
getStringMatch

No comments:

Post a Comment