git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/hardware/templates/default/ohai.rb.erb
Handle failed megaraid disks with no ID
[chef.git] / cookbooks / hardware / templates / default / ohai.rb.erb
index 5f1cee9d79d7fdd811015407528bfe16c36a5644..6e6e5f37e200d1f86c50ba95882fcdf126553970 100644 (file)
@@ -253,19 +253,10 @@ Ohai.plugin(:Hardware) do
   end
 
   def find_md_arrays(devices)
-    controller = {
-      :id => devices[:controllers].count,
-      :type => "md",
-      :arrays => [],
-      :disks => []
-    }
-
-    devices[:controllers] << controller
-
     array = nil
 
     File.new("/proc/mdstat", "r").each do |line|
-      if line =~ /^(md\d+) : active raid(\d+)((?: (?:sd[a-z]|nvme\d+n\d+)\d*\[\d+\](?:\([A-Z]\))*)+)$/
+      if line =~ /^(md\d+) : active raid(\d+)((?: (?:sd[a-z]\d*|nvme\d+n\d+(?:p\d+)?)\[\d+\](?:\([A-Z]\))*)+)$/
         array = {
           :id => devices[:arrays].count,
           :device => "/dev/#{Regexp.last_match(1)}",
@@ -294,7 +285,6 @@ Ohai.plugin(:Hardware) do
         end
 
         devices[:arrays] << array
-        controller[:arrays] << array[:id]
       elsif array && line =~ /^\s+(\d+) blocks.*(?:\[([U_]+)\])?/
         array[:size] = format_disk_size(Regexp.last_match(1).to_i)
         array[:status] = "degraded" if Regexp.last_match(2) =~ /_/
@@ -317,6 +307,8 @@ Ohai.plugin(:Hardware) do
     disk = nil
 
     IO.popen(%w(ssacli controller all show config detail)).each do |line|
+      next unless line.valid_encoding?
+
       if line =~ /^Smart (?:Array|HBA) (\S+) /
         controller = {
           :id => devices[:controllers].count,
@@ -377,6 +369,7 @@ Ohai.plugin(:Hardware) do
       elsif array && line =~ /^         Status:\s+(.*\S)\s*$/
         case Regexp.last_match(1)
         when "OK" then array[:status] = "optimal"
+        when "Interim Recovery Mode" then array[:status] = "degraded"
         else array[:status] = "unknown"
         end
       elsif array && line =~ /^         (\S[^:]+):\s+(.*\S)\s*$/
@@ -414,10 +407,14 @@ Ohai.plugin(:Hardware) do
 
       if disk[:status] == "Failed"
         disk[:status] = "failed"
+      elsif disk[:status] == "Predictive Failure"
+        disk[:status] = "failed"
       elsif disk[:status] == "OK" && disk[:drive_type] == "Data Drive"
         disk[:status] = "online"
       elsif disk[:status] == "OK" && disk[:drive_type] == "Spare Drive"
         disk[:status] = "hotspare"
+      elsif disk[:drive_type] == "Unassigned Drive"
+        disk[:status] = "unconfigured"
       else
         disk[:status] = "unknown"
       end
@@ -429,6 +426,7 @@ Ohai.plugin(:Hardware) do
   def find_megaraid_disks(devices)
     controllers = []
     arrays = []
+    disks = []
 
     controller = nil
     array = nil
@@ -446,11 +444,11 @@ Ohai.plugin(:Hardware) do
         devices[:controllers] << controller
 
         controllers << controller
-      elsif line =~ /^Bus Number\s+:\s+(\d+)$/
+      elsif line =~ /^Bus Number\s+:\s+([0-9a-f]+)$/i
         controller[:pci_slot] = format "0000:%02x", Integer("0x#{Regexp.last_match(1)}")
-      elsif line =~ /^Device Number\s+:\s+(\d+)$/
+      elsif line =~ /^Device Number\s+:\s+([0-9a-f]+)$/i
         controller[:pci_slot] = format "%s:%02x", controller[:pci_slot], Integer("0x#{Regexp.last_match(1)}")
-      elsif line =~ /^Function Number\s+:\s+(\d+)$/
+      elsif line =~ /^Function Number\s+:\s+([0-9a-f]+)$/i
         controller[:pci_slot] = format "%s.%01x", controller[:pci_slot], Integer("0x#{Regexp.last_match(1)}")
       end
     end
@@ -499,8 +497,11 @@ Ohai.plugin(:Hardware) do
         devices[:disks] << disk
         controller[:disks] << disk[:id]
         array[:disks] << disk[:id]
-      elsif disk && line =~ /^Firmware state:\s+(.*\S),\s*(.*\S)\s*$/
-        case Regexp.last_match(1)
+
+        disks << disk
+      elsif disk && line =~ /^Firmware state:\s+(\S.*)$/
+        status, state = Regexp.last_match(1).split(/,\s*/)
+        case status
         when "Unconfigured(good)" then disk[:status] = "unconfigured"
         when "Unconfigured(bad)" then disk[:status] = "unconfigured"
         when "Hotspare" then disk[:status] = "hotspare"
@@ -511,14 +512,14 @@ Ohai.plugin(:Hardware) do
         when "Copyback" then disk[:status] = "rebuilding"
         else disk[:status] = "unknown"
         end
-        case Regexp.last_match(2)
+        case state
         when "Spun Up" then disk[:state] = "spun_up"
         when "Spun down" then disk[:state] = "spun_down"
         else disk[:state] = "unknown"
         end
       elsif disk && line =~ /^(\S.*\S)\s*:\s+(\S.*)$/
         case Regexp.last_match(1)
-        when "Device Id" then disk[:smart_device] = "megaraid,#{Regexp.last_match(2)}"
+        when "Device Id" then disk[:device_id] = Regexp.last_match(2)
         when "WWN" then disk[:wwn] = Regexp.last_match(2)
         when "PD Type" then disk[:interface] = Regexp.last_match(2)
         when "Raw Size" then disk[:size] = memory_to_disk_size(Regexp.last_match(2).sub(/\s*\[.*\]$/, ""))
@@ -548,7 +549,8 @@ Ohai.plugin(:Hardware) do
         controller = controllers[Regexp.last_match(1).to_i]
       elsif controller && line =~ /^Enclosure Device ID: \d+$/
         disk = {
-          :controller => controller[:id]
+          :controller => controller[:id],
+          :arrays => []
         }
       elsif disk && line =~ /^WWN:\s+(\S+)$/
         unless devices[:disks].find { |d| d[:wwn] == Regexp.last_match(1) }
@@ -556,9 +558,12 @@ Ohai.plugin(:Hardware) do
           disk[:wwn] = Regexp.last_match(1)
 
           devices[:disks] << disk
+
+          disks << disk
         end
-      elsif disk && line =~ /^Firmware state:\s+(.*\S),\s*(.*\S)\s*$/
-        case Regexp.last_match(1)
+      elsif disk && line =~ /^Firmware state:\s+(\S.*)$/
+        status, state = Regexp.last_match(1).split(/,\s*/)
+        case status
         when "Unconfigured(good)" then disk[:status] = "unconfigured"
         when "Unconfigured(bad)" then disk[:status] = "unconfigured"
         when "Hotspare" then disk[:status] = "hotspare"
@@ -569,14 +574,14 @@ Ohai.plugin(:Hardware) do
         when "Copyback" then disk[:status] = "rebuilding"
         else disk[:status] = "unknown"
         end
-        case Regexp.last_match(2)
+        case state
         when "Spun Up" then disk[:state] = "spun_up"
         when "Spun down" then disk[:state] = "spun_down"
         else disk[:state] = "unknown"
         end
       elsif disk && line =~ /^(\S.*\S)\s*:\s+(\S.*)$/
         case Regexp.last_match(1)
-        when "Device Id" then disk[:smart_device] = "megaraid,#{Regexp.last_match(2)}"
+        when "Device Id" then disk[:device_id] = Regexp.last_match(2)
         when "PD Type" then disk[:interface] = Regexp.last_match(2)
         when "Raw Size" then disk[:size] = memory_to_disk_size(Regexp.last_match(2).sub(/\s*\[.*\]$/, ""))
         when "Inquiry Data" then disk[:vendor], disk[:model], disk[:serial_number] = Regexp.last_match(2).split
@@ -589,6 +594,18 @@ Ohai.plugin(:Hardware) do
         controller[:device] = "/dev/#{File.basename(device)}"
       end
     end
+
+    disks.each do |disk|
+      controller = devices[:controllers][disk[:controller]]
+
+      if id = disk.delete(:device_id)
+        if device = Dir.glob("/sys/bus/pci/devices/#{controller[:pci_slot]}/host*/target0:0:#{id}/0:0:#{id}:0/block/sd*").first
+          disk[:device] = "/dev/#{File.basename(device)}"
+        else
+          disk[:smart_device] = "megaraid,#{id}"
+        end
+      end
+    end
   end
 
   def find_mpt1_disks(devices)