# File lib/puppet/util/puppetdb/char_encoding.rb, line 35

# Produces a UTF-8 version of +str+, substituting a replacement character for
# any bytes that are invalid or cannot be converted.
#
# str - the String to sanitize.
#
# Returns +str+ itself when it is already tagged as UTF-8 and its contents are
# valid, and a freshly transcoded String when +str+ is in another encoding and
# converts cleanly. On every path that had to alter the data, the result is
# whatever +warn_if_changed+ returns (presumably the cleaned string, after
# logging a warning -- that helper is defined elsewhere; confirm there).
def self.utf8_string(str)
  # The pattern is anchored and the dots are escaped on purpose: a bare
  # /1.8/ also matches versions such as "2.1.8", which would wrongly route
  # a modern interpreter through the Ruby 1.8 code path.
  if RUBY_VERSION =~ /\A1\.8\./
    # Ruby 1.8 doesn't have String#encode and related methods, and there
    # appears to be a bug in iconv that will interpret some byte sequences
    # as 6-byte characters. Thus, we are forced to resort to some unfortunate
    # manual chicanery.
    warn_if_changed(str, ruby18_clean_utf8(str))
  elsif str.encoding == Encoding::UTF_8
    # We're on ruby 1.9+, so the string encoding methods are available.
    # However, a String being *marked* as UTF-8 doesn't guarantee that its
    # contents are actually valid, and calling ".encode" with a target of
    # "utf-8" on a String ruby already believes is UTF-8 seems to be
    # optimized into a no-op. So we have to check and repair by hand.

    # If the string already has valid encoding then we're fine.
    return str if str.valid_encoding?

    # Otherwise walk the characters and swap each invalid one for U+FFFD
    # (the Unicode replacement character).
    warn_if_changed(str, str.each_char.map { |c| c.valid_encoding? ? c : "\ufffd" }.join)
  else
    # Ruby 1.9+ and the string is *not* tagged as UTF-8, so we can rely on
    # ruby's "encode" method to do a real transcoding.
    begin
      str.encode('UTF-8')
    rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
      # The string is either invalid in its source encoding or contains
      # characters with no UTF-8 mapping; retry, replacing the offending
      # bytes with the replacement character instead of raising.
      warn_if_changed(str, str.encode('UTF-8', :invalid => :replace, :undef => :replace))
    end
  end
end