# parser.rb
  1. module Docs
  2. class Parser
  3. def initialize(content)
  4. @content = content
  5. end
  6. def html
  7. @html ||= document? ? parse_as_document : parse_as_fragment
  8. end
  9. private
  10. def document?
  11. @content =~ /\A\s*<(?:\!doctype|html)/i
  12. end
  13. def parse_as_document
  14. document = Nokogiri::HTML.parse @content, nil, 'UTF-8'
  15. document.at_css 'body'
  16. end
  17. def parse_as_fragment
  18. Nokogiri::HTML.fragment @content, 'UTF-8'
  19. end
  20. end
  21. end