1
0

parser.rb 579 B

12345678910111213141516171819202122232425262728
  module Docs
    # Parses a raw HTML string with Nokogiri, choosing between full-document
    # and fragment parsing based on how the content begins.
    #
    # Content that opens (after optional whitespace and HTML comments) with
    # `<!doctype` or `<html` is parsed with `Nokogiri::HTML.parse`; everything
    # else is parsed with `Nokogiri::HTML.fragment`. Both use UTF-8.
    class Parser
      # Matches content that starts like a complete HTML document: any amount
      # of leading whitespace and/or HTML comments, followed by `<!doctype` or
      # `<html` (case-insensitive).
      #
      # The `m` flag lets `.` cross newlines so multi-line comments are
      # skipped, and the negative lookahead `(?!-->)` ends each comment at its
      # first `-->` so real markup between two comments is never swallowed.
      #
      # NOTE: `private` has no effect on constants, so this is declared ahead
      # of the public API and remains reachable as `Docs::Parser::DOCUMENT_RGX`.
      DOCUMENT_RGX = /\A(?:\s|<!--(?:(?!-->).)*-->)*<(?:!doctype|html)/im

      # @return [String, nil] text content of the first `<title>` element;
      #   only set when the content was parsed as a document, nil otherwise
      # @return the parsed Nokogiri document or fragment
      attr_reader :title, :html

      # @param content [String] raw HTML (a whole document or a fragment)
      def initialize(content)
        @content = content
        @html = document? ? parse_as_document : parse_as_fragment
      end

      private

      # @return [Boolean] whether the content looks like a full HTML document.
      #   `Regexp#match?` returns false for nil and does not touch `$~`.
      def document?
        DOCUMENT_RGX.match?(@content)
      end

      # Parses the content as a complete document and records its title.
      #
      # @return the document returned by `Nokogiri::HTML.parse`
      def parse_as_document
        document = Nokogiri::HTML.parse(@content, nil, 'UTF-8')
        # `at_css` returns nil when there is no <title>; `&.` keeps that nil
        # without depending on ActiveSupport's `Object#try`.
        @title = document.at_css('title')&.content
        document
      end

      # Parses the content as an HTML fragment; no title is extracted.
      #
      # @return the fragment returned by `Nokogiri::HTML.fragment`
      def parse_as_fragment
        Nokogiri::HTML.fragment(@content, 'UTF-8')
      end
    end
  end