# sqlite.rb
  module Docs
    # Scraper definition for the SQLite documentation, built on FileScraper.
    #
    # The class body is declarative: it sets the scraper's identity
    # (name/type/release), where the documentation lives (base_url/root_path/
    # initial_paths), which HTML filters run, and which pages are included or
    # skipped. How each setting is consumed is defined by the parent class,
    # which is not visible from this file.
    class Sqlite < FileScraper
      self.name = 'SQLite'
      self.type = 'sqlite'
      self.release = '3.40.0'
      self.base_url = 'https://sqlite.org/'
      self.root_path = 'docs.html'
      self.initial_paths = %w(keyword_index.html)
      self.links = {
        home: 'https://sqlite.org/',
        code: 'https://www.sqlite.org/src/'
      }

      # Filter pipeline: 'sqlite/clean_js_tables' is placed ahead of the
      # existing 'clean_html' filter; the SQLite-specific entries and
      # clean_html filters are appended at the end.
      html_filters.insert_before 'clean_html', 'sqlite/clean_js_tables'
      html_filters.push 'sqlite/entries', 'sqlite/clean_html'

      options[:clean_text] = false # keep SVG elements

      # Only paths ending in ".html" match; paths containing "releaselog" or
      # "consortium" match the skip patterns.
      options[:only_patterns] = [/\.html\z/]
      options[:skip_patterns] = [/releaselog/, /consortium/]

      # Individual pages excluded by exact path.
      options[:skip] = %w(
        index.html
        about.html
        download.html
        copyright.html
        support.html
        prosupport.html
        hp1.html
        news.html
        oldnews.html
        doclist.html
        dev.html
        chronology.html
        not-found.html
        famous.html
        books.html
        crew.html
        mostdeployed.html
        requirements.html
        session/intro.html
        syntax.html
        src/doc/trunk/doc/lemon.html
      )

      options[:attribution] = 'SQLite is in the Public Domain.'

      # Looks up the most recent SQLite release from the project's chronology
      # page.
      #
      # @param opts options forwarded unchanged to fetch_doc
      # @return the text content of the first link found in the last cell of
      #   a row of the #chrontab table
      #
      # NOTE(review): at_css presumably returns nil when nothing matches (as
      # Nokogiri's does), in which case `.content` raises NoMethodError —
      # confirm this is acceptable to callers if the page layout changes.
      def get_latest_version(opts)
        doc = fetch_doc('https://sqlite.org/chronology.html', opts)
        doc.at_css('#chrontab > tbody > tr > td:last-child > a').content
      end

      private

      # Repairs mismatched heading tags before handing the response to the
      # parent parser: an <h2 ...> whose text is closed by </h1> is rewritten
      # to close with </h2>.
      #
      # The substitution mutates response.body in place; the return value of
      # gsub! (nil when nothing matched) is intentionally ignored, and the
      # method returns whatever the parent's parse returns.
      def parse(response)
        response.body.gsub!(%r{(<h2[^>]*>[^<]+)</h1>}, '\1</h2>')
        super
      end
    end
  end