# sqlite.rb (1.4 KB)

  module Docs
    # Scraper definition for the SQLite documentation, built on FileScraper.
    #
    # The class body is declarative: identity/metadata, the HTML filter
    # chain, and crawl options. The statements are kept in their original
    # order because the inherited setters/collections are defined elsewhere
    # and may be order-sensitive.
    class Sqlite < FileScraper
      # Identity and entry points.
      self.name = 'SQLite'
      self.type = 'sqlite'
      self.release = '3.33.0'
      self.base_url = 'https://sqlite.org/'
      self.root_path = 'docs.html'
      self.initial_paths = ['keyword_index.html']
      self.links = {
        home: 'https://sqlite.org/',
        code: 'https://www.sqlite.org/src/'
      }

      # Filter chain: 'sqlite/clean_js_tables' is inserted ahead of the
      # existing 'clean_html' filter; the entries filter and the
      # SQLite-specific clean-up filter are appended at the end.
      html_filters.insert_before('clean_html', 'sqlite/clean_js_tables')
      html_filters.push('sqlite/entries', 'sqlite/clean_html')

      # Crawl options. NOTE(review): how each key is interpreted is decided
      # by the parent scraper, which is not visible here; presumably
      # :only_patterns whitelists paths, while :skip_patterns and :skip
      # exclude them.
      options[:only_patterns] = [/\.html\z/]
      options[:skip_patterns] = [/releaselog/, /consortium/]
      options[:skip] = %w[
        index.html
        about.html
        download.html
        copyright.html
        support.html
        prosupport.html
        hp1.html
        news.html
        oldnews.html
        doclist.html
        dev.html
        chronology.html
        not-found.html
        famous.html
        books.html
        crew.html
        mostdeployed.html
        requirements.html
        session/intro.html
        syntax.html
        src/doc/trunk/doc/lemon.html
      ]

      options[:attribution] = 'SQLite is in the Public Domain.'

      # Looks up a version string on the SQLite chronology page.
      #
      # Fetches https://sqlite.org/chronology.html through fetch_doc and
      # returns the content of the node selected by at_css: a link inside
      # the last cell of a row of the #chrontab table.
      # NOTE(review): presumably this is the newest release because the
      # table lists the most recent release first; confirm against the page.
      #
      # opts is passed through to fetch_doc untouched.
      def get_latest_version(opts)
        chronology = fetch_doc('https://sqlite.org/chronology.html', opts)
        version_link = chronology.at_css('#chrontab > tbody > tr > td:last-child > a')
        version_link.content
      end

      private

      # Repairs mismatched heading tags before handing the response to the
      # parent implementation: a heading opened with <h2 ...> but closed
      # with </h1> is rewritten to close with </h2>. The substitution is
      # applied to response.body in place (gsub!), so the call to super
      # sees the corrected markup.
      def parse(response)
        mismatched_heading = %r{(<h2[^>]*>[^<]+)</h1>}
        response.body.gsub!(mismatched_heading, '\1</h2>')
        super
      end
    end
  end