doc.rb 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298
  1. module Docs
  2. class Doc
  3. INDEX_FILENAME = 'index.json'
  4. DB_FILENAME = 'db.json'
  5. META_FILENAME = 'meta.json'
  6. class << self
  7. include Instrumentable
  8. attr_accessor :name, :slug, :type, :release, :abstract, :links
  # Hook Ruby invokes on this class whenever a subclass is created.
  # The new subclass starts out with the same +type+ as its parent.
  def inherited(subclass)
    # Keep the hook chain intact so hooks defined further up the ancestor
    # chain (including Ruby's own no-op default) still run.
    super
    subclass.type = type
  end
  # Dual-purpose method.
  #
  # Without a block: returns the value stored in @version (the argument is
  # ignored).
  #
  # With a block: creates an anonymous subclass of this class, copies name,
  # slug, release and links onto it, assigns it the given version, evaluates
  # the block in the subclass's context, records the subclass in @versions
  # and returns it.
  def version(version = nil, &block)
    return @version unless block

    Class.new(self).tap do |versioned|
      versioned.name = name
      versioned.slug = slug
      versioned.version = version
      versioned.release = release
      versioned.links = links
      versioned.class_exec(&block)
      (@versions ||= []) << versioned
    end
  end
  # Stores the version, always as a String (nil therefore becomes '').
  def version=(value)
    normalized = value.to_s
    @version = normalized
  end
  # Returns the versioned subclasses recorded in @versions, or a
  # one-element array containing this class when none are recorded.
  def versions
    @versions.blank? ? [self] : @versions
  end
  # True when this class has a non-blank version.
  def version?
    !version.blank?
  end
  # True when at least one versioned subclass has been recorded in @versions.
  #
  # Returns a real boolean, so callers never receive the internal (mutable)
  # @versions array through a predicate.
  def versioned?
    @versions.present?
  end
  # Returns the explicitly assigned name, or else the class's own name with
  # its module namespace removed.
  def name
    return @name if @name

    super.demodulize
  end
  # Returns the slug identifying this doc.
  #
  # The base slug is @slug when set, otherwise default_slug; a RuntimeError
  # ('slug is required') is raised when neither yields a value. When the doc
  # has a version, "~" and version_slug are appended.
  def slug
    base_slug = @slug || default_slug || raise('slug is required')
    return base_slug unless version?

    "#{base_slug}~#{version_slug}"
  end
  # Returns a slug-safe form of the version, or nil when the version is blank.
  #
  # The version is lowercased, '+' becomes 'p', '#' becomes 's', and every
  # remaining character other than a-z, 0-9, '_' or '.' becomes '_'.
  def version_slug
    return if version.blank?

    version.downcase
           .gsub('+', 'p')
           .gsub('#', 's')
           .gsub(%r{[^a-z0-9\_\.]}, '_')
  end
  # Path of this doc relative to the store root; identical to the slug.
  def path
    slug
  end
  # Location of the index file (INDEX_FILENAME) inside the doc's path.
  def index_path
    File.join(path, INDEX_FILENAME)
  end
  # Location of the page database file (DB_FILENAME) inside the doc's path.
  def db_path
    File.join(path, DB_FILENAME)
  end
  # Location of the metadata file (META_FILENAME) inside the doc's path.
  def meta_path
    File.join(path, META_FILENAME)
  end
  # Builds the Hash describing this doc.
  #
  # :name, :slug and :type are always included. :links and :release are added
  # only when present. :version is added when it is present or when @version
  # has been assigned at all (even to a blank value).
  def as_json
    { name: name, slug: slug, type: type }.tap do |json|
      json[:links] = links if links.present?
      json[:version] = version if version.present? || defined?(@version)
      json[:release] = release if release.present?
    end
  end
  # Extends as_json with details that depend on +store+.
  #
  # :attribution is added (stripped) when options[:attribution] is a String.
  # :db_size is added when the db file exists in the store, and :mtime (as an
  # Integer) when the meta file exists.
  def as_json_extra(store)
    as_json.tap do |json|
      attribution = options[:attribution]
      json[:attribution] = attribution.strip if attribution.is_a?(String)
      json[:db_size] = store.size(db_path) if store.exist?(db_path)
      json[:mtime] = store.mtime(meta_path).to_i if store.exist?(meta_path)
    end
  end
  # Builds the single page identified by +id+ and writes it to +store+.
  #
  # Inside store.open(path), the page is built by a fresh instance. When a
  # page is returned and passes store_page?, its entries and output are added
  # to a new index and page db, both are passed to store_index with
  # read_write disabled, and the page output is written to page[:store_path].
  #
  # The block yields true when the page was stored and false otherwise; a
  # Docs::SetupError is printed and turned into false.
  def store_page(store, id)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.open(path) do
      page = new.build_page(id)

      if page && store_page?(page)
        entry_index.add(page[:entries])
        page_db.add(page[:path], page[:output])
        store_index(store, INDEX_FILENAME, entry_index, false)
        store_index(store, DB_FILENAME, page_db, false)
        store.write(page[:store_path], page[:output])
        true
      else
        false
      end
    end
  rescue Docs::SetupError => e
    puts "ERROR: #{e.message}"
    false
  end
  # Builds every page of this doc and writes the result to +store+.
  #
  # Inside store.replace(path), each page yielded by build_pages that passes
  # store_page? is written to page[:store_path] and added to a new index and
  # page db. When the index ends up non-empty, the index, the page db and the
  # metadata are stored and the block yields true; otherwise it yields false.
  #
  # A Docs::SetupError is printed and turned into false.
  def store_pages(store)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.replace(path) do
      new.build_pages do |page|
        if store_page?(page)
          store.write(page[:store_path], page[:output])
          entry_index.add(page[:entries])
          page_db.add(page[:path], page[:output])
        end
      end

      if entry_index.present?
        store_index(store, INDEX_FILENAME, entry_index)
        store_index(store, DB_FILENAME, page_db)
        store_meta(store)
        true
      else
        false
      end
    end
  rescue Docs::SetupError => e
    puts "ERROR: #{e.message}"
    false
  end
  122. private
  # Slug derived from the name: the lowercased name, or nil when the name
  # contains anything other than ASCII letters, digits or underscores.
  def default_slug
    name.downcase unless name =~ /[^A-Za-z0-9_]/
  end
  # A page is stored only when it produced at least one entry.
  def store_page?(page)
    !page[:entries].blank?
  end
  # Serializes +index+ to JSON and emits an instrumentation event named after
  # the file ("index.doc" for "index.json"), carrying the previous and the new
  # JSON.
  #
  # With read_write enabled (the default) the previous JSON is read from the
  # store and the new JSON is written back. When it is disabled, or nothing
  # could be read, the previous JSON is reported as '{}' and nothing is
  # written.
  def store_index(store, filename, index, read_write=true)
    previous = (store.read(filename) if read_write) || '{}'
    current = index.to_json
    event = "#{filename.remove('.json')}.doc"

    instrument event, before: previous, after: current
    read_write && store.write(filename, current)
  end
  # Writes the metadata file: the as_json hash plus the current time (:mtime,
  # epoch seconds) and the size of the db file (:db_size), encoded as JSON.
  def store_meta(store)
    meta = as_json.merge(mtime: Time.now.to_i, db_size: store.size(DB_FILENAME))
    store.write(META_FILENAME, meta.to_json)
  end
  142. end
  # Refuses to instantiate classes flagged as abstract.
  #
  # Raises NotImplementedError when the receiver's class has +abstract+ set.
  def initialize
    return unless self.class.abstract

    raise NotImplementedError, "#{self.class} is an abstract class and cannot be instantiated."
  end
  # Builds the single page identified by +id+.
  #
  # Not implemented here: this version always raises NotImplementedError.
  # The class-level store_page treats the return value as a Hash with
  # :entries, :path, :output and :store_path keys (or a falsy value).
  def build_page(id, &_block)
    raise NotImplementedError
  end
  # Builds every page of the documentation.
  #
  # Not implemented here: this version always raises NotImplementedError.
  # The class-level store_pages passes a block that receives each page Hash.
  def build_pages(&_block)
    raise NotImplementedError
  end
  # Returns the version this scraper currently targets.
  #
  # When the class defines +options+ and options[:release] is not nil, that
  # release is returned. Otherwise https://devdocs.io/docs.json is fetched and
  # the highest 'mtime' among the entries whose 'name' equals this class's
  # name is returned, i.e. the Epoch timestamp of when the doc was last
  # modified in DevDocs production (nil when no entry matches).
  def get_scraper_version(opts)
    if self.class.method_defined?(:options)
      release = options[:release]
      return release unless release.nil?
    end

    fetch_json('https://devdocs.io/docs.json', opts)
      .select { |doc| doc['name'] == self.class.name }
      .map { |doc| doc['mtime'] }
      .max
  end
  # Returns the latest available version of this documentation.
  #
  # Contract for implementations:
  # - if options[:release] is defined, return a value in the same format;
  # - if options[:release] is not defined, return the Epoch timestamp of when
  #   the documentation was last updated;
  # - if the docs will never change, simply return '1.0.0'.
  #
  # Not implemented here: this version always raises NotImplementedError.
  def get_latest_version(opts)
    raise NotImplementedError
  end
  # Reports whether this scraper is outdated: returns 'Outdated major version',
  # 'Outdated minor version' or 'Up-to-date'.
  #
  # The default implementation assumes a semver(-like) scheme. Versions are
  # split on '.' and '-' and compared numerically; only the first two
  # components are considered, because patch releases usually bring little to
  # no documentation changes. Comparison stops at the first component missing
  # from either version.
  #
  # While the major component is 0 or 1 on both sides, a newer minor component
  # is reported as 'Outdated major version'.
  #
  # Scrapers of documentations that do not use this versioning approach should
  # override this method.
  #
  # Examples:
  # 1 -> 2 = outdated
  # 1.1 -> 1.2 = outdated
  # 1.1.1 -> 1.1.2 = not outdated
  def outdated_state(scraper_version, latest_version)
    current = scraper_version.to_s.split(/[-.]/).map(&:to_i)
    latest = latest_version.to_s.split(/[-.]/).map(&:to_i)

    # First component.
    return 'Up-to-date' if current.empty? || latest.empty?
    return 'Outdated major version' if latest[0] > current[0]
    return 'Up-to-date' if latest[0] < current[0]

    # Second component; from here on both first components are equal.
    return 'Up-to-date' if current.length < 2 || latest.length < 2
    return 'Up-to-date' unless latest[1] > current[1]

    [0, 1].include?(latest[0]) ? 'Outdated major version' : 'Outdated minor version'
  end
  195. private
  196. #
  197. # Utility methods for get_latest_version
  198. #
  # Performs a request for +url+ through Request.run and returns the response
  # body.
  #
  # Requests to https://api.github.com/ carry an Authorization header when a
  # token is available: opts[:github_token] is preferred, then the
  # GITHUB_TOKEN environment variable. Blank tokens are skipped, so a nil
  # option or an empty environment variable falls through to the next source
  # instead of producing a malformed "token " header.
  #
  # opts must provide a :logger responding to #debug and #error.
  #
  # Raises RuntimeError (after logging the reason) when the request timed out
  # or was otherwise unsuccessful.
  def fetch(url, opts)
    headers = {}

    if url.start_with?('https://api.github.com/')
      token = [opts[:github_token], ENV['GITHUB_TOKEN']].find do |candidate|
        candidate && !candidate.to_s.empty?
      end
      headers['Authorization'] = "token #{token}" if token
    end

    opts[:logger].debug("Fetching #{url}")
    response = Request.run(url, { connecttimeout: 15, headers: headers })
    return response.body if response.success?

    reason =
      if response.timed_out?
        "Timed out while connecting to #{url}"
      else
        "Couldn't fetch #{url} (response code #{response.code})"
      end
    opts[:logger].error(reason)
    raise reason
  end
  # Fetches +url+ and returns its body parsed by Nokogiri as UTF-8 HTML.
  def fetch_doc(url, opts)
    Nokogiri::HTML.parse(fetch(url, opts), nil, 'UTF-8')
  end
  # Fetches +url+ and returns its body parsed as JSON.
  def fetch_json(url, opts)
    body = fetch(url, opts)
    JSON.parse(body)
  end
  # Returns the version the npm registry lists for +package+ under the given
  # dist-tag ('latest' by default).
  def get_npm_version(package, opts, tag='latest')
    dist_tags = fetch_json("https://registry.npmjs.com/#{package}", opts)['dist-tags']
    dist_tags[tag]
  end
  # Returns the tag name of the repository's latest GitHub release, without a
  # leading 'v' when the tag has one.
  def get_latest_github_release(owner, repo, opts)
    tag = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/releases/latest", opts)['tag_name']
    return tag unless tag.start_with?('v')

    tag[1..-1]
  end
  # Returns the parsed JSON response of GitHub's tags endpoint for the
  # repository.
  def get_github_tags(owner, repo, opts)
    endpoint = "https://api.github.com/repos/#{owner}/#{repo}/tags"
    fetch_json(endpoint, opts)
  end
  # Returns the contents of the file at +path+ in the repository: the
  # 'content' field of GitHub's contents endpoint, Base64-decoded.
  def get_github_file_contents(owner, repo, path, opts)
    encoded = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/contents/#{path}", opts)['content']
    Base64.decode64(encoded)
  end
  # Returns the author date of the first commit listed by GitHub's commits
  # endpoint for the repository, as a Unix timestamp (Integer).
  #
  # The ISO 8601 string is parsed with DateTime rather than Date:
  # Date.iso8601 drops the time of day and the UTC offset, which made the
  # result local midnight of the commit day and therefore dependent on the
  # machine's time zone. DateTime comes from the same library as Date, so no
  # additional require is needed.
  def get_latest_github_commit_date(owner, repo, opts)
    commits = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/commits", opts)
    timestamp = commits[0]['commit']['author']['date']
    DateTime.iso8601(timestamp).to_time.to_i
  end
  # Returns the parsed JSON response of the GitLab v4 repository tags endpoint
  # on +hostname+ for the project "group/project".
  def get_gitlab_tags(hostname, group, project, opts)
    endpoint = "https://#{hostname}/api/v4/projects/#{group}%2F#{project}/repository/tags"
    fetch_json(endpoint, opts)
  end
  247. end
  248. end