1
0

doc.rb 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  1. module Docs
  2. class Doc
  # File names written inside a doc's `path` directory.
  # Frozen so the shared string constants cannot be mutated by accident.
  INDEX_FILENAME = 'index.json'.freeze # serialized entry index
  DB_FILENAME = 'db.json'.freeze       # serialized page database
  META_FILENAME = 'meta.json'.freeze   # doc metadata (see store_meta)
  6. class << self
  7. include Instrumentable
  8. attr_accessor :name, :slug, :type, :release, :abstract, :links
  # Ruby hook invoked whenever a subclass of this class is created.
  # Copies this class's `type` onto the new subclass.
  #
  # `super` is called first so that any other `inherited` hook in the
  # ancestor chain still runs.
  def inherited(subclass)
    super
    subclass.type = type
  end
  # Dual-purpose reader / DSL method.
  #
  # Without a block: returns the stored version string (the argument is
  # ignored).
  #
  # With a block: creates an anonymous subclass, copies name, slug, release
  # and links onto it, assigns it the given version, evaluates the block in
  # the subclass's context, records the subclass in @versions and returns it.
  def version(version = nil, &block)
    return @version unless block_given?

    versioned_class = Class.new(self).tap do |child|
      child.name = name
      child.slug = slug
      child.version = version
      child.release = release
      child.links = links
      child.class_exec(&block)
    end

    (@versions ||= []) << versioned_class
    versioned_class
  end
  # Stores the version, always normalized to a String via `to_s`.
  def version=(value)
    normalized = value.to_s
    @version = normalized
  end
  # Returns the registered versioned subclasses, or a one-element array
  # containing this class itself when none have been registered.
  def versions
    return [self] if @versions.blank?

    @versions
  end
  # True when this class has a non-blank version.
  def version?
    !version.blank?
  end
  # Truthy (the list of versioned subclasses) when at least one versioned
  # subclass has been registered; nil otherwise.
  def versioned?
    @versions if @versions.present?
  end
  # Display name: the explicitly assigned name when there is one, otherwise
  # the class's own name passed through `demodulize`.
  def name
    return @name if @name

    super.demodulize
  end
  # Slug identifying this doc: the assigned slug, falling back to
  # `default_slug`. Raises when neither is available. For a versioned doc
  # the version slug is appended after a "~".
  def slug
    base = @slug || default_slug || raise('slug is required')
    return base unless version?

    "#{base}~#{version_slug}"
  end
  # Slug form of the version, or nil when the version is blank.
  # The version is lower-cased, "+" becomes "p", "#" becomes "s", and any
  # character other than a-z, 0-9, "_" or "." becomes "_".
  def version_slug
    return if version.blank?

    version.downcase
           .gsub('+', 'p')
           .gsub('#', 's')
           .gsub(%r{[^a-z0-9\_\.]}, '_')
  end
  # Storage path of this doc; identical to its slug.
  def path
    slug
  end
  # Path of the index file inside this doc's path.
  def index_path
    File.join(path, INDEX_FILENAME)
  end
  # Path of the page database file inside this doc's path.
  def db_path
    File.join(path, DB_FILENAME)
  end
  # Path of the metadata file inside this doc's path.
  def meta_path
    File.join(path, META_FILENAME)
  end
  # Hash describing this doc. Always contains :name, :slug and :type.
  # :links and :release are added only when present; :version is added when
  # it is present or when @version has been assigned at all (even if blank).
  def as_json
    { name: name, slug: slug, type: type }.tap do |json|
      json[:links] = links if links.present?
      json[:version] = version if version.present? || defined?(@version)
      json[:release] = release if release.present?
    end
  end
  # Builds the single page identified by `id` and writes it to `store`.
  #
  # Inside `store.open(path)`: the page is built with a new instance; when it
  # passes `store_page?`, its entries and output are added to a fresh index
  # and page db, both are instrumented (without being written, since
  # read_write is false) and the page output is written. The block yields
  # true in that case and false otherwise.
  #
  # A Docs::SetupError is reported with `puts` and results in false.
  def store_page(store, id)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.open(path) do
      page = new.build_page(id)
      next false unless page && store_page?(page)

      entry_index.add page[:entries]
      page_db.add page[:path], page[:output]
      store_index(store, INDEX_FILENAME, entry_index, false)
      store_index(store, DB_FILENAME, page_db, false)
      store.write page[:store_path], page[:output]
      true
    end
  rescue Docs::SetupError => setup_error
    puts "ERROR: #{setup_error.message}"
    false
  end
  # Builds every page of this doc and writes them to `store`.
  #
  # Inside `store.replace(path)`: each built page that passes `store_page?`
  # is written and added to a fresh index and page db. When the index ends
  # up non-blank, the index, the page db and the metadata are written and
  # the block yields true; otherwise it yields false.
  #
  # A Docs::SetupError is reported with `puts` and results in false.
  def store_pages(store)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.replace(path) do
      new.build_pages do |page|
        if store_page?(page)
          store.write page[:store_path], page[:output]
          entry_index.add page[:entries]
          page_db.add page[:path], page[:output]
        end
      end

      next false unless entry_index.present?

      store_index(store, INDEX_FILENAME, entry_index)
      store_index(store, DB_FILENAME, page_db)
      store_meta(store)
      true
    end
  rescue Docs::SetupError => setup_error
    puts "ERROR: #{setup_error.message}"
    false
  end
  113. private
  # Slug derived from the name: the lower-cased name, or nil when the name
  # contains anything other than ASCII letters, digits or underscores.
  def default_slug
    name.downcase unless name =~ /[^A-Za-z0-9_]/
  end
  # A page is stored only when it has at least one entry.
  def store_page?(page)
    !page[:entries].blank?
  end
  # Serializes `index` to JSON, emits an instrumentation event named after
  # the file (with ".json" removed, plus ".doc") carrying the previous and
  # new JSON, and writes the new JSON to `store`.
  #
  # When `read_write` is falsy the store is neither read (the previous JSON
  # is reported as '{}') nor written, and `read_write` itself is returned.
  def store_index(store, filename, index, read_write=true)
    previous_json = (store.read(filename) if read_write) || '{}'
    current_json = index.to_json
    event_name = "#{filename.remove('.json')}.doc"
    instrument event_name, before: previous_json, after: current_json
    read_write && store.write(filename, current_json)
  end
  # Writes the metadata file: the doc's `as_json` hash extended with the
  # current epoch time (:mtime) and the size the store reports for the page
  # database file (:db_size).
  def store_meta(store)
    meta = as_json.tap do |json|
      json[:mtime] = Time.now.to_i
      json[:db_size] = store.size(DB_FILENAME)
    end
    store.write(META_FILENAME, meta.to_json)
  end
  133. end
  # Refuses to instantiate classes flagged as abstract.
  def initialize
    return unless self.class.abstract

    raise NotImplementedError, "#{self.class} is an abstract class and cannot be instantiated."
  end
  # Builds the single page identified by `id`.
  # Not implemented here; this base implementation always raises
  # NotImplementedError.
  def build_page(id, &block)
    raise NotImplementedError
  end
  # Builds every page of the doc.
  # Not implemented here; this base implementation always raises
  # NotImplementedError.
  def build_pages(&block)
    raise NotImplementedError
  end
  # Version this scraper currently targets.
  #
  # Returns options[:release] when the class defines an `options` instance
  # method and that release is not nil. Otherwise returns the highest
  # `mtime` among the entries of DevDocs' docs.json whose name matches this
  # class's name.
  def get_scraper_version(opts)
    has_release = self.class.method_defined?(:options) && !options[:release].nil?
    return options[:release] if has_release

    # If options[:release] does not exist, we return the Epoch timestamp of when the doc was last modified in DevDocs production
    fetch_json('https://devdocs.io/docs.json', opts)
      .select { |item| item['name'] == self.class.name }
      .map { |item| item['mtime'] }
      .max
  end
  # Returns the latest available version of this documentation.
  #
  # Implementations should follow this contract:
  # - when options[:release] is defined, return a value in the same format;
  # - when it is not defined, return the Epoch timestamp of the last
  #   documentation update;
  # - when the docs will never change, simply return '1.0.0'.
  #
  # This base implementation always raises NotImplementedError.
  def get_latest_version(opts)
    raise NotImplementedError
  end
  # Reports how outdated this scraper is: 'Outdated major version',
  # 'Outdated minor version' or 'Up-to-date'.
  #
  # Both versions are split on "." and "-" and compared numerically, in a
  # semver-like fashion. Only the first two parts are considered; patch
  # updates are ignored because bug-fix releases usually bring little to no
  # documentation change. A missing part ends the comparison as 'Up-to-date'.
  #
  # A newer minor version counts as a *major* update while the major version
  # is 0 or 1.
  #
  # Scrapers of documentation that does not follow this versioning approach
  # should override this method.
  #
  # Examples:
  #   1     -> 2     = outdated (major)
  #   1.1   -> 1.2   = outdated (major, because the major version is 1)
  #   2.1   -> 2.2   = outdated (minor)
  #   1.1.1 -> 1.1.2 = not outdated
  def outdated_state(scraper_version, latest_version)
    scraper_major, scraper_minor = scraper_version.to_s.split(/[-.]/).map(&:to_i)
    latest_major, latest_minor = latest_version.to_s.split(/[-.]/).map(&:to_i)

    # Major part
    return 'Up-to-date' if scraper_major.nil? || latest_major.nil?
    return 'Outdated major version' if latest_major > scraper_major
    return 'Up-to-date' if latest_major < scraper_major

    # Minor part (the major parts are equal from here on)
    return 'Up-to-date' if scraper_minor.nil? || latest_minor.nil?
    return 'Up-to-date' unless latest_minor > scraper_minor

    [0, 1].include?(latest_major) ? 'Outdated major version' : 'Outdated minor version'
  end
  186. private
  187. #
  188. # Utility methods for get_latest_version
  189. #
  # Fetches `url` and returns the response body.
  #
  # For GitHub API URLs an "Authorization: token ..." header is added, taken
  # from opts[:github_token] when that key exists, otherwise from the
  # GITHUB_TOKEN environment variable when set.
  #
  # The request is logged at debug level through opts[:logger]. On failure
  # (timeout or unsuccessful response) the reason is logged as an error and
  # raised as a RuntimeError.
  def fetch(url, opts)
    request_headers = {}

    if opts.key?(:github_token) && url.start_with?('https://api.github.com/')
      request_headers['Authorization'] = "token #{opts[:github_token]}"
    elsif ENV['GITHUB_TOKEN'] && url.start_with?('https://api.github.com/')
      request_headers['Authorization'] = "token #{ENV['GITHUB_TOKEN']}"
    end

    opts[:logger].debug("Fetching #{url}")
    response = Request.run(url, { connecttimeout: 15, headers: request_headers })
    return response.body if response.success?

    reason =
      if response.timed_out?
        "Timed out while connecting to #{url}"
      else
        "Couldn't fetch #{url} (response code #{response.code})"
      end
    opts[:logger].error(reason)
    raise reason
  end
  # Fetches `url` and parses the body as a UTF-8 HTML document with Nokogiri.
  def fetch_doc(url, opts)
    html = fetch(url, opts)
    Nokogiri::HTML.parse(html, nil, 'UTF-8')
  end
  # Fetches `url` and parses the body as JSON.
  def fetch_json(url, opts)
    body = fetch(url, opts)
    JSON.parse(body)
  end
  # Looks up `package` on the npm registry and returns the version its
  # dist-tag `tag` points to ('latest' by default).
  def get_npm_version(package, opts, tag='latest')
    package_info = fetch_json("https://registry.npmjs.com/#{package}", opts)
    dist_tags = package_info['dist-tags']
    dist_tags[tag]
  end
  # Returns the tag name of the repository's latest GitHub release, without
  # a leading "v" when it has one.
  def get_latest_github_release(owner, repo, opts)
    latest = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/releases/latest", opts)
    tag = latest['tag_name']
    return tag unless tag.start_with?('v')

    tag[1..-1]
  end
  # Returns the parsed JSON response of the GitHub API tags endpoint for the
  # given repository.
  def get_github_tags(owner, repo, opts)
    tags_url = "https://api.github.com/repos/#{owner}/#{repo}/tags"
    fetch_json(tags_url, opts)
  end
  # Fetches a file through the GitHub contents API and returns its
  # Base64-decoded `content` field.
  def get_github_file_contents(owner, repo, path, opts)
    contents_url = "https://api.github.com/repos/#{owner}/#{repo}/contents/#{path}"
    encoded = fetch_json(contents_url, opts)['content']
    Base64.decode64(encoded)
  end
  # Returns the author date of the first commit listed by the GitHub commits
  # API for the repository, as an Epoch timestamp in seconds.
  #
  # The full ISO 8601 timestamp (time of day and UTC offset included) is
  # parsed. Parsing it as a plain Date would drop the time of day and yield
  # local midnight instead, making the result depend on the machine's
  # timezone and up to a day earlier than the actual commit.
  # DateTime comes from the same `date` library as Date.
  def get_latest_github_commit_date(owner, repo, opts)
    commits = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/commits", opts)
    timestamp = commits[0]['commit']['author']['date']
    DateTime.iso8601(timestamp).to_time.to_i
  end
  # Returns the parsed JSON response of the GitLab v4 API repository tags
  # endpoint for `group/project` on `hostname`.
  def get_gitlab_tags(hostname, group, project, opts)
    tags_url = "https://#{hostname}/api/v4/projects/#{group}%2F#{project}/repository/tags"
    fetch_json(tags_url, opts)
  end
  238. end
  239. end