doc.rb 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. module Docs
  2. class Doc
  # File names written inside a doc's store directory (see index_path,
  # db_path and meta_path). Frozen so the shared strings cannot be mutated
  # by accident.
  INDEX_FILENAME = 'index.json'.freeze
  DB_FILENAME = 'db.json'.freeze
  META_FILENAME = 'meta.json'.freeze
  6. class << self
  7. include Instrumentable
  8. attr_accessor :name, :slug, :type, :release, :abstract, :links
  # Inheritance hook: hands this class's +type+ down to the new subclass.
  def inherited(subclass)
    parent_type = type
    subclass.type = parent_type
  end
  # Reads the version label, or declares a versioned subclass.
  #
  # Without a block: returns @version (the +version+ argument is ignored).
  # With a block: creates a subclass of this class, copies name, slug,
  # release and links onto it, assigns the given version label, evaluates
  # the block in the subclass, appends it to @versions and returns it.
  def version(version = nil, &block)
    return @version unless block

    Class.new(self).tap do |subclass|
      subclass.name = name
      subclass.slug = slug
      subclass.version = version
      subclass.release = release
      subclass.links = links
      subclass.class_exec(&block)
      # Registered only after the block has run, so a raising block leaves
      # @versions untouched.
      (@versions ||= []) << subclass
    end
  end
  # Stores the version label, always converted with #to_s (so nil becomes '').
  def version=(value)
    label = value.to_s
    @version = label
  end
  # Returns the registered versioned subclasses, or a one-element array
  # holding this class itself when none have been registered.
  def versions
    return [self] unless @versions.present?

    @versions
  end
  # True when a non-blank version label is set.
  def version?
    !version.blank?
  end
  # Truthy when versioned subclasses have been registered: returns the
  # @versions array itself if it is non-blank, otherwise nil.
  def versioned?
    @versions if @versions.present?
  end
  # Returns the explicitly assigned name, falling back to the class name
  # with its module namespace stripped (demodulize).
  def name
    return @name if @name

    super.demodulize
  end
  # Returns the doc's slug: the assigned @slug or else default_slug, with
  # "~<version_slug>" appended when a version is set.
  # Raises RuntimeError when neither an assigned nor a default slug exists.
  def slug
    base = @slug || default_slug
    raise 'slug is required' unless base
    return base unless version?

    "#{base}~#{version_slug}"
  end
  # Builds the slug fragment for the version label: lowercased, '+' turned
  # into 'p', '#' into 's', and every other character outside a-z, 0-9,
  # '_' and '.' replaced by '_'. Returns nil when the version is blank.
  def version_slug
    return if version.blank?

    version.downcase
           .gsub('+', 'p')
           .gsub('#', 's')
           .gsub(%r{[^a-z0-9\_\.]}, '_')
  end
  # Directory path of this doc inside a store; identical to its slug.
  def path
    slug
  end
  # Path of the index file (INDEX_FILENAME) under this doc's path.
  def index_path
    File.join(path, INDEX_FILENAME)
  end
  # Path of the page database file (DB_FILENAME) under this doc's path.
  def db_path
    File.join(path, DB_FILENAME)
  end
  # Path of the metadata file (META_FILENAME) under this doc's path.
  def meta_path
    File.join(path, META_FILENAME)
  end
  # Builds the metadata hash for this doc. :name, :slug and :type are always
  # included; :links and :release only when present; :version when it is
  # present or when @version has been assigned at all (even to an empty
  # string).
  def as_json
    { name: name, slug: slug, type: type }.tap do |json|
      json[:links] = links if links.present?
      json[:version] = version if version.present? || defined?(@version)
      json[:release] = release if release.present?
    end
  end
  # Builds the single page identified by +id+ and writes it to +store+,
  # inside store.open(path).
  #
  # The block yields true when a page was built and passed store_page?,
  # false otherwise. A Docs::SetupError is reported on stdout and turned
  # into a false return value.
  def store_page(store, id)
    store.open(path) do
      page = new.build_page(id)
      next false unless page && store_page?(page)

      store.write(page[:store_path], page[:output])
      true
    end
  rescue Docs::SetupError => error
    puts "ERROR: #{error.message}"
    false
  end
  # Builds every page of the doc and writes them to +store+, inside
  # store.replace(path).
  #
  # Pages rejected by store_page? are skipped. Each stored page's entries go
  # into an EntryIndex and its output into a PageDb. When the index ends up
  # present, the index, the page database and the metadata are written and
  # the block yields true; otherwise it yields false. A Docs::SetupError is
  # reported on stdout and turned into a false return value.
  def store_pages(store)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.replace(path) do
      new.build_pages do |page|
        if store_page?(page)
          store.write(page[:store_path], page[:output])
          entry_index.add(page[:entries])
          page_db.add(page[:path], page[:output])
        end
      end

      next false unless entry_index.present?

      store_index(store, INDEX_FILENAME, entry_index)
      store_index(store, DB_FILENAME, page_db)
      store_meta(store)
      true
    end
  rescue Docs::SetupError => error
    puts "ERROR: #{error.message}"
    false
  end
  107. private
  # Derives a slug from the name: the lowercased name, or nil when the name
  # contains any character other than ASCII letters, digits or underscore.
  def default_slug
    name.downcase unless name =~ /[^A-Za-z0-9_]/
  end
  # A page is stored only when its :entries value is present (non-blank).
  def store_page?(page)
    !page[:entries].blank?
  end
  # Serializes +index+ with #to_json and writes it to +filename+ in +store+.
  # Before writing, emits an instrumentation event named after the file
  # (".json" removed, ".doc" appended) carrying the previously stored JSON
  # ('{}' when the store has none) and the new JSON.
  def store_index(store, filename, index)
    previous = store.read(filename) || '{}'
    current = index.to_json
    event = "#{filename.remove('.json')}.doc"

    instrument event, before: previous, after: current
    store.write(filename, current)
  end
  # Writes the metadata file: the as_json hash extended with :mtime (the
  # current Unix time) and :db_size (the store's size of the page database
  # file), serialized with #to_json.
  def store_meta(store)
    meta = as_json.merge!(mtime: Time.now.to_i, db_size: store.size(DB_FILENAME))
    store.write(META_FILENAME, meta.to_json)
  end
  127. end
  # Refuses to instantiate classes whose class-level +abstract+ flag is set.
  # Raises NotImplementedError in that case.
  def initialize
    return unless self.class.abstract

    raise NotImplementedError, "#{self.class} is an abstract class and cannot be instantiated."
  end
  # Placeholder for building the single page identified by +id+; always
  # raises NotImplementedError here. store_page uses the result as a hash
  # with :store_path, :output and :entries, or a falsy value for no page.
  def build_page(id, &block)
    raise NotImplementedError
  end
  # Placeholder for building all pages; always raises NotImplementedError
  # here. store_pages passes a block that receives each built page.
  def build_pages(&block)
    raise NotImplementedError
  end
  # Returns the version this scraper currently targets.
  #
  # When the class defines an +options+ instance method and
  # options[:release] is not nil, that release is returned. Otherwise the
  # DevDocs production manifest is fetched and the largest 'mtime' (Epoch
  # timestamp of the last modification) among entries whose 'name' equals
  # this class's name is returned; nil when no entry matches.
  def get_scraper_version(opts)
    if self.class.method_defined?(:options) && !options[:release].nil?
      return options[:release]
    end

    fetch_json('https://devdocs.io/docs.json', opts)
      .select { |item| item['name'] == self.class.name }
      .map { |item| item['mtime'] }
      .max
  end
  # Should return the latest available version of this documentation.
  #
  # * If options[:release] is defined, the result should use the same format.
  # * If options[:release] is not defined, the result should be the Epoch
  #   timestamp of when the documentation was last updated.
  # * If the docs will never change, simply return '1.0.0'.
  #
  # Always raises NotImplementedError here.
  def get_latest_version(opts)
    raise NotImplementedError
  end
  # Tells whether this scraper is outdated. Returns one of
  # 'Outdated major version', 'Outdated minor version' or 'Up-to-date'.
  #
  # The default implementation assumes a semver(-like) scheme. Both versions
  # are split on '.' and '-' and only the first two parts are compared;
  # patch updates are ignored because bug-fix-only releases usually bring
  # little to no documentation change. A newer second part counts as a major
  # update while the first part is 0 or 1, and as a minor update otherwise.
  #
  # Scrapers whose documentation is versioned differently should override
  # this method.
  #
  # Examples of the default implementation:
  # 1 -> 2 = 'Outdated major version'
  # 1.1 -> 1.2 = 'Outdated major version'
  # 2.1 -> 2.2 = 'Outdated minor version'
  # 1.1.1 -> 1.1.2 = 'Up-to-date'
  def outdated_state(scraper_version, latest_version)
    current = scraper_version.to_s.split(/[-.]/).map(&:to_i)
    latest = latest_version.to_s.split(/[-.]/).map(&:to_i)

    # First part: a difference in either direction settles the answer.
    return 'Up-to-date' if current.empty? || latest.empty?
    return 'Outdated major version' if latest[0] > current[0]
    return 'Up-to-date' if latest[0] < current[0]

    # Second part: only reached when the first parts are equal.
    return 'Up-to-date' if current.length < 2 || latest.length < 2
    return 'Up-to-date' unless latest[1] > current[1]

    [0, 1].include?(current[0]) ? 'Outdated major version' : 'Outdated minor version'
  end
  180. private
  181. #
  182. # Utility methods for get_latest_version
  183. #
  # Fetches +url+ and returns the response body.
  #
  # For GitHub API URLs an Authorization header is sent, using
  # opts[:github_token] when that key exists and otherwise the GITHUB_TOKEN
  # environment variable when it is set. The request is logged at debug
  # level through opts[:logger]. On failure the reason is logged at error
  # level and raised as a RuntimeError.
  def fetch(url, opts)
    github_api = 'https://api.github.com/'
    authorization =
      if opts.key?(:github_token) && url.start_with?(github_api)
        "token #{opts[:github_token]}"
      elsif ENV['GITHUB_TOKEN'] && url.start_with?(github_api)
        "token #{ENV['GITHUB_TOKEN']}"
      end
    headers = authorization ? { 'Authorization' => authorization } : {}

    opts[:logger].debug("Fetching #{url}")
    response = Request.run(url, { connecttimeout: 15, headers: headers })
    return response.body if response.success?

    reason =
      if response.timed_out?
        "Timed out while connecting to #{url}"
      else
        "Couldn't fetch #{url} (response code #{response.code})"
      end
    opts[:logger].error(reason)
    raise reason
  end
  # Fetches +url+ and parses the body as UTF-8 HTML with Nokogiri.
  def fetch_doc(url, opts)
    Nokogiri::HTML.parse(fetch(url, opts), nil, 'UTF-8')
  end
  # Fetches +url+ and parses the body as JSON.
  def fetch_json(url, opts)
    body = fetch(url, opts)
    JSON.parse(body)
  end
  # Returns the version that the npm registry lists under the dist-tag +tag+
  # ('latest' by default) for +package+.
  def get_npm_version(package, opts, tag = 'latest')
    dist_tags = fetch_json("https://registry.npmjs.com/#{package}", opts)['dist-tags']
    dist_tags[tag]
  end
  # Returns the tag name of the repository's latest GitHub release, with one
  # leading 'v' removed when present.
  def get_latest_github_release(owner, repo, opts)
    latest = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/releases/latest", opts)
    tag = latest['tag_name']
    return tag unless tag.start_with?('v')

    tag[1..-1]
  end
  # Returns the parsed JSON response of the GitHub API tags endpoint for
  # the given repository.
  def get_github_tags(owner, repo, opts)
    tags_url = "https://api.github.com/repos/#{owner}/#{repo}/tags"
    fetch_json(tags_url, opts)
  end
  # Fetches a file through the GitHub contents API and returns its
  # Base64-decoded 'content' field.
  def get_github_file_contents(owner, repo, path, opts)
    encoded = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/contents/#{path}", opts)['content']
    Base64.decode64(encoded)
  end
  # Returns the Epoch timestamp (in seconds) of the author date of the first
  # commit listed by the GitHub API for the given repository.
  #
  # Raises NoMethodError when the API returns an empty commit list.
  def get_latest_github_commit_date(owner, repo, opts)
    commits = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/commits", opts)
    timestamp = commits[0]['commit']['author']['date']
    # DateTime keeps the time of day and the UTC offset of the ISO 8601
    # value. Date.iso8601 dropped both, and Date#to_time then assumed local
    # midnight, so the result was off by up to a day and varied with the
    # machine's timezone. DateTime ships in the same 'date' library.
    DateTime.iso8601(timestamp).to_time.to_i
  end
  # Returns the parsed JSON response of the GitLab API repository tags
  # endpoint for +group+/+project+ on +hostname+.
  #
  # +group+ may be a nested group path such as 'parent/child'.
  def get_gitlab_tags(hostname, group, project, opts)
    # GitLab addresses a project by its URL-encoded full path, so every
    # slash has to become %2F, including those inside a nested group path.
    # Arguments without slashes produce the same URL as before.
    project_path = "#{group}/#{project}".gsub('/', '%2F')
    fetch_json("https://#{hostname}/api/v4/projects/#{project_path}/repository/tags", opts)
  end
  232. end
  233. end