1
0

doc.rb 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. module Docs
  2. class Doc
  # File names used inside a doc's directory (they are joined onto +path+ and
  # handed to the store when the index, page database and metadata are
  # written). Frozen so the shared strings cannot be mutated by accident.
  INDEX_FILENAME = 'index.json'.freeze
  DB_FILENAME = 'db.json'.freeze
  META_FILENAME = 'meta.json'.freeze
  6. class << self
  # NOTE(review): Instrumentable presumably supplies the +instrument+ helper
  # used by store_index -- confirm against its definition.
  include Instrumentable

  # Class-level settings. The +name+ and +slug+ readers generated here are
  # replaced by the explicit definitions further down in this singleton class;
  # the generated writers remain in use.
  attr_accessor :name,
                :slug,
                :type,
                :release,
                :abstract,
                :links
  # Hook that Ruby invokes when a class inherits from this one.
  # Copies this class's +type+ onto the new subclass.
  #
  # subclass - the Class that was just created.
  def inherited(subclass)
    # Keep the hook chain intact so any other +inherited+ implementation
    # higher up the ancestry still runs.
    super
    subclass.type = type
  end
  # Reader and DSL in one.
  #
  # Without a block: returns the stored version string (@version).
  #
  # With a block: creates an anonymous subclass of this class, copies name,
  # slug, release and links onto it, assigns it the given version, evaluates
  # the block in the subclass's class context, records the subclass in
  # @versions and returns it.
  def version(version = nil, &block)
    return @version unless block

    variant = Class.new(self)
    variant.name = name
    variant.slug = slug
    variant.version = version
    variant.release = release
    variant.links = links
    variant.class_exec(&block)

    (@versions ||= []) << variant
    variant
  end
  # Stores the version label, always as a String (nil therefore becomes '').
  def version=(value)
    @version = value.to_s
  end
  # Returns the version subclasses registered through the block form of
  # +version+, or a one-element array holding this class when there are none.
  def versions
    registered = @versions.presence
    registered || [self]
  end
  # True when this class carries a non-blank version string.
  def version?
    !version.blank?
  end
  # Truthy (the @versions array itself) when version subclasses have been
  # registered on this class; nil otherwise.
  def versioned?
    @versions if @versions.present?
  end
  # Display name: the explicitly assigned @name when there is one, otherwise
  # the inherited name with its namespace stripped via +demodulize+.
  def name
    return @name if @name

    super.demodulize
  end
  # Identifier of the doc: the assigned @slug, else +default_slug+; raises
  # 'slug is required' when neither yields a value. When the class has a
  # version, "~<version_slug>" is appended.
  def slug
    base = @slug || default_slug || raise('slug is required')
    return base unless version?

    "#{base}~#{version_slug}"
  end
  # Slug form of the version: lower-cased, '+' replaced by 'p', '#' replaced
  # by 's', and every remaining character outside a-z, 0-9, '_' and '.'
  # replaced by '_'. Returns nil when the version is blank.
  def version_slug
    return if version.blank?

    version.downcase
           .gsub('+', 'p')
           .gsub('#', 's')
           .gsub(%r{[^a-z0-9\_\.]}, '_')
  end
  # Directory of this doc inside a store; identical to +slug+.
  def path
    slug
  end
  # Location of the index file: +path+ joined with INDEX_FILENAME.
  def index_path
    File.join(path, INDEX_FILENAME)
  end
  # Location of the page database file: +path+ joined with DB_FILENAME.
  def db_path
    File.join(path, DB_FILENAME)
  end
  # Location of the metadata file: +path+ joined with META_FILENAME.
  def meta_path
    File.join(path, META_FILENAME)
  end
  # Hash describing this doc. Always contains :name, :slug and :type.
  # :links and :release are added when present; :version is added when it is
  # present or when @version has been assigned at all (even to '').
  # Keys are inserted in the order name, slug, type, links, version, release.
  def as_json
    payload = { name: name, slug: slug, type: type }
    payload[:links] = links if links.present?
    payload[:version] = version if version.present? || defined?(@version)
    payload[:release] = release if release.present?
    payload
  end
  # +as_json+ extended with data that needs the doc's options and a store:
  #
  #   :attribution - stripped options[:attribution], when this class exposes
  #                  +options+ and the value is a non-blank string-like object
  #   :db_size     - store.size(db_path), when that file exists in the store
  #   :mtime       - store.mtime(meta_path) as an Integer, when that file exists
  #
  # store - object responding to exist?, size and mtime.
  def as_json_extra(store)
    json = as_json

    # This runs on the class itself, so the check has to be made against the
    # class (respond_to?) rather than against self.class, which is Class.
    attribution = respond_to?(:options) ? options[:attribution] : nil
    if attribution.respond_to?(:strip)
      stripped = attribution.strip
      json[:attribution] = stripped unless stripped.empty?
    end

    json[:db_size] = store.size(db_path) if store.exist?(db_path)
    json[:mtime] = store.mtime(meta_path).to_i if store.exist?(meta_path)
    json
  end
  # Builds the single page identified by +id+ and writes it into +store+
  # under this doc's +path+.
  #
  # The page's entries and output are added to a fresh EntryIndex / PageDb,
  # both of which are passed to store_index with read_write = false (so only
  # the instrumentation fires; the index and db files are not rewritten),
  # and the page output is written to page[:store_path].
  #
  # The block given to store.open evaluates to true when a page was written
  # and to false when no page was built or it had no entries. A
  # Docs::SetupError is reported on stdout and yields false.
  def store_page(store, id)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.open(path) do
      page = new.build_page(id)
      next false unless page && store_page?(page)

      entry_index.add page[:entries]
      page_db.add page[:path], page[:output]
      store_index(store, INDEX_FILENAME, entry_index, false)
      store_index(store, DB_FILENAME, page_db, false)
      store.write page[:store_path], page[:output]
      true
    end
  rescue Docs::SetupError => error
    puts "ERROR: #{error.message}"
    false
  end
  # Builds every page of this doc and replaces the contents of +path+ in
  # +store+ with the result.
  #
  # Each page that has entries is written to its page[:store_path] and added
  # to the entry index and page database. Once all pages are built, and only
  # if the index is present, the index, the page database and the metadata
  # file are written.
  #
  # The block given to store.replace evaluates to true when the index was
  # written and to false otherwise. A Docs::SetupError is reported on stdout
  # and yields false.
  def store_pages(store)
    entry_index = EntryIndex.new
    page_db = PageDb.new

    store.replace(path) do
      new.build_pages do |page|
        if store_page?(page)
          store.write page[:store_path], page[:output]
          entry_index.add page[:entries]
          page_db.add page[:path], page[:output]
        end
      end

      next false unless entry_index.present?

      store_index(store, INDEX_FILENAME, entry_index)
      store_index(store, DB_FILENAME, page_db)
      store_meta(store)
      true
    end
  rescue Docs::SetupError => error
    puts "ERROR: #{error.message}"
    false
  end
  120. private
  # Slug derived from +name+: the lower-cased name, or nil when the name
  # contains any character other than ASCII letters, digits and '_'.
  def default_slug
    return if /[^A-Za-z0-9_]/.match?(name)

    name.downcase
  end
  # A page is only stored when it produced at least one entry.
  def store_page?(page)
    !page[:entries].blank?
  end
  # Serialises +index+ to JSON, reports the change through +instrument+
  # (event name is the file name without '.json' plus '.doc', carrying the
  # previous and the new JSON), and writes the result to +filename+.
  #
  # With read_write false nothing is read from or written to the store: the
  # previous JSON is reported as '{}' and the method returns false.
  # Otherwise it returns whatever store.write returns.
  def store_index(store, filename, index, read_write=true)
    previous = (read_write && store.read(filename)) || '{}'
    current = index.to_json

    instrument "#{filename.remove('.json')}.doc", before: previous, after: current
    read_write && store.write(filename, current)
  end
  # Writes META_FILENAME to the store: +as_json+ plus :mtime (current time in
  # epoch seconds) and :db_size (store size of DB_FILENAME), as JSON.
  def store_meta(store)
    meta = as_json.merge(
      mtime: Time.now.to_i,
      db_size: store.size(DB_FILENAME)
    )
    store.write(META_FILENAME, meta.to_json)
  end
  140. end
  # Refuses to instantiate classes whose class-level +abstract+ flag is set.
  def initialize
    return unless self.class.abstract

    raise NotImplementedError, "#{self.class} is an abstract class and cannot be instantiated."
  end
  # Placeholder: always raises NotImplementedError here. Doc.store_page uses
  # the return value as a hash with :entries, :path, :output and :store_path.
  def build_page(id, &block)
    raise NotImplementedError
  end
  # Placeholder: always raises NotImplementedError here. Doc.store_pages
  # passes a block that receives each built page.
  def build_pages(&block)
    raise NotImplementedError
  end
  # Version this scraper currently targets.
  #
  # Returns options[:release] when the class defines an +options+ instance
  # method and the release is not nil. Otherwise downloads
  # https://devdocs.io/docs.json and returns the largest 'mtime' among the
  # entries whose 'name' equals this class's name, i.e. the Epoch timestamp
  # of when the doc was last modified in DevDocs production (nil when no
  # entry matches).
  def get_scraper_version(opts)
    return options[:release] if self.class.method_defined?(:options) && !options[:release].nil?

    fetch_json('https://devdocs.io/docs.json', opts)
      .select { |item| item['name'] == self.class.name }
      .map { |item| item['mtime'] }
      .max
  end
  # Should return the latest upstream version of this documentation.
  #
  # - If options[:release] is defined, the result should use the same format.
  # - If options[:release] is not defined, the result should be the Epoch
  #   timestamp of when the documentation was last updated.
  # - If the docs will never change, simply return '1.0.0'.
  #
  # This base implementation always raises NotImplementedError.
  def get_latest_version(opts)
    raise NotImplementedError
  end
  # Returns whether or not this scraper is outdated: 'Outdated major version',
  # 'Outdated minor version' or 'Up-to-date'.
  #
  # The default implementation assumes the documentation uses a semver(-like)
  # versioning scheme. Both versions are split on '.' and '-' and each part is
  # converted with to_i. Only the first two parts are compared; patch updates
  # are ignored because bug-fix-only releases usually bring little to no
  # documentation changes. A minor bump while the major part is 0 or 1 is
  # reported as a major update.
  #
  # Scrapers of documentations that do not use this versioning approach
  # should override this method.
  #
  # Examples of the default implementation:
  # 1 -> 2 = outdated
  # 1.1 -> 1.2 = outdated
  # 1.1.1 -> 1.1.2 = not outdated
  def outdated_state(scraper_version, latest_version)
    scraper_major, scraper_minor = scraper_version.to_s.split(/[-.]/).map(&:to_i)
    latest_major, latest_minor = latest_version.to_s.split(/[-.]/).map(&:to_i)

    # Nothing to compare when either side has no parts at all.
    return 'Up-to-date' if scraper_major.nil? || latest_major.nil?
    return 'Outdated major version' if latest_major > scraper_major
    return 'Up-to-date' if latest_major < scraper_major

    # From here on both major parts are equal.
    return 'Up-to-date' if scraper_minor.nil? || latest_minor.nil?
    return 'Up-to-date' unless latest_minor > scraper_minor

    [0, 1].include?(scraper_major) ? 'Outdated major version' : 'Outdated minor version'
  end
  193. private
  194. #
  195. # Utility methods for get_latest_version
  196. #
  # Downloads +url+ and returns the response body.
  #
  # For URLs under https://api.github.com/ an Authorization header is sent,
  # using opts[:github_token] or, when that is missing, nil or empty,
  # ENV['GITHUB_TOKEN']. No header is sent when neither provides a token.
  #
  # opts - Hash; must contain :logger (responding to debug and error) and may
  #        contain :github_token.
  #
  # Raises RuntimeError, after logging the reason, when the request times out
  # or is otherwise unsuccessful.
  def fetch(url, opts)
    headers = {}

    if url.start_with?('https://api.github.com/')
      # Skip blank candidates so a nil/empty option cannot mask the
      # environment token or produce a bogus "token " header.
      token = [opts[:github_token], ENV['GITHUB_TOKEN']].find { |candidate| !candidate.to_s.empty? }
      headers['Authorization'] = "token #{token}" if token
    end

    opts[:logger].debug("Fetching #{url}")
    response = Request.run(url, { connecttimeout: 15, headers: headers })
    return response.body if response.success?

    reason = response.timed_out? ? "Timed out while connecting to #{url}" : "Couldn't fetch #{url} (response code #{response.code})"
    opts[:logger].error(reason)
    raise reason
  end
  # Downloads +url+ via +fetch+ and parses the body as a UTF-8 HTML document
  # with Nokogiri.
  def fetch_doc(url, opts)
    Nokogiri::HTML.parse(fetch(url, opts), nil, 'UTF-8')
  end
  # Downloads +url+ via +fetch+ and returns the body parsed as JSON.
  def fetch_json(url, opts)
    JSON.parse(fetch(url, opts))
  end
  # Looks up +package+ on registry.npmjs.com and returns the version its
  # 'dist-tags' map assigns to +tag+ ('latest' by default).
  def get_npm_version(package, opts, tag='latest')
    dist_tags = fetch_json("https://registry.npmjs.com/#{package}", opts)['dist-tags']
    dist_tags[tag]
  end
  # Returns the tag name of the repository's latest GitHub release, without a
  # single leading 'v' if it has one.
  def get_latest_github_release(owner, repo, opts)
    latest = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/releases/latest", opts)
    latest['tag_name'].sub(/\Av/, '')
  end
  # Returns the parsed JSON of the GitHub API's tag list for owner/repo.
  def get_github_tags(owner, repo, opts)
    tags_url = "https://api.github.com/repos/#{owner}/#{repo}/tags"
    fetch_json(tags_url, opts)
  end
  # Fetches +path+ of owner/repo through the GitHub contents API and returns
  # the Base64-decoded 'content' field of the response.
  def get_github_file_contents(owner, repo, path, opts)
    contents_url = "https://api.github.com/repos/#{owner}/#{repo}/contents/#{path}"
    encoded = fetch_json(contents_url, opts)['content']
    Base64.decode64(encoded)
  end
  # Returns the author date of the first commit listed by the GitHub commits
  # API for owner/repo, as an Epoch timestamp in seconds.
  #
  # The ISO 8601 timestamp is parsed as a Time so that the time of day and
  # the UTC offset are honoured; parsing it as a Date would truncate the
  # value to local midnight and make the result depend on the machine's
  # timezone.
  def get_latest_github_commit_date(owner, repo, opts)
    require 'time' # stdlib extension that provides Time.iso8601; no-op when already loaded

    commits = fetch_json("https://api.github.com/repos/#{owner}/#{repo}/commits", opts)
    timestamp = commits[0]['commit']['author']['date']
    Time.iso8601(timestamp).to_i
  end
  # Returns the parsed JSON of the GitLab v4 API's tag list for the project
  # group/project hosted on +hostname+.
  def get_gitlab_tags(hostname, group, project, opts)
    tags_url = "https://#{hostname}/api/v4/projects/#{group}%2F#{project}/repository/tags"
    fetch_json(tags_url, opts)
  end
  245. end
  246. end