# Converts HTML API reference pages into stub Ruby source files annotated with
# YARD tags, then runs YARD over those stubs to produce HTML documentation.
#
# The XPath queries below define the page layout this class expects
# (e.g. <div class="api instanceMethod">, <p class="abstract">).
class DocsetGenerator
  require 'rubygems'
  require 'fileutils'

  # U+00A0, which appears inside method declarations and would otherwise
  # survive whitespace splitting of the selector.
  NO_BREAK_SPACE = [0x00A0].pack('U*').freeze

  # Flattens a value to a single line so it fits in a one-line comment.
  #
  # @param str [#to_s] text to flatten (nil becomes "")
  # @return [String] the text with every newline replaced by a space
  def sanitize(str)
    str.to_s.gsub(/\n/, ' ')
  end

  # Builds the comment block (abstract + discussion) for one API entry.
  # Code samples inside the discussion are unlinked from the document first,
  # so this call modifies +node+.
  #
  # @param node [#xpath] element wrapping a single API entry
  # @return [String] comment lines, each prefixed with " # ", newline-terminated
  def parse_html_docref(node)
    code = ''
    code << node.xpath(".//p[@class='abstract']").text
    code << "\n"

    node_discussion = node.xpath(".//div[@class='api discussion']")
    node_discussion.xpath(".//div[@class='codesample clear']").unlink
    code << node_discussion.text.sub(/^Discussion/, '')

    code.strip!
    code.gsub!(/^/m, ' # ')
    code << "\n"
    code
  end

  # Maps a C / Objective-C type name to the Ruby type name used in YARD tags.
  # The argument is never mutated.
  #
  # @param type [String, Array<String>, nil] type text; for an Array (as
  #   produced by String#scan) only the first element is used
  # @return [String] Ruby-side type name, or the cleaned-up input when there
  #   is no known mapping
  def parse_type(type)
    type = type.first if type.kind_of?(Array)
    stripped = type.to_s.strip
    base = stripped.sub(/\s*\*$/, '') # Remove one pointer star.
    had_star = base != stripped

    case base
    when /\*$/
      # A double pointer, in MacRuby this becomes a Pointer.
      'Pointer'
    when /\bid\b/
      # Word boundaries matter: an unanchored /id/ also matches "void",
      # "pid_t", ... and made the 'void' branch below unreachable.
      'Object'
    when 'void'
      # Plain void means "no value"; `void *` is an untyped pointer.
      had_star ? 'Pointer' : 'nil'
    when 'SEL'
      'Symbol'
    when 'bool', 'BOOL'
      'Boolean'
    when 'float', 'double', 'CGFloat'
      'Float'
    when /^(?:const\s+)?u?int(?:\d+_t)?/, 'char', 'unichar', 'short', 'long',
         'long long', 'unsigned char', 'unsigned short', 'unsigned long',
         'unsigned long long', 'NSInteger', 'NSUInteger'
      'Integer'
    when 'NSString', 'NSMutableString'
      'String'
    when 'NSArray', 'NSMutableArray'
      'Array'
    when 'NSDictionary', 'NSMutableDictionary'
      'Hash'
    else
      base
    end
  end

  # Emits an attr_reader / attr_accessor stub for every documented property.
  #
  # @param doc [#xpath] parsed document
  # @param code [String] buffer appended to in place
  # @return [String] +code+
  def parse_html_property(doc, code = "")
    doc.xpath("//div[@class='api propertyObjC']").each do |node|
      decl = node.xpath(".//div[@class='declaration']/div[@class='declaration']").text
      decl = node.xpath(".//div[@class='declaration']").text if decl.empty?

      # Must be checked before the attribute list is stripped below.
      readonly = decl.include?('readonly')
      decl = decl.sub(/@property\s*(\([^\)]+\))?/, '')

      md = decl.match(/(\w+);?$/)
      next unless md

      title = md[1]
      type = md.pre_match

      code << parse_html_docref(node)
      code << " # @return [#{parse_type(type)}]\n"
      code << ' ' << (readonly ? "attr_reader" : "attr_accessor") << " :#{title}\n\n"
    end
    code
  end

  # Emits a documented, empty method stub for every class and instance method.
  # Class methods are emitted first, then instance methods. Entries whose
  # declaration yields no selector are skipped.
  #
  # @param doc [#xpath] parsed document
  # @param code [String] buffer appended to in place
  # @return [String] +code+
  def parse_html_method(doc, code = "")
    method_nodes = doc.xpath("//div[@class='api classMethod']").to_a +
                   doc.xpath("//div[@class='api instanceMethod']").to_a

    method_nodes.each do |node|
      decl = node.xpath(".//div[@class='declaration']").text
      signature = method_signature(decl)
      next unless signature

      # Every parenthesised group is a type; the first one is the return type.
      types = decl.scan(/\(([^)]+)\)/)
      ret_type = types.shift

      code << parse_html_docref(node)

      # Parameters: only documented when names and descriptions pair up.
      arg_names = node.xpath(".//div[@class='api parameters']//dt")
      arg_docs = node.xpath(".//div[@class='api parameters']//dd")
      if arg_names.size == arg_docs.size
        has_types = types.size == arg_names.size
        arg_names.each_with_index do |arg_name, i|
          code << " # @param "
          code << "[#{parse_type(types[i])}] " if has_types
          code << "#{arg_name.text} #{sanitize(arg_docs[i].text)}\n"
        end
      end

      # Return value.
      retdoc = node.xpath(".//div[@class='return_value']/p").text.strip
      code << " # @return "
      code << "[#{parse_type(ret_type)}] " if ret_type
      code << sanitize(retdoc) unless retdoc.empty?
      code << "\n"

      code << " # @scope class\n" if decl =~ /^\s*\+/
      code << " def #{signature}; end\n\n"
    end
    code
  end

  # Emits constants (wrapped in a module when they belong to a named enum)
  # into +code_const+, and struct declarations found in the constants section
  # into +code_struct+.
  #
  # @param doc [#xpath] parsed document
  # @param code_const [String] buffer for constants, appended to in place
  # @param code_struct [String] buffer for structs, appended to in place
  # @return [String] +code_const+
  def parse_html_constant(doc, code_const = "", code_struct = "")
    doc.xpath("//div[@id='Constants_section']").each do |node|
      node_abstract = node.xpath("./p[@class='abstract']")
      node_declaration = node.xpath("./pre[@class='declaration']")
      node_termdef = node.xpath("./dl[@class='termdef']")

      node_termdef.size.times do |i|
        declaration = node_declaration[i]
        next unless declaration # term list without a matching declaration

        decl = declaration.text.strip
        if decl =~ /^(typedef\s+)?struct/
          parse_html_struct(node.child, code_struct)
          next
        end

        enum_name = decl.match(/\}\s*([^\s]+);$/m).to_a[1]
        is_enum = !enum_name.to_s.empty?
        if is_enum
          abstract = node_abstract[i]
          code_const << "# #{sanitize(abstract && abstract.text)}\n"
          code_const << "module #{enum_name} # Enumeration\n\n"
        end

        names = node_termdef[i].xpath("./dt")
        descriptions = node_termdef[i].xpath("./dd")
        names.size.times do |j|
          description = descriptions[j]
          code_const << " # #{sanitize(description && description.text.capitalize)}\n"
          code_const << " #{names[j].text} = nil\n"
        end

        code_const << "end\n" if is_enum
      end
    end
    code_const
  end

  # Looks up the framework path shown on the page.
  #
  # @param doc [#xpath] parsed document
  # @return [String, nil] text of the second child of the element three
  #   levels above the first FrameworkPath span, or nil when there is no
  #   such span
  def find_framework_path(doc)
    elem = doc.xpath(".//span[@class='FrameworkPath']")
    return nil if elem.empty?

    elem[0].parent.parent.parent.children[1].text
  end

  # Emits the body shared by class and protocol pages: properties and methods,
  # the closing "end", then constants and structs after it.
  #
  # @param doc [#xpath] parsed document
  # @param code [String] buffer that already holds the class/module header
  # @return [String] +code+
  def parse_html_class_property_common(doc, code)
    code_const = ''
    code_struct = ''

    parse_html_property(doc, code)
    parse_html_method(doc, code)
    parse_html_constant(doc, code_const, code_struct)

    code << "end\n"
    code << code_const
    code << code_struct
    code
  end

  # Emits the stub for a "Class Reference" page.
  #
  # @param name [String] class name taken from the page title
  # @param doc [#xpath] parsed document
  # @param code [String] buffer appended to in place
  # @return [String, nil] +code+, or nil when no superclass row is found
  def parse_html_class(name, doc, code)
    # Find superclass (mandatory).
    sclass = nil
    doc.xpath("//table[@class='specbox']/tr").each do |node|
      if md = node.text.match(/Inherits from([^ ]+)/)
        sclass = md[1]
        break
      end
    end
    return nil unless sclass

    # Class abstract (optional: a page without one still yields a stub).
    abstract = doc.xpath(".//p[@class='abstract']")[0]
    code << abstract.text.gsub(/^/m, '# ') if abstract

    if sclass == "none"
      code << "\nclass #{name}\n\n"
    else
      code << "\nclass #{name} < #{sclass}\n\n"
    end

    parse_html_class_property_common(doc, code)
    code
  end

  # Emits the stub (as a module) for a "Protocol Reference" page.
  #
  # @param name [String] protocol name taken from the page title
  # @param doc [#xpath] parsed document
  # @param code [String] buffer appended to in place
  # @return [String, nil] +code+, or nil when the page has no abstract or
  #   documents the NSObject protocol
  def parse_html_protocol(name, doc, code)
    # Protocol abstract.
    node = doc.xpath(".//p[@class='abstract']")
    return nil if node.empty?

    # FIXME: skipped to avoid overwriting the NSObject class reference with
    # the NSObject protocol reference.
    return nil if name == "NSObject"

    code << node.text.gsub(/^/m, '# ')
    code << "\nmodule #{name} # Protocol\n\n"

    parse_html_class_property_common(doc, code)
    code
  end

  # Emits a documented, empty top-level method stub for every C function that
  # is a sibling of the given anchor. Functions whose declaration has no
  # parenthesised argument list, or no recognisable return type, are skipped.
  #
  # @param doc [#xpath] anchor node(s) whose siblings hold the function docs
  # @param code [String] buffer appended to in place
  # @return [String] +code+
  def parse_html_function(doc, code = "")
    node_name = doc.xpath("../h3[@class='tight jump function']")
    node_abstract = doc.xpath("../p[@class='abstract']")
    node_declaration = doc.xpath("../pre[@class='declaration']")
    node_termdef = doc.xpath("../div[@class='api parameters']/dl[@class='termdef']")
    node_return_val = doc.xpath("../div[@class='return_value']/p")

    # NOTE(review): this collects the <dd> of *all* functions, yet it is
    # indexed below by the argument position within one function, so only the
    # first function is guaranteed to get its own descriptions. Confirm
    # against real pages before changing.
    node_param_description = node_termdef.xpath("dd")

    node_name.size.times do |i|
      declaration_node = node_declaration[i]
      next unless declaration_node

      name = node_name[i].text
      abstract = node_abstract[i]
      declaration = declaration_node.text.strip

      return_type = declaration[/([^\s]+)\s+.+/, 1]
      args = declaration[/\((.+)\);/mx, 1]
      next unless return_type && args

      args = args.split(",")
      next if args.empty?

      code << "# #{sanitize(abstract && abstract.text)}\n"

      params = []
      args.each_with_index do |arg, index|
        md = arg.strip.match(/(.+)\s+([^\s]+),?$/)
        next unless md # e.g. a lone "void"

        # Pointer stars are written next to the name ("char *s"); move the
        # first run of them over to the type.
        stars = md[2][/\*+/].to_s
        param = md[2].sub(/\*+/, '')
        type = md[1] + stars
        params << param

        description_node = node_param_description[index]
        description = description_node && description_node.text
        code << "# @param [#{parse_type(type)}] #{param} #{sanitize(description)}\n"
      end

      if node_return_val[i]
        code << "# @return [#{parse_type(return_type)}] #{sanitize(node_return_val[i].text)}\n"
      elsif return_type != "void"
        code << "# @return [#{parse_type(return_type)}]\n"
      else
        code << "# @return [nil]\n"
      end

      code << "def #{name}(#{params.join(', ')}); end\n\n"
    end
    code
  end

  # Emits a "class X < Boxed" stub with one attr_accessor per member for every
  # struct that is a sibling of the given anchor, then removes the processed
  # headings from the document so a second pass does not emit them again.
  #
  # @param doc [#xpath] anchor node(s) whose siblings hold the struct docs
  # @param code [String] buffer appended to in place
  # @return [String] +code+
  def parse_html_struct(doc, code = "")
    node_name = doc.xpath("../h3[@class='tight jump struct']|../h3[@class='tight jump typeDef']")
    node_abstract = doc.xpath("../p[@class='abstract']")
    node_declaration = doc.xpath("../pre[@class='declaration']|../table[@class='zDeclaration']")
    node_termdef = doc.xpath("../dl[@class='termdef']")
    node_field_description = node_termdef.xpath("dd")

    # Field descriptions are consumed sequentially across all structs.
    current_member_position = 0

    node_name.size.times do |i|
      declaration_node = node_declaration[i]
      next unless declaration_node

      name = node_name[i].text
      abstract = node_abstract[i]
      declaration = declaration_node.text.strip

      # Only typedefs that declare a struct are of interest.
      # NOTE(review): values[0] is the first attribute value of the heading,
      # presumably its class attribute.
      next if node_name[i].values[0].include?("typeDef") && declaration !~ /^typedef struct/

      body = declaration[/\{([^\}]+)\}/, 1]
      next unless body

      fields = body.strip.split(/;/)
      next if fields.empty?

      code << "# #{sanitize(abstract && abstract.text)}\n"
      code << "class #{name} < Boxed\n"

      # Split 'double x, y, z, w' into one "type member" entry per member.
      members = fields.flat_map do |field|
        md = field.strip.match(/([^\s]+)\s+(.+)/)
        md ? md[2].split(",").map { |member| "#{md[1]} #{member}" } : []
      end

      members.each do |item|
        md = item.strip.match(/(.+)\s+(.+)/)
        next unless md # blank member, e.g. after a trailing comma

        desc = node_field_description[current_member_position]
        code << " # @return [#{parse_type(md[1])}] #{desc ? sanitize(desc.text) : ''}\n"
        code << " attr_accessor :#{md[2]}\n"
        current_member_position += 1
      end

      code << "end\n\n"
    end

    node_name.remove
    code
  end

  # Emits stubs for a generic "... Reference" page (functions and data types).
  #
  # @param name [String] title prefix of the page (currently unused)
  # @param doc [#xpath] parsed document
  # @param code [String] buffer appended to in place
  # @return [String] +code+
  def parse_html_reference(name, doc, code)
    # Both parsers do nothing when handed an empty node set.
    parse_html_function(doc.xpath("//section/a[@title='Functions']"), code)
    parse_html_struct(doc.xpath("//section/a[@title='Data Types']"), code)
    code
  end

  # Converts one HTML reference page into Ruby stub source.
  #
  # @param data [String] HTML source of the page
  # @return [String, nil] generated Ruby source, or nil when the page title
  #   does not identify a class, protocol or general reference page (or the
  #   matching parser rejects the page)
  def parse_html_data(data)
    # Loaded on first use so that the pure string helpers of this class work
    # without the gem installed.
    require 'nokogiri'

    doc = Nokogiri::HTML(data)
    title = doc.xpath('/html/head/title')
    return nil if title.empty?

    code = ''
    if framework_path = find_framework_path(doc)
      code << "# -*- framework: #{framework_path} -*-\n\n"
    else
      # Framework path unknown: keep the spacing, emit no header.
      code << "\n\n"
    end

    title_text = title.text
    if md = title_text.match(/^(.+)Class Reference$/)
      parse_html_class(md[1].strip, doc, code)
    elsif md = title_text.match(/^(.+)Protocol Reference$/)
      parse_html_protocol(md[1].strip, doc, code)
    elsif md = title_text.match(/^(.+) Reference$/)
      parse_html_reference(md[1].strip, doc, code)
    end
  end

  # Rewrites a generated HTML file in place, replacing every "Module:" label
  # (and the whitespace before it) with "<new_title>:".
  #
  # @param path [String] file to rewrite
  # @param new_title [String] replacement label, without the colon
  # @return [nil] in every case; a warning is printed when the file is missing
  def self.modify_document_title(path, new_title)
    # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
    unless File.exist?(path)
      warn "File not exists : #{path}"
      return nil
    end

    data = File.read(path)
    data.gsub!(/\s*Module:/, new_title + ':')
    File.open(path, "w") { |io| io.print data }
  end

  # @param outpath [String] destination the generated "doc" directory is
  #   moved to
  # @param paths [Array<String>] HTML files and/or directories; directories
  #   are searched recursively for *.html
  def initialize(outpath, paths)
    @input_paths = paths.flat_map do |path|
      path = File.expand_path(path)
      File.directory?(path) ? Dir.glob(path + '/**/*.html') : [path]
    end
    @outpath = outpath
    @rb_files_dir = '/tmp/rb_docset'
  end

  # Recreates the scratch directory and writes one stub file (t0.rb, t1.rb,
  # ...) per input page that produced output.
  def generate_ruby_code
    FileUtils.rm_rf(@rb_files_dir)
    FileUtils.mkdir_p(@rb_files_dir)

    sources = @input_paths.map { |path| parse_html_data(File.read(path)) }.compact
    sources.each_with_index do |code, n|
      File.open(File.join(@rb_files_dir, "t#{n}.rb"), 'w') do |io|
        io.puts "# -*- coding: utf-8 -*-"
        io.write(code)
      end
    end
  end

  # Runs YARD over the stub files and moves the result to the output path.
  #
  # NOTE(review): `sh` is not defined in this class; presumably it is Rake's
  # helper and this class is only driven from a Rake task - confirm.
  def generate_html
    sh "yard doc #{@rb_files_dir}"
    sh "mv doc \"#{@outpath}\""
  end

  # Generates the Ruby stubs, then the HTML documentation.
  def run
    generate_ruby_code
    generate_html
  end

  private

  # Turns an Objective-C method declaration into a Ruby signature such as
  # "setObject(obj, forKey:key)".
  #
  # @param decl [String] declaration text, e.g. "- (void)setFoo:(id)foo;"
  # @return [String, nil] signature without the leading "def", or nil when
  #   the declaration contains no selector
  def method_signature(decl)
    selector = decl.sub(/^\s*[\+\-]/, '') # Remove method qualifier.
    selector = selector.sub(/;\s*$/, '').gsub(NO_BREAK_SPACE, '')

    parts = selector.gsub(/\([^)]+\)+/, '').split.map { |part| part.split(':') }
    head = parts.shift
    return nil if head.nil? || head.empty?

    # Collect arguments first and join once, so a head without an inline
    # argument can never produce "def foo(, bar)".
    args = []
    args << head[1] if head.size > 1
    parts.each do |part|
      args << (part[1] ? "#{part[0]}:#{part[1]}" : part[0])
    end

    "#{head[0]}(#{args.join(', ')})"
  end
end