380 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			380 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| #
 | |
| #  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..]
 | |
| #
 | |
| #  Extract documentation from C++ header files to use it in Python bindings
 | |
| #
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| import platform
 | |
| import re
 | |
| import textwrap
 | |
| 
 | |
| from clang import cindex
 | |
| from clang.cindex import CursorKind
 | |
| from collections import OrderedDict
 | |
| from glob import glob
 | |
| from threading import Thread, Semaphore
 | |
| from multiprocessing import cpu_count
 | |
| 
 | |
# Cursor kinds whose children are visited by extract().
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds whose spelling is NOT appended to the name prefix.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]

# C++ operator symbol -> identifier-safe word, ordered longest symbol first
# so that sanitize_name() replaces e.g. '<<=' before '<<' and '<'.  The
# sort is stable, so symbols of equal length keep the order listed here.
CPP_OPERATORS = OrderedDict(sorted(
    [
        ('<=', 'le'),
        ('>=', 'ge'),
        ('==', 'eq'),
        ('!=', 'ne'),
        ('[]', 'array'),
        ('+=', 'iadd'),
        ('-=', 'isub'),
        ('*=', 'imul'),
        ('/=', 'idiv'),
        ('%=', 'imod'),
        ('&=', 'iand'),
        ('|=', 'ior'),
        ('^=', 'ixor'),
        ('<<=', 'ilshift'),
        ('>>=', 'irshift'),
        ('++', 'inc'),
        ('--', 'dec'),
        ('<<', 'lshift'),
        ('>>', 'rshift'),
        ('&&', 'land'),
        ('||', 'lor'),
        ('!', 'lnot'),
        ('~', 'bnot'),
        ('&', 'band'),
        ('|', 'bor'),
        ('+', 'add'),
        ('-', 'sub'),
        ('*', 'mul'),
        ('/', 'div'),
        ('%', 'mod'),
        ('<', 'lt'),
        ('>', 'gt'),
        ('=', 'assign'),
        ('()', 'call'),
    ],
    key=lambda pair: -len(pair[0])))

# Upper bound on concurrently running extraction threads; each
# ExtractionThread holds one semaphore slot from construction until its
# run() method finishes.
job_count = cpu_count()
job_semaphore = Semaphore(job_count)
 | |
| 
 | |
| 
 | |
class NoFilenamesError(ValueError):
    """Raised by read_args() when the argument list names no input file."""
 | |
| 
 | |
| 
 | |
def d(s):
    """Return *s* as ``str``; anything that is not a ``str`` is decoded
    as UTF-8."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
 | |
| 
 | |
| 
 | |
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_...`` identifier.

    Steps, in order: ``type-parameter-0-N`` placeholders become ``TN``;
    ``operator<sym>`` becomes ``operator_<word>`` using CPP_OPERATORS; any
    ``<...>`` span is dropped; every non-alphanumeric character becomes an
    underscore; runs of underscores are collapsed and one trailing
    underscore is removed.
    """
    name = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for symbol, word in CPP_OPERATORS.items():
        name = name.replace('operator' + symbol, 'operator_' + word)
    name = re.sub('<.*>', '', name)
    name = ''.join(ch if ch.isalnum() else '_' for ch in name)
    name = re.sub('_+', '_', name)
    name = re.sub('_$', '', name)
    return '__doc_' + name
 | |
| 
 | |
| 
 | |
def _strip_comment_syntax(comment):
    """Remove C++ comment delimiters and the common leading indentation."""
    lines = []
    for s in comment.expandtabs(tabsize=4).splitlines():
        s = s.strip()
        if s.startswith('/*'):
            s = s[2:].lstrip('*')
            # A one-line block comment ("/** text */") carries the closing
            # delimiter on the same line; strip it as well.
            if s.endswith('*/'):
                s = s[:-2].rstrip('*')
        elif s.endswith('*/'):
            s = s[:-2].rstrip('*')
        elif s.startswith('///'):
            s = s[3:]
        if s.startswith('*'):
            s = s[1:]
        lines.append(s)

    # Dedent by the smallest indentation found on any non-empty line.
    indents = [len(s) - len(s.lstrip()) for s in lines if len(s) > 0]
    if indents:
        margin = min(indents)
        lines = [s[margin:] for s in lines]
    return ''.join(s + '\n' for s in lines)


def _convert_markup(s):
    r"""Rewrite Doxygen commands and a few HTML/TeX tags as lightweight markup.

    Section-like commands (``\param``, ``\return``, ...) become paragraphs
    that start with a ``$`` marker, which _reflow() later turns into a
    heading followed by an indented body.
    """
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    # Inline Doxygen commands
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_titles = {
        'return': 'Returns',
        'returns': 'Returns',
        'author': 'Author',
        'authors': 'Authors',
        'copyright': 'Copyright',
        'date': 'Date',
        'remark': 'Remark',
        'remarks': 'Remark',
        'sa': 'See also',
        'see': 'See also',
        'extends': 'Extends',
        'throw': 'Throws',
        'throws': 'Throws',
    }
    for in_, out_ in section_titles.items():
        # The word boundary keeps e.g. 'author' from consuming the front of
        # '\authors' and leaving a stray 's' in the text.
        s = re.sub(r'\\%s\b\s*' % in_, r'\n\n$%s:\n\n' % out_, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    # Spell boolean literals the Python way
    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(s):
    """Wrap prose paragraphs at 70 columns, leaving ``` code segments as-is."""
    wrapper = textwrap.TextWrapper()
    wrapper.expand_tabs = True
    wrapper.replace_whitespace = True
    wrapper.drop_whitespace = True
    wrapper.width = 70
    wrapper.initial_indent = wrapper.subsequent_indent = ''

    pieces = []
    in_code_segment = False
    for x in re.split(r'(```)', s):
        if x == '```':
            pieces.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            pieces.append(x.strip())
        else:
            # Paragraphs are separated by two or more line breaks.
            for y in re.split(r'(?: *\n *){2,}', x):
                wrapped = wrapper.fill(re.sub(r'\s+', ' ', y).strip())
                if len(wrapped) > 0 and wrapped[0] == '$':
                    # '$' marks a section heading: emit it without the
                    # marker and indent the paragraph that follows it.
                    pieces.append(wrapped[1:] + '\n')
                    indent = ' ' * 4
                else:
                    if len(wrapped) > 0:
                        pieces.append(wrapped + '\n\n')
                    indent = ''
                wrapper.initial_indent = wrapper.subsequent_indent = indent
    return ''.join(pieces).rstrip().lstrip('\n')


def process_comment(comment):
    r"""Convert a raw C++ documentation comment into docstring text.

    The comment delimiters (``/* ... */``, ``///``, leading ``*``) and the
    common indentation are removed, Doxygen commands and a handful of
    HTML/TeX tags are rewritten as lightweight markup, and the prose is
    re-wrapped at 70 columns while fenced code segments are kept verbatim.

    Parameters:
        comment: the comment text as ``str``; may be empty.

    Returns:
        The processed text without leading newlines or trailing whitespace.
    """
    return _reflow(_convert_markup(_strip_comment_syntax(comment)))
 | |
| 
 | |
| 
 | |
def extract(filename, node, prefix, output):
    """Collect docstring entries for *node* and its descendants.

    Nodes located in a file other than *filename* are skipped (returning
    0).  For kinds in RECURSE_LIST the children are visited with a prefix
    extended by the node's spelling (unless the kind is in
    PREFIX_BLACKLIST).  For kinds in PRINT_LIST with a non-empty spelling,
    a ``(sanitized name, filename, processed comment)`` tuple is appended
    to *output*.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    kind = node.kind

    if kind in RECURSE_LIST:
        child_prefix = prefix
        if kind not in PREFIX_BLACKLIST:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))
        if len(node.spelling) > 0:
            own_prefix = prefix + '_' if len(prefix) > 0 else prefix
            entry_name = sanitize_name(own_prefix + d(node.spelling))
            output.append((entry_name, filename, comment))
 | |
| 
 | |
| 
 | |
class ExtractionThread(Thread):
    """Worker thread that parses one header and extracts its docstrings.

    Constructing an instance acquires one slot of ``job_semaphore`` (and
    therefore blocks while all slots are taken); the slot is released when
    run() finishes, whether or not parsing succeeded.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename = filename      # header file to parse
        self.parameters = parameters  # arguments handed to index.parse()
        self.output = output          # shared list that extract() appends to
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and feed the translation unit to extract()."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            translation_unit = cindex.Index(raw_index).parse(
                self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
 | |
| 
 | |
| 
 | |
def read_args(args):
    """Split command-line arguments into compiler parameters and filenames.

    ``-x c++`` and ``-std=c++11`` are supplied unless *args* already sets
    them.  On macOS the Xcode libclang and an SDK sysroot are configured
    when they exist; on Linux the clang resource include directory with the
    highest version number is added when one is found.  Every argument that
    starts with ``-`` is forwarded as a parameter, everything else is taken
    as a filename.

    Returns:
        A ``(parameters, filenames)`` tuple of lists.

    Raises:
        NoFilenamesError: if *args* contains no filename.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Use the first SDK listed; an SDK directory that is empty or
            # cannot be walked simply means no sysroot is passed.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif system == 'Linux':
        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        def clang_folder_version(path):
            # All numbers in the path, except digits that directly follow
            # 'lib' (the 64 in 'lib64') or another digit.
            return [int(ver)
                    for ver in re.findall(r'(?<!lib)(?<!\d)\d+', path)]

        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=clang_folder_version)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if len(filenames) == 0:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
 | |
| 
 | |
| 
 | |
def extract_all(args):
    """Extract docstrings from every header named in *args*.

    One ExtractionThread is started per file; its constructor blocks until
    a slot of ``job_semaphore`` is free, which caps the number of threads
    running at once.  All threads are joined before returning, so every
    slot is back in the semaphore and the function can be called again.

    Returns:
        The list of ``(name, filename, comment)`` tuples filled in by the
        threads.

    Raises:
        NoFilenamesError: propagated from read_args().
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        try:
            thr.start()
        except BaseException:
            # run() will never execute, so hand back the slot that the
            # constructor acquired.
            job_semaphore.release()
            raise
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Joining (rather than draining the semaphore) leaves the semaphore
    # fully available once all workers are done.
    for thr in threads:
        thr.join()

    return output
 | |
| 
 | |
| 
 | |
def write_header(comments, out_file=None):
    """Write the generated C++ docstring header.

    Parameters:
        comments: iterable of ``(name, filename, comment)`` tuples.  Entries
            are emitted sorted by name, then filename; repeated names get a
            ``_2``, ``_3``, ... suffix in that order.
        out_file: text stream to write to.  ``None`` (the default) means
            ``sys.stdout`` as it is bound when the function is called, so
            redirected standard output is honoured.
    """
    if out_file is None:
        out_file = sys.stdout

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            # Same identifier again (e.g. an overload): number it.
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        # Multi-line comments start on their own line after the '='.
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
 | |
| 
 | |
| 
 | |
def mkdoc(args):
    """Run the extractor for a command-line style argument list.

    The first argument starting with ``-o`` selects the output file, given
    either attached (``-oFILE``) or as the following argument
    (``-o FILE``); without it the header is written to standard output.
    All remaining arguments are passed to extract_all().

    Exits with status -1 if ``-o`` is the last argument and has no value.
    If writing the output file fails, the partial file is removed and the
    original exception is re-raised.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            try:
                # After the deletion, args[idx] is the argument that
                # followed the bare "-o".
                out_path = arg[2:] or args.pop(idx)
            except IndexError:
                print("-o flag requires an argument", file=sys.stderr)
                sys.exit(-1)
            break

    comments = extract_all(args)

    if not out_path:
        write_header(comments)
        return

    try:
        # Comments are decoded as UTF-8 on the way in, so write them back
        # out the same way regardless of the locale.
        with open(out_path, 'w', encoding='utf-8') as out_file:
            write_header(comments, out_file)
    except BaseException:
        # In the event of an error, don't leave a partially-written
        # output file.
        try:
            os.unlink(out_path)
        except OSError:
            # Best effort: the file may never have been created.
            pass
        raise
 | |
| 
 | |
| 
 | |
if __name__ == '__main__':
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        # No header file was given: show the usage line on stderr and fail.
        # sys.exit is used because the bare exit() helper is only injected
        # by the site module.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)
 |