388 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
		
		
			
		
	
	
			388 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Python
		
	
	
|  | #!/usr/bin/env python3 | ||
|  | # -*- coding: utf-8 -*- | ||
|  | # | ||
|  | #  Syntax: mkdoc.py [-I<path> ..] [.. a list of header files ..] | ||
|  | # | ||
|  | #  Extract documentation from C++ header files to use it in Python bindings | ||
|  | # | ||
|  | 
 | ||
|  | import os | ||
|  | import sys | ||
|  | import platform | ||
|  | import re | ||
|  | import textwrap | ||
|  | 
 | ||
|  | from clang import cindex | ||
|  | from clang.cindex import CursorKind | ||
|  | from collections import OrderedDict | ||
|  | from glob import glob | ||
|  | from threading import Thread, Semaphore | ||
|  | from multiprocessing import cpu_count | ||
|  | 
 | ||
# Cursor kinds whose children extract() descends into.
RECURSE_LIST = [
    CursorKind.TRANSLATION_UNIT,
    CursorKind.NAMESPACE,
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.CLASS_TEMPLATE,
]

# Cursor kinds for which extract() records a docstring entry.
PRINT_LIST = [
    CursorKind.CLASS_DECL,
    CursorKind.STRUCT_DECL,
    CursorKind.ENUM_DECL,
    CursorKind.ENUM_CONSTANT_DECL,
    CursorKind.CLASS_TEMPLATE,
    CursorKind.FUNCTION_DECL,
    CursorKind.FUNCTION_TEMPLATE,
    CursorKind.CONVERSION_FUNCTION,
    CursorKind.CXX_METHOD,
    CursorKind.CONSTRUCTOR,
    CursorKind.FIELD_DECL,
]

# Cursor kinds whose spelling extract() does not append to the name prefix.
PREFIX_BLACKLIST = [
    CursorKind.TRANSLATION_UNIT,
]
|  | 
 | ||
# C++ operator tokens mapped to the identifier-safe word that
# sanitize_name() substitutes for them.  The mapping is ordered with the
# longest tokens first so that e.g. '<<=' is replaced before '<<' or '<'.
# The sort is stable, so tokens of equal length keep the order listed here.
CPP_OPERATORS = OrderedDict(
    sorted(
        [
            ('<=', 'le'),
            ('>=', 'ge'),
            ('==', 'eq'),
            ('!=', 'ne'),
            ('[]', 'array'),
            ('+=', 'iadd'),
            ('-=', 'isub'),
            ('*=', 'imul'),
            ('/=', 'idiv'),
            ('%=', 'imod'),
            ('&=', 'iand'),
            ('|=', 'ior'),
            ('^=', 'ixor'),
            ('<<=', 'ilshift'),
            ('>>=', 'irshift'),
            ('++', 'inc'),
            ('--', 'dec'),
            ('<<', 'lshift'),
            ('>>', 'rshift'),
            ('&&', 'land'),
            ('||', 'lor'),
            ('!', 'lnot'),
            ('~', 'bnot'),
            ('&', 'band'),
            ('|', 'bor'),
            ('+', 'add'),
            ('-', 'sub'),
            ('*', 'mul'),
            ('/', 'div'),
            ('%', 'mod'),
            ('<', 'lt'),
            ('>', 'gt'),
            ('=', 'assign'),
            ('()', 'call'),
        ],
        key=lambda pair: -len(pair[0]),
    )
)
|  | 
 | ||
# One permit per CPU reported by cpu_count(); ExtractionThread takes a
# permit when it is constructed and returns it when its run() finishes.
job_count = cpu_count()
job_semaphore = Semaphore(value=job_count)
|  | 
 | ||
|  | 
 | ||
class NoFilenamesError(ValueError):
    """Raised by read_args() when the arguments name no input file."""
|  | 
 | ||
|  | 
 | ||
def d(s):
    """Return *s* as ``str``, decoding it as UTF-8 when it is not one."""
    if isinstance(s, str):
        return s
    return s.decode('utf8')
|  | 
 | ||
|  | 
 | ||
def sanitize_name(name):
    """Turn a (prefixed) C++ entity name into a ``__doc_...`` identifier.

    Steps, in order: ``type-parameter-0-N`` becomes ``TN``; every
    ``operator<token>`` listed in CPP_OPERATORS becomes ``operator_<word>``;
    a ``<...>`` span is removed; every non-alphanumeric character becomes
    ``_``; runs of ``_`` are collapsed and one trailing ``_`` is dropped.
    """
    cleaned = re.sub(r'type-parameter-0-([0-9]+)', r'T\1', name)
    for token, word in CPP_OPERATORS.items():
        cleaned = cleaned.replace('operator' + token, 'operator_' + word)
    cleaned = re.sub('<.*>', '', cleaned)
    cleaned = ''.join('_' if not ch.isalnum() else ch for ch in cleaned)
    cleaned = re.sub('_+', '_', cleaned)
    cleaned = re.sub('_$', '', cleaned)
    return '__doc_' + cleaned
|  | 
 | ||
|  | 
 | ||
def _strip_comment_markers(comment):
    """Remove C++ comment delimiters and the common left margin."""
    lines = []
    for line in comment.expandtabs(tabsize=4).splitlines():
        line = line.strip()
        if line.startswith('/*'):
            line = line[2:].lstrip('*')
        elif line.startswith('///'):
            line = line[3:]
        # The closer is checked independently of the opener so that a
        # one-line block comment ("/** text */") loses both delimiters.
        if line.endswith('*/'):
            line = line[:-2].rstrip('*')
        if line.startswith('*'):
            line = line[1:]
        lines.append(line)

    # Dedent by the smallest indentation found on any non-empty line.
    indents = [len(line) - len(line.lstrip()) for line in lines if line]
    if indents:
        margin = min(indents)
        lines = [line[margin:] for line in lines]
    return ''.join(line + '\n' for line in lines)


def _translate_markup(text):
    """Rewrite Doxygen commands and simple HTML/TeX tags as light markup.

    Paragraphs that are section labels are emitted with a leading ``$`` so
    that the re-flow step can recognise them.
    """
    cpp_group = r'([\w:]+)'
    param_group = r'([\[\w:\]]+)'

    s = text
    s = re.sub(r'\\c\s+%s' % cpp_group, r'``\1``', s)
    s = re.sub(r'\\a\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\e\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\em\s+%s' % cpp_group, r'*\1*', s)
    s = re.sub(r'\\b\s+%s' % cpp_group, r'**\1**', s)
    s = re.sub(r'\\ingroup\s+%s' % cpp_group, r'', s)
    s = re.sub(r'\\param%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Parameter ``\2``:\n\n', s)
    s = re.sub(r'\\tparam%s?\s+%s' % (param_group, cpp_group),
               r'\n\n$Template parameter ``\2``:\n\n', s)

    section_tags = (
        ('return', 'Returns'),
        ('returns', 'Returns'),
        ('author', 'Author'),
        ('authors', 'Authors'),
        ('copyright', 'Copyright'),
        ('date', 'Date'),
        ('remark', 'Remark'),
        ('remarks', 'Remarks'),
        ('sa', 'See also'),
        ('see', 'See also'),
        ('extends', 'Extends'),
        ('throw', 'Throws'),
        ('throws', 'Throws'),
    )
    for tag, label in section_tags:
        # \b keeps a tag from matching the prefix of a longer command
        # (e.g. "\throw" inside "\throws"), which used to leave a stray
        # "s" at the start of the section text.
        s = re.sub(r'\\%s\b\s*' % tag, r'\n\n$%s:\n\n' % label, s)

    s = re.sub(r'\\details\s*', r'\n\n', s)
    s = re.sub(r'\\brief\s*', r'', s)
    s = re.sub(r'\\short\s*', r'', s)
    s = re.sub(r'\\ref\s*', r'', s)

    s = re.sub(r'\\code\s?(.*?)\s?\\endcode',
               r"```\n\1\n```\n", s, flags=re.DOTALL)

    # HTML/TeX tags
    s = re.sub(r'<tt>(.*?)</tt>', r'``\1``', s, flags=re.DOTALL)
    s = re.sub(r'<pre>(.*?)</pre>', r"```\n\1\n```\n", s, flags=re.DOTALL)
    s = re.sub(r'<em>(.*?)</em>', r'*\1*', s, flags=re.DOTALL)
    s = re.sub(r'<b>(.*?)</b>', r'**\1**', s, flags=re.DOTALL)
    s = re.sub(r'\\f\$(.*?)\\f\$', r'$\1$', s, flags=re.DOTALL)
    s = re.sub(r'<li>', r'\n\n* ', s)
    s = re.sub(r'</?ul>', r'', s)
    s = re.sub(r'</li>', r'\n\n', s)

    s = s.replace('``true``', '``True``')
    s = s.replace('``false``', '``False``')
    return s


def _reflow(text):
    """Wrap prose paragraphs to 70 columns, leaving ``` segments unwrapped."""
    wrapper = textwrap.TextWrapper(
        width=70,
        expand_tabs=True,
        replace_whitespace=True,
        drop_whitespace=True,
        initial_indent='',
        subsequent_indent='',
    )

    chunks = []
    in_code_segment = False
    for piece in re.split(r'(```)', text):
        if piece == '```':
            chunks.append('\n```\n\n' if in_code_segment else '```\n')
            in_code_segment = not in_code_segment
        elif in_code_segment:
            chunks.append(piece.strip())
        else:
            for paragraph in re.split(r'(?: *\n *){2,}', piece):
                paragraph = re.sub(r'\s+', ' ', paragraph).strip()
                if paragraph.startswith('$'):
                    # Section label: never indented, even when it directly
                    # follows another label; the paragraph after it is
                    # indented by four spaces.
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
                    chunks.append(wrapper.fill(paragraph)[1:] + '\n')
                    wrapper.initial_indent = \
                        wrapper.subsequent_indent = ' ' * 4
                else:
                    wrapped = wrapper.fill(paragraph)
                    if wrapped:
                        chunks.append(wrapped + '\n\n')
                    wrapper.initial_indent = wrapper.subsequent_indent = ''
    return ''.join(chunks).rstrip().lstrip('\n')


def process_comment(comment):
    """Convert a raw C++ documentation comment into plain docstring text.

    The comment delimiters (``/* ... */``, ``///``, leading ``*``) and the
    common indentation are removed, Doxygen commands and a few HTML/TeX
    tags are rewritten as lightweight markup, and the prose is re-wrapped
    to 70 columns.  Text between ``` markers is not re-wrapped.

    Args:
        comment: The comment text as a ``str``; may be empty.

    Returns:
        The processed text without leading newlines or trailing
        whitespace; an empty string for an empty comment.
    """
    return _reflow(_translate_markup(_strip_comment_markers(comment)))
|  | 
 | ||
|  | 
 | ||
def extract(filename, node, prefix, output):
    """Collect docstring entries for *node* and its descendants.

    Nodes located in a file other than *filename* are skipped (the call
    returns 0).  For kinds in RECURSE_LIST the children are visited with
    the prefix extended by the node's spelling (unless the kind is in
    PREFIX_BLACKLIST).  For kinds in PRINT_LIST with a non-empty spelling
    a ``(name, filename, comment)`` tuple is appended to *output*.
    """
    source_file = node.location.file
    if source_file is not None and \
            not os.path.samefile(d(source_file.name), filename):
        return 0

    if node.kind in RECURSE_LIST:
        child_prefix = prefix
        if node.kind not in PREFIX_BLACKLIST:
            if child_prefix:
                child_prefix += '_'
            child_prefix += d(node.spelling)
        for child in node.get_children():
            extract(filename, child, child_prefix, output)

    if node.kind in PRINT_LIST:
        raw = node.raw_comment
        comment = process_comment('' if raw is None else d(raw))
        own_prefix = prefix + '_' if prefix else prefix
        if len(node.spelling) > 0:
            entry_name = sanitize_name(own_prefix + d(node.spelling))
            output.append((entry_name, filename, comment))
|  | 
 | ||
|  | 
 | ||
class ExtractionThread(Thread):
    """Thread that parses one header file and extracts its docstrings.

    The constructor acquires ``job_semaphore`` (blocking until a permit is
    free); the permit is released when run() ends, whether or not parsing
    raised.
    """

    def __init__(self, filename, parameters, output):
        super().__init__()
        self.filename = filename
        self.parameters = parameters
        self.output = output
        job_semaphore.acquire()

    def run(self):
        """Parse ``self.filename`` and append its entries to ``self.output``."""
        print('Processing "%s" ..' % self.filename, file=sys.stderr)
        try:
            raw_index = cindex.conf.lib.clang_createIndex(False, True)
            index = cindex.Index(raw_index)
            translation_unit = index.parse(self.filename, self.parameters)
            extract(self.filename, translation_unit.cursor, '', self.output)
        finally:
            job_semaphore.release()
|  | 
 | ||
|  | 
 | ||
def read_args(args):
    """Split command-line arguments into clang parameters and filenames.

    ``-x c++`` and ``-std=c++11`` are added unless *args* already selects
    a language / standard.  On macOS the Xcode libclang and an SDK sysroot
    are configured when present; on Linux the highest-versioned
    ``/usr/lib/llvm-*`` libclang and clang builtin include directory are
    used when present.  When nothing is found the clang bindings are left
    with their own configuration instead of failing here.

    Args:
        args: Iterable of argument strings.  Items starting with ``-`` are
            passed on as clang parameters, all others are filenames.

    Returns:
        A ``(parameters, filenames)`` tuple of two lists.

    Raises:
        NoFilenamesError: If *args* contains no filename.
    """
    parameters = []
    filenames = []
    if "-x" not in args:
        parameters.extend(['-x', 'c++'])
    if not any(it.startswith("-std=") for it in args):
        parameters.append('-std=c++11')

    def version_key(path):
        # Numeric components of the path, ignoring the digits that belong
        # to a "lib64"/"lib32" directory name, so that versions compare
        # numerically (llvm-10 > llvm-9).
        return [int(ver) for ver in re.findall(r'(?<!lib)(?<!\d)\d+', path)]

    system = platform.system()
    if system == 'Darwin':
        dev_path = '/Applications/Xcode.app/Contents/Developer/'
        lib_dir = dev_path + 'Toolchains/XcodeDefault.xctoolchain/usr/lib/'
        sdk_dir = dev_path + 'Platforms/MacOSX.platform/Developer/SDKs'
        libclang = lib_dir + 'libclang.dylib'

        if os.path.exists(libclang):
            cindex.Config.set_library_path(os.path.dirname(libclang))

        if os.path.exists(sdk_dir):
            # Sub-directories of the SDK folder; may be empty.
            sdk_names = next(os.walk(sdk_dir), (sdk_dir, [], []))[1]
            if sdk_names:
                parameters.append('-isysroot')
                parameters.append(os.path.join(sdk_dir, sdk_names[0]))
    elif system == 'Linux':
        # cython.util.find_library does not find `libclang` for all clang
        # versions and distributions. LLVM switched to a monolithical setup
        # that includes everything under /usr/lib/llvm{version_number}/
        # We therefore glob for the library and select the highest version.
        library_files = glob("/usr/lib/llvm-*/lib/libclang.so")
        if library_files:
            cindex.Config.set_library_file(
                max(library_files, key=version_key))

        # clang doesn't find its own base includes by default on Linux,
        # but different distros install them in different paths.
        # Try to autodetect, preferring the highest numbered version.
        clang_include_dir = max((
            path
            for libdir in ['lib64', 'lib', 'lib32']
            for path in glob('/usr/%s/clang/*/include' % libdir)
            if os.path.isdir(path)
        ), default=None, key=version_key)
        if clang_include_dir:
            parameters.extend(['-isystem', clang_include_dir])

    for item in args:
        if item.startswith('-'):
            parameters.append(item)
        else:
            filenames.append(item)

    if not filenames:
        raise NoFilenamesError("args parameter did not contain any filenames")

    return parameters, filenames
|  | 
 | ||
|  | 
 | ||
def extract_all(args):
    """Extract the documentation of every header file named in *args*.

    One ExtractionThread is started per file; the number running at once
    is limited by the semaphore each thread acquires on construction.

    Args:
        args: Argument strings as accepted by read_args().

    Returns:
        A list of ``(name, filename, comment)`` tuples.  Entries are
        appended by the worker threads, so their order is unspecified.

    Raises:
        NoFilenamesError: If *args* contains no filename.
    """
    parameters, filenames = read_args(args)
    output = []
    threads = []
    for filename in filenames:
        thr = ExtractionThread(filename, parameters, output)
        thr.start()
        threads.append(thr)

    print('Waiting for jobs to finish ..', file=sys.stderr)
    # Join the workers instead of draining the semaphore: every worker
    # gives its permit back when it ends, so the semaphore is full again
    # afterwards and this function can be called more than once.
    for thr in threads:
        thr.join()

    return output
|  | 
 | ||
|  | 
 | ||
def write_header(comments, out_file=None):
    """Write the C++ header that defines one docstring constant per entry.

    Entries are sorted by name, then filename.  When several entries share
    a name, the second and later ones get a ``_2``, ``_3``, ... suffix.

    Args:
        comments: Iterable of ``(name, filename, comment)`` tuples.
        out_file: Text stream to write to.  ``None`` (the default) means
            the ``sys.stdout`` in effect at call time.
    """
    # Resolved here rather than in the signature so that a redirected
    # sys.stdout is honoured.
    if out_file is None:
        out_file = sys.stdout

    print('''/*
  This file contains docstrings for the Python bindings.
  Do not edit! These were automatically extracted by mkdoc.py
 */

#define __EXPAND(x)                                      x
#define __COUNT(_1, _2, _3, _4, _5, _6, _7, COUNT, ...)  COUNT
#define __VA_SIZE(...)                                   __EXPAND(__COUNT(__VA_ARGS__, 7, 6, 5, 4, 3, 2, 1))
#define __CAT1(a, b)                                     a ## b
#define __CAT2(a, b)                                     __CAT1(a, b)
#define __DOC1(n1)                                       __doc_##n1
#define __DOC2(n1, n2)                                   __doc_##n1##_##n2
#define __DOC3(n1, n2, n3)                               __doc_##n1##_##n2##_##n3
#define __DOC4(n1, n2, n3, n4)                           __doc_##n1##_##n2##_##n3##_##n4
#define __DOC5(n1, n2, n3, n4, n5)                       __doc_##n1##_##n2##_##n3##_##n4##_##n5
#define __DOC6(n1, n2, n3, n4, n5, n6)                   __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6
#define __DOC7(n1, n2, n3, n4, n5, n6, n7)               __doc_##n1##_##n2##_##n3##_##n4##_##n5##_##n6##_##n7
#define DOC(...)                                         __EXPAND(__EXPAND(__CAT2(__DOC, __VA_SIZE(__VA_ARGS__)))(__VA_ARGS__))

#if defined(__GNUG__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
''', file=out_file)

    name_ctr = 1
    name_prev = None
    for name, _, comment in sorted(comments, key=lambda x: (x[0], x[1])):
        if name == name_prev:
            # Repeated name: disambiguate with a running counter.
            name_ctr += 1
            name = name + "_%i" % name_ctr
        else:
            name_prev = name
            name_ctr = 1
        # Multi-line comments start on their own line in the header.
        print('\nstatic const char *%s =%sR"doc(%s)doc";' %
              (name, '\n' if '\n' in comment else ' ', comment), file=out_file)

    print('''
#if defined(__GNUG__)
#pragma GCC diagnostic pop
#endif
''', file=out_file)
 | ||
|  | 
 | ||
|  | 
 | ||
def mkdoc(args):
    """Extract docstrings from the given headers and write the C++ header.

    Args:
        args: Iterable of argument strings.  The first item starting with
            ``-o`` selects the output file, given either attached
            (``-oFILE``) or as the following item (``-o FILE``); it is
            removed before the remaining items are passed to
            extract_all().  Without ``-o`` the header goes to standard
            output.

    Raises:
        SystemExit: With code -1 if ``-o`` is the last item and has no
            attached value.
        NoFilenamesError: If no header filename is given.
    """
    args = list(args)
    out_path = None
    for idx, arg in enumerate(args):
        if arg.startswith("-o"):
            del args[idx]
            out_path = arg[2:]
            if not out_path:
                # Detached form: the value is the item that followed -o.
                if idx >= len(args):
                    print("-o flag requires an argument", file=sys.stderr)
                    sys.exit(-1)
                out_path = args.pop(idx)
            break

    comments = extract_all(args)

    if out_path:
        try:
            with open(out_path, 'w') as out_file:
                write_header(comments, out_file)
        except BaseException:
            # In the event of an error (including an interrupt), don't
            # leave a partially-written output file.
            try:
                os.unlink(out_path)
            except OSError:
                pass
            raise
    else:
        write_header(comments)
|  | 
 | ||
|  | 
 | ||
if __name__ == '__main__':
    try:
        mkdoc(sys.argv[1:])
    except NoFilenamesError:
        # Usage goes to stderr: stdout carries the generated header when no
        # -o option is given.  sys.exit is used because the bare exit()
        # helper is only installed by the site module.
        print('Syntax: %s [.. a list of header files ..]' % sys.argv[0],
              file=sys.stderr)
        sys.exit(-1)