xref: /OK3568_Linux_fs/kernel/Documentation/sphinx/automarkup.py (revision 4882a59341e53eb6f0b4789bf948001014eff981)
1*4882a593Smuzhiyun# SPDX-License-Identifier: GPL-2.0
2*4882a593Smuzhiyun# Copyright 2019 Jonathan Corbet <corbet@lwn.net>
3*4882a593Smuzhiyun#
4*4882a593Smuzhiyun# Apply kernel-specific tweaks after the initial document processing
5*4882a593Smuzhiyun# has been done.
6*4882a593Smuzhiyun#
7*4882a593Smuzhiyunfrom docutils import nodes
8*4882a593Smuzhiyunimport sphinx
9*4882a593Smuzhiyunfrom sphinx import addnodes
10*4882a593Smuzhiyunif sphinx.version_info[0] < 2 or \
11*4882a593Smuzhiyun   sphinx.version_info[0] == 2 and sphinx.version_info[1] < 1:
12*4882a593Smuzhiyun    from sphinx.environment import NoUri
13*4882a593Smuzhiyunelse:
14*4882a593Smuzhiyun    from sphinx.errors import NoUri
15*4882a593Smuzhiyunimport re
16*4882a593Smuzhiyunfrom itertools import chain
17*4882a593Smuzhiyun
#
# re.ASCII does not exist on Python 2; use 0 (no extra flag) when it
# is missing.
#
ascii_p3 = getattr(re, 'ASCII', 0)

#
# Regex nastiness.  Of course.
# Spot a bare "function()" in running text: an identifier of at least
# two characters directly followed by an empty pair of parentheses.
# Group 1 is the whole "name()" text, group 2 is just the name.  The
# leading \b keeps a match from starting in the middle of a word;
# nothing in the pattern restricts what may follow the closing
# parenthesis.
#
RE_function = re.compile(r'\b(([a-zA-Z_]\w+)\(\))', ascii_p3)

#
# Sphinx 2 has a single :c:type role covering struct, union, enum and
# typedef, so one pattern handles all four keywords.  Group 1 is the
# keyword, group 2 the type name.
#
RE_generic_type = re.compile(
    r'\b(struct|union|enum|typedef)\s+([a-zA-Z_]\w+)', ascii_p3)

#
# Sphinx 3 has a separate C role for each of struct, union, enum and
# typedef, so each keyword gets its own pattern (same group layout as
# RE_generic_type).
#
RE_struct = re.compile(r'\b(struct)\s+([a-zA-Z_]\w+)', ascii_p3)
RE_union = re.compile(r'\b(union)\s+([a-zA-Z_]\w+)', ascii_p3)
RE_enum = re.compile(r'\b(enum)\s+([a-zA-Z_]\w+)', ascii_p3)
RE_typedef = re.compile(r'\b(typedef)\s+([a-zA-Z_]\w+)', ascii_p3)

#
# A reference to a documentation page written as Documentation/...,
# optionally followed by one or more ".ext" suffixes.  Group 1 is the
# path after "Documentation" (leading slash included, suffixes excluded).
#
RE_doc = re.compile(r'\bDocumentation(/[\w\-_/]+)(\.\w+)*')
55*4882a593Smuzhiyun
#
# C reserved words that must never be turned into cross-references.
#
Skipnames = [
    'for',
    'if',
    'register',
    'sizeof',
    'struct',
    'unsigned',
]


#
# Common system calls are mentioned all over the docs.  Trying to
# cross-reference them is pointless and, as has been known to happen,
# somebody defining a function with one of these names can lead to
# incorrect and confusing cross references.  So these names are never
# looked up at all.
#
Skipfuncs = [
    'open', 'close', 'read', 'write',
    'fcntl', 'mmap', 'select', 'poll',
    'fork', 'execve', 'clone', 'ioctl',
    'socket',
]
72*4882a593Smuzhiyun
def markup_refs(docname, app, node):
    """Split the text of *node* into plain text and cross-references.

    Every regex relevant to the running Sphinx major version is run over
    ``node.astext()``.  Each match is handed to the markup function
    associated with its regex, and the text between matches is kept as
    ``nodes.Text``.

    :param docname: name of the document being processed; passed through
        to the markup functions.
    :param app: the Sphinx application object; passed through to the
        markup functions.
    :param node: the text node whose content should be scanned.
    :returns: list of nodes that together carry the same text as *node*,
        in order.
    """
    t = node.astext()
    done = 0
    repl = []
    #
    # Associate each regex with the function that will markup its matches.
    # Only the table for the Sphinx version in use is built.
    #
    if sphinx.version_info[0] >= 3:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_func_ref_sphinx3,
                       RE_struct: markup_c_ref,
                       RE_union: markup_c_ref,
                       RE_enum: markup_c_ref,
                       RE_typedef: markup_c_ref}
    else:
        markup_func = {RE_doc: markup_doc_ref,
                       RE_function: markup_c_ref,
                       RE_generic_type: markup_c_ref}

    match_iterators = [regex.finditer(t) for regex in markup_func]
    #
    # Sort all references by the starting position in text
    #
    sorted_matches = sorted(chain(*match_iterators), key=lambda m: m.start())
    for m in sorted_matches:
        #
        # Matches coming from different regexes can overlap: in
        # "struct foo()" both "struct foo" and "foo()" match, and in
        # "Documentation/foo()" both the path and "foo()" match.  The
        # earlier match has already consumed the shared text, so emitting
        # the later one too would duplicate it in the output.  Drop it;
        # whatever follows the earlier match is picked up as plain text.
        #
        if m.start() < done:
            continue

        #
        # Include any text prior to match as a normal text node.
        #
        if m.start() > done:
            repl.append(nodes.Text(t[done:m.start()]))

        #
        # Call the function associated with the regex that matched this text and
        # append its return to the text
        #
        repl.append(markup_func[m.re](docname, app, m))

        done = m.end()
    if done < len(t):
        repl.append(nodes.Text(t[done:]))
    return repl
118*4882a593Smuzhiyun
119*4882a593Smuzhiyun#
120*4882a593Smuzhiyun# In sphinx3 we can cross-reference to C macro and function, each one with its
121*4882a593Smuzhiyun# own C role, but both match the same regex, so we try both.
122*4882a593Smuzhiyun#
def markup_func_ref_sphinx3(docname, app, match):
    """Resolve a ``name()`` match as a C function or, failing that, a macro.

    :param docname: name of the document containing the match.
    :param app: the Sphinx application object; its environment supplies
        the C domain and its builder is passed to the resolver.
    :param match: a match whose group 0 is the text to display and whose
        group 2 is the bare identifier to look up.
    :returns: the resolved cross-reference node if the C domain finds the
        target under either role, otherwise a plain ``nodes.Text`` holding
        the matched text.
    """
    cdom = app.env.domains['c']
    target = match.group(2)
    target_text = nodes.Text(match.group(0))

    #
    # Names on either skip list are never cross-referenced.
    #
    if target in Skipfuncs or target in Skipnames:
        return target_text

    #
    # Go through the dance of getting an xref out of the C domain, trying
    # the function role first and the macro role second.
    #
    for css_class, role in (('c-func', 'function'), ('c-macro', 'macro')):
        literal = nodes.literal(classes=['xref', 'c', css_class])
        literal += target_text
        pending = addnodes.pending_xref('', refdomain='c', reftype=role,
                                        reftarget=target, modname=None,
                                        classname=None)
        #
        # XXX The Latex builder will throw NoUri exceptions here,
        # work around that by ignoring them and moving on to the next role.
        #
        try:
            resolved = cdom.resolve_xref(app.env, docname, app.builder,
                                         role, target, pending, literal)
        except NoUri:
            continue

        if resolved:
            return resolved

    return target_text
157*4882a593Smuzhiyun
def markup_c_ref(docname, app, match):
    """Resolve a C identifier match through the Sphinx C domain.

    The regex that produced *match* selects both the CSS class put on the
    literal node and the reference type asked of the C domain.

    :param docname: name of the document containing the match.
    :param app: the Sphinx application object; its environment supplies
        the C domain and its builder is passed to the resolver.
    :param match: a match whose group 0 is the text to display and whose
        group 2 is the bare identifier to look up.
    :returns: the resolved cross-reference node, or a plain ``nodes.Text``
        holding the matched text when the name is skipped or unresolved.
    """
    #
    # regex -> (CSS class, C-domain reference type)
    #
    roles = {
        # Sphinx 2 only
        RE_function: ('c-func', 'function'),
        RE_generic_type: ('c-type', 'type'),
        # Sphinx 3+ only
        RE_struct: ('c-struct', 'struct'),
        RE_union: ('c-union', 'union'),
        RE_enum: ('c-enum', 'enum'),
        RE_typedef: ('c-type', 'type'),
    }

    cdom = app.env.domains['c']
    target = match.group(2)
    target_text = nodes.Text(match.group(0))

    #
    # Reserved words are always left alone; well-known system call names
    # are left alone only when they were written as a function call.
    #
    is_skipped_func = match.re == RE_function and target in Skipfuncs
    if is_skipped_func or target in Skipnames:
        return target_text

    #
    # Go through the dance of getting an xref out of the C domain
    #
    css_class, role = roles[match.re]
    literal = nodes.literal(classes=['xref', 'c', css_class])
    literal += target_text
    pending = addnodes.pending_xref('', refdomain='c', reftype=role,
                                    reftarget=target, modname=None,
                                    classname=None)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = cdom.resolve_xref(app.env, docname, app.builder,
                                     role, target, pending, literal)
    except NoUri:
        return target_text

    #
    # Hand back the xref if there is one, the plain text otherwise.
    #
    return resolved if resolved else target_text
210*4882a593Smuzhiyun
211*4882a593Smuzhiyun#
212*4882a593Smuzhiyun# Try to replace a documentation reference of the form Documentation/... with a
213*4882a593Smuzhiyun# cross reference to that page
214*4882a593Smuzhiyun#
def markup_doc_ref(docname, app, match):
    """Resolve a ``Documentation/...`` match into a document reference.

    :param docname: name of the document containing the match.
    :param app: the Sphinx application object; its environment supplies
        the std domain and its builder is passed to the resolver.
    :param match: a match whose group 0 is the text as written and whose
        group 1 is the path following "Documentation", used as the
        reference target.
    :returns: the resolved cross-reference node, or a plain ``nodes.Text``
        holding the matched text when nothing could be resolved.
    """
    stddom = app.env.domains['std']
    target = match.group(1)
    #
    # Go through the dance of getting an xref out of the std domain
    #
    pending = addnodes.pending_xref('', refdomain='std', reftype='doc',
                                    reftarget=target, modname=None,
                                    classname=None, refexplicit=False)
    #
    # XXX The Latex builder will throw NoUri exceptions here,
    # work around that by ignoring them.
    #
    try:
        resolved = stddom.resolve_xref(app.env, docname, app.builder, 'doc',
                                       target, pending, None)
    except NoUri:
        resolved = None

    #
    # No xref to offer: fall back to the text exactly as it was written.
    #
    if not resolved:
        return nodes.Text(match.group(0))
    return resolved
241*4882a593Smuzhiyun
def auto_markup(app, doctree, name):
    """Add automatic cross-references to every paragraph of *doctree*.

    :param app: the Sphinx application object.
    :param doctree: the document tree to rewrite in place.
    :param name: name of the document, handed on to markup_refs().
    """
    #
    # This loop could eventually be improved on.  Someday maybe we
    # want a proper tree traversal with a lot of awareness of which
    # kinds of nodes to prune.  But this works well for now.
    #
    for para in doctree.traverse(nodes.paragraph):
        for text in para.traverse(nodes.Text):
            parent = text.parent
            #
            # Text sitting inside a literal node (``literal text``) is
            # left alone, so that functions already marked explicitly
            # with :c:func: do not get a second cross-reference.
            #
            if isinstance(parent, nodes.literal):
                continue
            parent.replace(text, markup_refs(name, app, text))
256*4882a593Smuzhiyun
def setup(app):
    """Register auto_markup() for the 'doctree-resolved' event.

    :param app: the Sphinx application object.
    :returns: extension metadata declaring the extension safe for
        parallel reading and parallel writing.
    """
    app.connect('doctree-resolved', auto_markup)
    return dict(parallel_read_safe=True, parallel_write_safe=True)
263