From ce6e4cdcad70e46c51cbecacbe13e358c0af28bf Mon Sep 17 00:00:00 2001 From: syimyuzya Date: Sun, 11 Jul 2021 21:17:04 +0800 Subject: [PATCH] BUG: Handle URLs with non-ASCII chars w/ --http Python 3 allows Unicode identifiers, so non-ASCII characters should be handled with care. This commit fixes a bug that only happens when serving with `--http` (works fine when generating static HTML files). Before this fix, pdoc would raise errors if the URL contains non-ASCII module names. --- pdoc/cli.py | 3 +- pdoc/test/__init__.py | 13 +- .../__init__.py" | 378 ++++++++++++++++++ 3 files changed, 392 insertions(+), 2 deletions(-) create mode 100644 "pdoc/test/example_pkg/non_\303\244\305\241\303\247ii/__init__.py" diff --git a/pdoc/cli.py b/pdoc/cli.py index 97a47384..21605ac0 100755 --- a/pdoc/cli.py +++ b/pdoc/cli.py @@ -11,6 +11,7 @@ import re import sys import warnings +import urllib from contextlib import contextmanager from functools import lru_cache from http.server import BaseHTTPRequestHandler, HTTPServer @@ -298,7 +299,7 @@ def import_path_from_req_url(self): if pth.endswith(suffix): pth = pth[:-len(suffix)] break - return pth.replace('/', '.') + return urllib.parse.unquote(pth).replace('/', '.') def module_path(m: pdoc.Module, ext: str): diff --git a/pdoc/test/__init__.py b/pdoc/test/__init__.py index 5ea6141f..1375d71c 100644 --- a/pdoc/test/__init__.py +++ b/pdoc/test/__init__.py @@ -113,6 +113,8 @@ class CliTest(unittest.TestCase): os.path.join('example_pkg', '_private'), os.path.join('example_pkg', '_private', 'index.html'), os.path.join('example_pkg', '_private', 'module.html'), + os.path.join('example_pkg', 'non_äšçii'), + os.path.join('example_pkg', 'non_äšçii', 'index.html'), os.path.join('example_pkg', 'subpkg'), os.path.join('example_pkg', 'subpkg', '_private.html'), os.path.join('example_pkg', 'subpkg', 'index.html'), @@ -470,7 +472,7 @@ def setUp(self): def test_module(self): modules = { - EXAMPLE_MODULE: ('', ('index', 'module', 'subpkg', 
'subpkg2')), + EXAMPLE_MODULE: ('', ('index', 'module', 'non_äšçii', 'subpkg', 'subpkg2')), EXAMPLE_MODULE + '.subpkg2': ('.subpkg2', ('subpkg2.module',)), } with chdir(TESTS_BASEDIR): @@ -1683,6 +1685,15 @@ def test_http(self): html = resp.read() self.assertIn(b'DictReader', html) + def test_non_ascii_url(self): + from urllib.parse import quote + with self._http([os.path.join(TESTS_BASEDIR, EXAMPLE_MODULE)]) as url: + quoted = f'{url}{EXAMPLE_MODULE}/' + quote('non_äšçii') + with urlopen(quoted, timeout=3) as resp: + self.assertEqual(resp.status, 200) + html = resp.read() + self.assertIn('ünicøđe_ftw'.encode('utf-8'), html) + def test_file(self): with chdir(os.path.join(TESTS_BASEDIR, EXAMPLE_MODULE)): with self._http(['_relative_import']) as url: diff --git "a/pdoc/test/example_pkg/non_\303\244\305\241\303\247ii/__init__.py" "b/pdoc/test/example_pkg/non_\303\244\305\241\303\247ii/__init__.py" new file mode 100644 index 00000000..c6e1f35a --- /dev/null +++ "b/pdoc/test/example_pkg/non_\303\244\305\241\303\247ii/__init__.py" @@ -0,0 +1,378 @@ +""" +Module with non-ASCII identifiers +""" +# NOTE Content copied from ../__init__.py +# with additional definitions +from collections import namedtuple +import subprocess +import os + + +# === additional definitions below === +def ünicøđe_ftw(): + """ + A function with non-ASCII name + """ +# === additional definitions above === + + +CONST = 'const' +"""CONST docstring""" + +var = 2 +"""var docstring""" + +# https://github.com/mitmproxy/pdoc/pull/44 +foreign_var = subprocess.CalledProcessError(0, '') +"""foreign var docstring""" + +__pdoc__ = {} + + +def foo(env=os.environ): + """Doesn't leak environ""" + + +def object_as_arg_default(*args, a=object(), **kwargs): + """Html-encodes angle brackets in params""" + + +def _private_function(): + """Private function, should only appear if whitelisted""" + + +class A: + """`A` is base class for `example_pkg.B`.""" # Test refname link + def overridden(self): + """A.overridden 
docstring""" + + def overridden_same_docstring(self): + """A.overridden_same_docstring docstring""" + + def inherited(self): # Inherited in B + """A.inherited docstring""" + + def __call__(self): + """A.__call__ docstring. Only shown when whitelisted""" + + +non_callable_routine = staticmethod(lambda x: 2) # Not interpreted as Function; skipped + + +class ReadOnlyValueDescriptor: + """Read-only value descriptor""" + + def __get__(self, instance, instance_type=None): + if instance is not None: + return instance.var ** 2 + return self + + +class B(A, int): + """ + B docstring + + External refs: `sys.version`, `sys` + """ + + CONST = 2 + """B.CONST docstring""" + + var = 3 + """B.var docstring""" + + ro_value_descriptor = ReadOnlyValueDescriptor() + """ro_value_descriptor docstring""" + + ro_value_descriptor_no_doc = ReadOnlyValueDescriptor() # no doc-string + + def __init__(self, x, y, z, w): + """`__init__` docstring""" + self.instance_var = None + """instance var docstring""" + + self._private_instance_var = None + """This should be private (hidden) despite PEP 224 docstring""" + + def f(self, a: int, b: int = 1, *args, c: str = 'c', **kwargs): + """B.f docstring""" + + @staticmethod + def static(x): + """B.static docstring""" + + @classmethod + def cls(cls): + """B.cls docstring""" + + def _private(self): + """B._private docstring""" + + @staticmethod + def _private_static(): + """B._private_static docstring""" + + @classmethod + def _private_cls(cls): + """B._private_cls docstring""" + + @property + def p(self): + """B.p docstring""" + return 1 + + class C: + """B.C docstring""" + def f(self): + """B.C.f docstring""" + + class _Private: + """B._Private docstring""" + def f(self): + """B._Private.f docstring""" + + def overridden(self): + pass + + assert overridden.__doc__ is None + __pdoc__['B.overridden'] = 'B.overridden docstring' + + def overridden_same_docstring(self): + pass + + +class C(B): pass # noqa: E701, E302 +class D(C): pass # noqa: E701, E302 + + 
+class Hidden: + __pdoc__['Hidden'] = False + + +class Docformats: + def numpy(self): + """ + Summary line. + + **Documentation**: https://pdoc3.github.io/pdoc/doc/pdoc/ + **Source Code**: https://github.com/pdoc3/ + + Parameters + ---------- + x1, x2 : array_like + Input arrays, + description of `x1`, `x2`. + + .. versionadded:: 1.5.0 + x : { NoneType, 'B', 'C' }, optional + n : int or list of int + Description of num + *args, **kwargs + Passed on. + complex : Union[Set[pdoc.Doc, Function], pdoc] + The `List[Doc]`s of the new signal. + + Returns + ------- + output : pdoc.Doc + The output array + List[pdoc.Doc] + The output array + foo + + Raises + ------ + TypeError + When something. + + Raises + ------ + TypeError + + Returns + ------- + None. + + Invalid + ------- + no match + + See Also + -------- + fromstring, loadtxt + + See Also + -------- + pdoc.text : Function a with its description. + scipy.random.norm : Random variates, PDFs, etc. + pdoc.Doc : A class description that + spans several lines. + + Examples + -------- + >>> doctest + ... + + Notes + ----- + Foo bar. + + ### H3 Title + + Foo bar. + """ + + def google(self): + """ + Summary line. + Nomatch: + + Args: + arg1 (str, optional): Text1 + arg2 (List[str], optional, default=10): Text2 + data (array-like object): foo + + Args: + arg1 (int): Description of arg1 + arg2 (str or int): Description of arg2 + test_sequence: 2-dim numpy array of real numbers, size: N * D + - the test observation sequence. + + test_sequence = + code + + Continue. + *args: passed around + + Returns: + issue_10: description didn't work across multiple lines + if only a single item was listed. `inspect.cleandoc()` + somehow stripped the required extra indentation. + + Returns: + A very special number + which is the answer of everything. + + Returns: + Dict[int, pdoc.Doc]: Description. + + Raises: + AttributeError: The ``Raises`` section is a list of all exceptions + that are relevant to the interface. + + and a third line. 
+ ValueError: If `arg2` is equal to `arg1`. + + Test a title without a blank line before it. + Args: + A: a + + Examples: + Examples in doctest format. + + >>> a = [1,2,3] + + Todos: + * For module TODOs + """ + + def doctests(self): + """ + Need an intro paragrapgh. + + >>> Then code is indented one level + line1 + line2 + + Alternatively + ``` + >>> doctest + fenced code works + always + ``` + + Examples: + >>> nbytes(100) + '100.0 bytes' + line2 + + some text + + some text + + >>> another doctest + line1 + line2 + + Example: + >>> f() + Traceback (most recent call last): + ... + Exception: something went wrong + """ + + def reST_directives(self): + """ + .. todo:: + + Create something. + + .. admonition:: Example + + Image shows something. + + .. image:: https://www.debian.org/logos/openlogo-nd-100.png + + .. note:: + Can only nest admonitions two levels. + + .. image:: https://www.debian.org/logos/openlogo-nd-100.png + + Now you know. + + .. warning:: + + Some warning + lines. + + * Describe some func in a list + across multiple lines: + + .. deprecated:: 3.1 + Use `spam` instead. + + .. versionadded:: 2.5 + The *spam* parameter. + + .. caution:: + Don't touch this! + """ + + +numpy = Docformats.numpy + + +google = Docformats.google + + +doctests = Docformats.doctests + + +reST_directives = Docformats.reST_directives + + +def latex_math(): + """ + Inline equation: \\( v_t *\\frac{1}{2}* j_i + [a] < 3 \\). + + Block equation: \\[ v_t *\\frac{1}{2}* j_i + [a] < 3 \\] + + Block equation: $$ v_t *\\frac{1}{2}* j_i + [a] < 3 $$ + + ..math:: + v_t *\\frac{1}{2}* j_i + [a] < 3 + """ + + +class Location(namedtuple('Location', 'lat lon')): + """Geo-location, GPS position."""