Coverage for /Users/davegaeddert/Development/dropseed/plain/plain/plain/internal/handlers/wsgi.py: 60%

122 statements  

« prev     ^ index     » next       coverage.py v7.6.1, created at 2024-10-16 22:04 -0500

1import uuid 

2from io import IOBase 

3 

4from plain import signals 

5from plain.http import HttpRequest, QueryDict, parse_cookie 

6from plain.internal.handlers import base 

7from plain.utils.encoding import repercent_broken_unicode 

8from plain.utils.functional import cached_property 

9from plain.utils.regex_helper import _lazy_re_compile 

10 

# Bytes pattern matching a run of one or more consecutive "/" characters.
# Used by get_script_name() to collapse repeated slashes in SCRIPT_URL.
# NOTE(review): presumably compiled lazily on first use, going by the
# helper's name -- confirm in plain.utils.regex_helper.
11_slashes_re = _lazy_re_compile(rb"/+") 

12 

13 

class LimitedStream(IOBase):
    """
    Wrap another stream to disallow reading it past a number of bytes.

    Based on the implementation from werkzeug.wsgi.LimitedStream
    See https://github.com/pallets/werkzeug/blob/dbf78f67/src/werkzeug/wsgi.py#L828
    """

    def __init__(self, stream, limit):
        """
        Args:
            stream: Object exposing ``read(size)`` and ``readline(size)``.
            limit: Maximum total number of bytes that may be consumed
                through this wrapper.
        """
        self._read = stream.read
        self._readline = stream.readline
        self._pos = 0
        self.limit = limit

    def _clamp_size(self, size):
        """
        Return how many bytes a read of ``size`` may consume.

        ``None`` or any negative ``size`` means "everything that is left
        before the limit". Passing a negative value straight through to the
        wrapped stream would make it read to EOF and ignore the limit.
        """
        remaining = self.limit - self._pos
        if size is None or size < 0:
            return remaining
        return min(size, remaining)

    def read(self, size=-1, /):
        """
        Read up to ``size`` bytes without going past the limit.

        Returns ``b""`` once the limit has been reached.
        """
        if self._pos >= self.limit:
            return b""
        data = self._read(self._clamp_size(size))
        self._pos += len(data)
        return data

    def readline(self, size=-1, /):
        """
        Read one line of at most ``size`` bytes without going past the limit.

        Returns ``b""`` once the limit has been reached.
        """
        if self._pos >= self.limit:
            return b""
        line = self._readline(self._clamp_size(size))
        self._pos += len(line)
        return line

53 

54 

class WSGIRequest(HttpRequest):
    """HttpRequest populated from a WSGI ``environ`` dictionary."""

    non_picklable_attrs = HttpRequest.non_picklable_attrs | frozenset({"environ"})
    meta_non_picklable_attrs = frozenset({"wsgi.errors", "wsgi.input"})

    def __init__(self, environ):
        # Random identifier that can be used to trace this request.
        self.unique_id = str(uuid.uuid4())

        script_name = get_script_name(environ)
        # An empty PATH_INFO (e.g. the SCRIPT_NAME URL requested without a
        # trailing slash) is handled as a request for "/".
        path_info = get_path_info(environ) or "/"

        self.environ = environ
        self.path_info = path_info

        # Strip only the first slash of path_info: per RFC 3986,
        # http://test/something and http://test//something are different.
        prefix = script_name.rstrip("/")
        suffix = path_info.replace("/", "", 1)
        self.path = f"{prefix}/{suffix}"

        # META is the environ dict itself (not a copy), so the two
        # assignments below also update the caller's environ.
        self.META = environ
        self.META["PATH_INFO"] = path_info
        self.META["SCRIPT_NAME"] = script_name

        self.method = environ["REQUEST_METHOD"].upper()

        # Set content_type, content_params, and encoding.
        self._set_content_type_params(environ)

        # A missing or non-numeric CONTENT_LENGTH is treated as an empty body.
        try:
            content_length = int(environ.get("CONTENT_LENGTH"))
        except (ValueError, TypeError):
            content_length = 0

        self._stream = LimitedStream(environ["wsgi.input"], content_length)
        self._read_started = False
        self.resolver_match = None

    def __getstate__(self):
        """Return the parent's state with the unpicklable META entries removed."""
        state = super().__getstate__()
        meta = state["META"]
        for attr in self.meta_non_picklable_attrs:
            if attr in meta:
                del meta[attr]
        return state

    def _get_scheme(self):
        """Return the ``wsgi.url_scheme`` environ value, or None if absent."""
        return self.environ.get("wsgi.url_scheme")

    @cached_property
    def GET(self):
        """QueryDict built from QUERY_STRING."""
        # The WSGI spec says 'QUERY_STRING' may be absent.
        query_bytes = get_bytes_from_wsgi(self.environ, "QUERY_STRING", "")
        return QueryDict(query_bytes, encoding=self._encoding)

    def _get_post(self):
        # Load the POST data on first access.
        if not hasattr(self, "_post"):
            self._load_post_and_files()
        return self._post

    def _set_post(self, post):
        self._post = post

    @cached_property
    def COOKIES(self):
        """Cookies parsed from the HTTP_COOKIE environ value."""
        cookie_header = get_str_from_wsgi(self.environ, "HTTP_COOKIE", "")
        return parse_cookie(cookie_header)

    @property
    def FILES(self):
        """Uploaded files, loaded on first access."""
        if not hasattr(self, "_files"):
            self._load_post_and_files()
        return self._files

    POST = property(_get_post, _set_post)

125 

126 

class WSGIHandler(base.BaseHandler):
    """WSGI application callable that turns an environ into a response."""

    request_class = WSGIRequest

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.load_middleware()

    def __call__(self, environ, start_response):
        """
        Handle one WSGI request.

        Sends the ``request_started`` signal, builds the request, obtains
        the response, calls ``start_response`` with the status line and
        headers, and returns the response (or a ``wsgi.file_wrapper``
        around its file when one is available).
        """
        handler_cls = self.__class__
        signals.request_started.send(sender=handler_cls, environ=environ)

        response = self.get_response(self.request_class(environ))
        response._handler_class = handler_cls

        status = "%d %s" % (response.status_code, response.reason_phrase)

        # Regular headers first, then one Set-Cookie header per cookie.
        response_headers = list(response.items())
        response_headers.extend(
            ("Set-Cookie", cookie.output(header=""))
            for cookie in response.cookies.values()
        )
        start_response(status, response_headers)

        if getattr(response, "file_to_stream", None) is None:
            return response

        file_wrapper = environ.get("wsgi.file_wrapper")
        if not file_wrapper:
            return response

        # If `wsgi.file_wrapper` is used the WSGI server does not call
        # .close on the response, but on the file wrapper. Patch it to use
        # response.close instead which takes care of closing all files.
        response.file_to_stream.close = response.close
        return file_wrapper(response.file_to_stream, response.block_size)

158 

159 

def get_path_info(environ):
    """Return the request's PATH_INFO as a str, defaulting to "/" when absent."""
    raw_path = get_bytes_from_wsgi(environ, "PATH_INFO", "/")
    repaired_path = repercent_broken_unicode(raw_path)
    return repaired_path.decode()

165 

166 

def get_script_name(environ):
    """
    Return the equivalent of the HTTP request's SCRIPT_NAME environment
    variable.

    When Apache mod_rewrite was involved, the result is the script name as
    the client saw it, i.e. before any rewriting took place.
    """
    # If Apache's mod_rewrite processed the URL, Apache set either SCRIPT_URL
    # or REDIRECT_URL to the full resource URL before applying any rewrites.
    # Not every web server passes this through, so fall back to SCRIPT_NAME
    # when neither is present.
    script_url = get_bytes_from_wsgi(environ, "SCRIPT_URL", "")
    if not script_url:
        script_url = get_bytes_from_wsgi(environ, "REDIRECT_URL", "")

    if not script_url:
        return get_bytes_from_wsgi(environ, "SCRIPT_NAME", "").decode()

    if b"//" in script_url:
        # mod_wsgi squashes multiple successive slashes in PATH_INFO,
        # do the same with script_url before manipulating paths (#17133).
        script_url = _slashes_re.sub(b"/", script_url)

    # Whatever precedes PATH_INFO in the full URL is the script name.
    path_info = get_bytes_from_wsgi(environ, "PATH_INFO", "")
    return script_url.removesuffix(path_info).decode()

194 

195 

def get_bytes_from_wsgi(environ, key, default):
    """
    Look up ``key`` in the WSGI environ and return its value as bytes.

    ``key`` and ``default`` should be strings; ``default`` is used when the
    key is absent.
    """
    # Non-ASCII values in the WSGI environ are arbitrarily decoded with
    # ISO-8859-1, which is wrong for Plain websites where UTF-8 is the
    # default. Encoding back with ISO-8859-1 recovers the original bytes.
    return environ.get(key, default).encode("iso-8859-1")

207 

208 

def get_str_from_wsgi(environ, key, default):
    """
    Look up ``key`` in the WSGI environ and return its value as str.

    ``key`` and ``default`` should be str objects. The recovered bytes are
    decoded with the default codec; undecodable bytes are replaced rather
    than raising.
    """
    raw_bytes = get_bytes_from_wsgi(environ, key, default)
    return raw_bytes.decode(errors="replace")