Skip to content

Commit 751249a

Browse files
authored
Merge branch 'master' into dill21yu-patch-1
2 parents d946469 + 981baab commit 751249a

File tree

12 files changed

+498
-92
lines changed

12 files changed

+498
-92
lines changed

apps/filebrowser/src/filebrowser/conf.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from django.utils.translation import gettext_lazy as _
1919

2020
from desktop.conf import ENABLE_DOWNLOAD, is_oozie_enabled
21-
from desktop.lib.conf import Config, coerce_bool, coerce_csv
21+
from desktop.lib.conf import coerce_bool, coerce_csv, Config
2222

2323
MAX_SNAPPY_DECOMPRESSION_SIZE = Config(
2424
key="max_snappy_decompression_size", help=_("Max snappy decompression size in bytes."), private=True, default=1024 * 1024 * 25, type=int
@@ -104,10 +104,18 @@ def max_file_size_upload_limit():
104104
)
105105

106106
RESTRICT_FILE_EXTENSIONS = Config(
107-
key='restrict_file_extensions',
107+
key="restrict_file_extensions",
108+
default=None,
109+
type=coerce_csv,
110+
help=_("Specify file extensions that are not allowed, separated by commas. For example: .exe, .zip, .rar, .tar, .gz"),
111+
)
112+
113+
ALLOW_FILE_EXTENSIONS = Config(
114+
key="allow_file_extensions",
108115
default=None,
109116
type=coerce_csv,
110117
help=_(
111-
'Specify file extensions that are not allowed, separated by commas. For example: .exe, .zip, .rar, .tar, .gz'
118+
"Specify file extensions that are allowed, separated by commas. "
119+
"When set, only these extensions will be permitted. For example: .tsv, .csv, .xlsx"
112120
),
113121
)

apps/filebrowser/src/filebrowser/utils.py

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,16 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616
import io
17-
import os
1817
import logging
18+
import os
1919
from datetime import datetime
2020
from urllib.parse import urlparse
2121

2222
import redis
2323

2424
from desktop.conf import TASK_SERVER_V2
2525
from desktop.lib.django_util import JsonResponse
26-
from filebrowser.conf import ARCHIVE_UPLOAD_TEMPDIR
26+
from filebrowser.conf import ALLOW_FILE_EXTENSIONS, ARCHIVE_UPLOAD_TEMPDIR, RESTRICT_FILE_EXTENSIONS
2727

2828
LOG = logging.getLogger()
2929

@@ -130,3 +130,41 @@ def release_reserved_space_for_file_uploads(uuid):
130130
LOG.exception("Failed to release reserved space: %s", str(e))
131131
finally:
132132
redis_client.close()
133+
134+
135+
def is_file_upload_allowed(file_name):
  """
  Check if a file upload is allowed based on file extension restrictions.

  The extension is whatever os.path.splitext() reports for file_name (the last
  dot-suffix, or an empty string when there is none) and is compared
  case-insensitively. ALLOW_FILE_EXTENSIONS is consulted first: when it is set,
  an extension outside it is rejected. RESTRICT_FILE_EXTENSIONS is consulted
  next, so an extension present in both lists is rejected.

  Configured entries match with or without a leading dot and regardless of
  surrounding whitespace, so a value written as ".exe, .zip" covers both
  extensions. Blank entries are ignored; a list containing only blank entries
  is treated the same as an unset list.

  Args:
    file_name: The name of the file being uploaded

  Returns:
    tuple: (is_allowed, error_message)
      - is_allowed: Boolean indicating if the file upload is allowed
      - error_message: String with error message if not allowed, None otherwise
  """
  if not file_name:
    return True, None

  # splitext() yields "" for names without an extension; lower() keeps that as "".
  file_type = os.path.splitext(file_name)[1].lower()

  # Check allow list first - if set, only these extensions are allowed
  allowed_extensions = _normalize_extensions(ALLOW_FILE_EXTENSIONS.get())
  if allowed_extensions and file_type not in allowed_extensions:
    return False, f'File type "{file_type}" is not permitted. Modify file extension settings to allow this type.'

  # Check restrict list - if set, these extensions are not allowed
  restricted_extensions = _normalize_extensions(RESTRICT_FILE_EXTENSIONS.get())
  if file_type in restricted_extensions:
    return False, f'File type "{file_type}" is restricted. Update file extension restrictions to allow this type.'

  return True, None


def _normalize_extensions(extensions):
  """Return configured extensions as a set of stripped, lowercase, dot-prefixed strings."""
  normalized = set()
  for raw_extension in extensions or ():
    extension = raw_extension.strip().lower()
    if not extension:
      # A blank entry would otherwise become "." and match names that end with a dot.
      continue
    normalized.add(extension if extension.startswith(".") else f".{extension}")
  return normalized
Lines changed: 291 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,291 @@
1+
#!/usr/bin/env python
2+
# Licensed to Cloudera, Inc. under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. Cloudera, Inc. licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
18+
from filebrowser.conf import ALLOW_FILE_EXTENSIONS, RESTRICT_FILE_EXTENSIONS
19+
from filebrowser.utils import is_file_upload_allowed
20+
21+
22+
class TestIsFileUploadAllowed:
  """Tests for is_file_upload_allowed under different allow/restrict extension settings."""

  @staticmethod
  def _configure(allow, restrict):
    """Override both extension configs and return a callable that restores them."""
    restore_allow = ALLOW_FILE_EXTENSIONS.set_for_testing(allow)
    restore_restrict = RESTRICT_FILE_EXTENSIONS.set_for_testing(restrict)

    def restore():
      restore_allow()
      restore_restrict()

    return restore

  def test_no_file_name(self):
    # A missing name (None) and an empty name are both accepted without a message.
    for missing_name in (None, ""):
      allowed, message = is_file_upload_allowed(missing_name)
      assert allowed is True
      assert message is None

  def test_no_restrictions_configured(self):
    restore = self._configure(None, None)

    try:
      # With neither list set, every extension goes through.
      for candidate in ("document.pdf", "script.exe", "archive.zip", "data.csv", "image.png", "video.mp4"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is True, f"File '{candidate}' should be allowed when no restrictions are configured"
        assert message is None
    finally:
      restore()

  def test_allow_list_with_dots(self):
    restore = self._configure([".csv", ".txt", ".json"], None)

    try:
      # Extensions on the allow list pass, whatever the case of the file name.
      for candidate in ("data.csv", "notes.txt", "config.json", "DATA.CSV", "NOTES.TXT"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is True, f"File '{candidate}' should be allowed"
        assert message is None

      # Anything outside the allow list is turned away with the allow-list message.
      for candidate in ("script.exe", "archive.zip", "image.png"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is False, f"File '{candidate}' should not be allowed"
        assert message is not None
        assert "is not permitted" in message
        assert "Modify file extension settings" in message
    finally:
      restore()

  def test_allow_list_without_dots(self):
    restore = self._configure(["csv", "txt", "json"], None)

    try:
      # Entries written without a leading dot are normalized before matching.
      for candidate in ("data.csv", "notes.txt", "config.json"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is True, f"File '{candidate}' should be allowed"
        assert message is None
    finally:
      restore()

  def test_restrict_list_with_dots(self):
    restore = self._configure(None, [".exe", ".zip", ".rar"])

    try:
      # Extensions on the restrict list are rejected, whatever the case of the file name.
      for candidate in ("malware.exe", "archive.zip", "compressed.rar", "MALWARE.EXE"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is False, f"File '{candidate}' should be restricted"
        assert message is not None
        assert "is restricted" in message
        assert "Update file extension restrictions" in message

      # Everything else passes.
      for candidate in ("document.pdf", "data.csv", "image.png"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is True, f"File '{candidate}' should be allowed"
        assert message is None
    finally:
      restore()

  def test_restrict_list_without_dots(self):
    restore = self._configure(None, ["exe", "zip", "rar"])

    try:
      # Entries written without a leading dot are normalized before matching.
      for candidate in ("malware.exe", "archive.zip", "compressed.rar"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is False, f"File '{candidate}' should be restricted"
        assert message is not None
        assert "is restricted" in message
    finally:
      restore()

  def test_both_allow_and_restrict_lists(self):
    # ".exe" sits in both lists; the allow list is evaluated before the restrict list.
    restore = self._configure([".csv", ".txt", ".exe"], [".exe", ".zip"])

    try:
      # Passing the allow list is not enough: the restrict list still rejects ".exe".
      allowed, message = is_file_upload_allowed("script.exe")
      assert allowed is False
      assert message is not None
      assert "is restricted" in message

      # On the allow list and absent from the restrict list: accepted.
      allowed, message = is_file_upload_allowed("data.csv")
      assert allowed is True
      assert message is None

      # Absent from the allow list: rejected there, before the restrict list matters.
      allowed, message = is_file_upload_allowed("image.png")
      assert allowed is False
      assert message is not None
      assert "is not permitted" in message
    finally:
      restore()

  def test_case_insensitive_extensions(self):
    restore = self._configure([".CSV", ".TXT"], [".EXE", ".ZIP"])

    try:
      # Upper-case allow-list entries must match any casing of the file extension.
      allow_cases = [
        ("data.csv", True),
        ("data.CSV", True),
        ("data.CsV", True),
        ("notes.txt", True),
        ("notes.TXT", True),
        ("notes.TxT", True),
      ]

      for candidate, expected in allow_cases:
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is expected, f"File '{candidate}' case handling failed"

      # Switch to a restrict-only configuration for the second half.
      restore()
      restore = self._configure(None, [".EXE", ".ZIP"])

      # Upper-case restrict-list entries must match any casing of the file extension.
      restrict_cases = [
        ("malware.exe", False),
        ("malware.EXE", False),
        ("malware.ExE", False),
        ("archive.zip", False),
        ("archive.ZIP", False),
        ("archive.ZiP", False),
      ]

      for candidate, expected in restrict_cases:
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is expected, f"File '{candidate}' case handling failed"
        if not expected:
          assert message is not None
          assert "is restricted" in message
    finally:
      restore()

  def test_files_without_extensions(self):
    restore = self._configure([".txt", ".csv"], [".exe"])

    try:
      # An extension-less name has no entry on the allow list, so it is rejected.
      allowed, message = is_file_upload_allowed("README")
      assert allowed is False
      assert message is not None
      assert "is not permitted" in message

      # Switch to a restrict-only configuration.
      restore()
      restore = self._configure(None, [".exe"])

      # With only a restrict list, an extension-less name is accepted.
      allowed, message = is_file_upload_allowed("README")
      assert allowed is True
      assert message is None
    finally:
      restore()

  def test_files_with_multiple_dots(self):
    restore = self._configure([".gz", ".txt"], [".exe"])

    try:
      # Only the final suffix counts as the extension.
      for candidate in ("archive.tar.gz", "document.backup.txt"):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is True
        assert message is None

      # ".exe" is not on the allow list, so the allow-list message is the one returned.
      allowed, message = is_file_upload_allowed("file.backup.exe")
      assert allowed is False
      assert message is not None
      assert "is not permitted" in message
    finally:
      restore()

  def test_hidden_files(self):
    restore = self._configure([".txt", ".conf"], [".exe"])

    try:
      # A dot-file that also carries a real extension is judged by that extension.
      allowed, message = is_file_upload_allowed(".bashrc.txt")
      assert allowed is True
      assert message is None

      # A bare dot-file has no extension and therefore is not on the allow list.
      allowed, message = is_file_upload_allowed(".bashrc")
      assert allowed is False
      assert message is not None
      assert "is not permitted" in message
    finally:
      restore()

  def test_edge_cases(self):
    restore = self._configure([".txt"], [".exe"])

    try:
      # A trailing dot and a lone dot both fail the allow list.
      for candidate in ("file.", "."):
        allowed, message = is_file_upload_allowed(candidate)
        assert allowed is False
        assert message is not None
        assert "is not permitted" in message

      # Consecutive dots do not change which suffix is the extension.
      allowed, message = is_file_upload_allowed("file..txt")
      assert allowed is True
      assert message is None
    finally:
      restore()

0 commit comments

Comments
 (0)