From 5a29517f3f63464261effadb578c465509310f0a Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Tue, 24 Sep 2024 18:10:41 +0200 Subject: [PATCH 01/20] incomplete work on sri --- helpers/sitespeed_helper.py | 2 + helpers/sri_helper.py | 262 ++++++++++++++++++++++++++++++++++++ 2 files changed, 264 insertions(+) create mode 100644 helpers/sri_helper.py diff --git a/helpers/sitespeed_helper.py b/helpers/sitespeed_helper.py index f7889e70..0b9b91bf 100644 --- a/helpers/sitespeed_helper.py +++ b/helpers/sitespeed_helper.py @@ -8,6 +8,7 @@ from helpers.data_helper import append_domain_entry,\ append_domain_entry_with_key, has_domain_entry_with_key from helpers.http_header_helper import append_data_from_response_headers +from helpers.sri_helper import append_sri_data def get_data_from_sitespeed(filename, org_domain): """ @@ -105,6 +106,7 @@ def get_data_from_sitespeed(filename, org_domain): result) append_csp_data(req_url, req_domain, res, org_domain, result) + append_sri_data(req_url, req_domain, res, org_domain, result) result['visits'] = 1 return result diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py new file mode 100644 index 00000000..949b7de4 --- /dev/null +++ b/helpers/sri_helper.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +# pylint: disable=too-many-lines +import base64 +import re +import urllib +import urllib.parse +from helpers.hash_helper import create_sha256_hash +from helpers.setting_helper import get_config +from models import Rating + +# pylint: disable=too-many-arguments +def rate_csp(result_dict, global_translation, local_translation, + org_domain, org_www_domain, domain, should_create_recommendation): + """ + This function rates the Content Security Policy (CSP) of a given domain. + + Parameters: + result_dict (dict): A dictionary containing the results of the CSP checks. + global_translation (function): A function to translate text to a global language. 
+ local_translation (function): A function to translate text to a local language. + org_domain (str): The original domain. + org_www_domain (str): The original domain with 'www.' prefix. + domain (str): The domain to be rated. + should_create_recommendation (bool): A flag indicating whether to create a recommendation. + + Returns: + Rating: A Rating object containing the overall rating, + standards rating, and integrity and security rating. + """ + rating = Rating(global_translation, get_config('general.review.improve-only')) + if not isinstance(result_dict[domain], dict): + return rating + + if domain not in (org_domain, org_www_domain): + return rating + + if 'CSP-HEADER-FOUND' in result_dict[domain]['features'] or\ + 'CSP-META-FOUND' in result_dict[domain]['features']: + total_number_of_sitespeedruns = result_dict['visits'] + + if 'CSP-UNSUPPORTED-IN-META' in result_dict[domain]['features']: + sub_rating = Rating( + global_translation, + get_config('general.review.improve-only')) + sub_rating.set_overall(1.0) + sub_rating.set_standards(1.0, + local_translation( + 'TEXT_REVIEW_CSP_UNSUPPORTED_IN_META' + ).format(domain)) + rating += sub_rating + + rating += rate_csp_depricated(domain, result_dict, local_translation, global_translation) + + for policy_name in CSP_POLICIES_SUPPORTED_SRC: + policy_object = None + if policy_name in result_dict[domain]['csp-objects']: + policy_object = result_dict[domain]['csp-objects'][policy_name] + else: + continue + + rating += rate_csp_policy( + domain, + total_number_of_sitespeedruns, + policy_object, + local_translation, + global_translation) + + rating += rate_csp_fallbacks(domain, result_dict, local_translation, global_translation) + + elif 'HTML-FOUND' in result_dict[domain]['features'] and\ + (domain in (org_domain, org_www_domain)): + rating = Rating(global_translation, get_config('general.review.improve-only')) + rating.set_overall(1.0) + rating.set_standards(1.0, + 
local_translation('TEXT_REVIEW_CSP_NOT_FOUND').format(domain)) + rating.set_integrity_and_security(1.0, + local_translation('TEXT_REVIEW_CSP_NOT_FOUND').format(domain)) + + final_rating = create_final_csp_rating(global_translation, local_translation, domain, rating) + + if should_create_recommendation and 'csp-findings' in result_dict[domain]: + rec_rating, text_recommendation = create_csp_recommendation( + domain, + result_dict, + org_domain, + org_www_domain, + local_translation, + global_translation) + if rec_rating.get_integrity_and_security() > final_rating.get_integrity_and_security(): + final_rating.overall_review = text_recommendation + final_rating.overall_review + + return final_rating + +def append_sri_data(req_url, req_domain, res, org_domain, result): + """ + Appends Subresource Integrity (SRI) data for various types of content. + + This function checks the type of content (HTML) and + calls the appropriate function to append the SRI data to the result dictionary. + + Args: + req_url (str): The requested URL. + req_domain (str): The requested domain. + res (dict): The response dictionary containing the content. + org_domain (str): The original domain. + result (dict): The result dictionary where the CSP data will be appended. + + Returns: + bool: True if there is a match in the CSP findings, False otherwise. + """ + csp_findings_match = False + if 'content' in res and 'text' in res['content']: + if 'mimeType' in res['content'] and 'text/html' in res['content']['mimeType']: + csp_findings_match = csp_findings_match or append_sri_data_for_html( + req_url, + req_domain, + res, + org_domain, + result) + +def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): + """ + Appends Subresource Integrity (SRI) data for HTML content and linked resources. + + This function parses the HTML content and identifies the SRI from attributes. + It also identifies linked resources such as style, and script. 
+ It then appends the SRI data for these resources to the result dictionary. + + Args: + req_url (str): The requested URL. + req_domain (str): The requested domain. + res (dict): The response dictionary containing the HTML content. + org_domain (str): The original domain. + result (dict): The result dictionary where the SRI data will be appended. + + Returns: + bool: True if there is a match in the CSP findings, False otherwise. + """ + csp_findings_match = False + # Reference: https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity + # https://www.srihash.org/ + content = res['content']['text'] + regex = ( + r'(?P(?Plink|script)<.*? integrity="(?P[^"]+)".*?>)' + ) + matches = re.finditer(regex, content, re.MULTILINE) + for _, match in enumerate(matches, start=1): + raw = match.group('raw') + name = match.group('name').lower() + integrity = match.group('integrity') + + # link elements with attributes: + # - rel="stylesheet" + # - rel="preload" + # - rel="modulepreload" + + csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( + req_domain, + org_domain, + result, + content) + + regex = r'<(?Pstyle|script|form)>' + matches = re.finditer(regex, content, re.DOTALL | re.IGNORECASE | re.MULTILINE) + for _, match in enumerate(matches, start=1): + element_name = match.group('type').lower() + if element_name in ('style', 'script'): + key = f'\'unsafe-inline\'|{element_name}' + if key not in result[org_domain]['csp-findings']['quotes']: + result[org_domain]['csp-findings']['quotes'].append(key) + csp_findings_match = True + elif element_name == 'form': + element_url = url_2_host_source(req_url, req_domain) + o = urllib.parse.urlparse(element_url) + element_domain = o.hostname + if element_domain == org_domain: + key = f'\'self\'|{element_name}' + if key not in result[org_domain]['csp-findings']['quotes']: + result[org_domain]['csp-findings']['quotes'].append(key) + csp_findings_match = True + else: + key = 
f'{element_domain}|{element_name}' + if key not in result[org_domain]['csp-findings']['host-sources']: + result[org_domain]['csp-findings']['host-sources'].append(key) + csp_findings_match = True + return csp_findings_match + +def append_csp_data_for_linked_resources(req_domain, org_domain, result, content): + """ + Appends Content Security Policy (CSP) data for linked resources in a given HTML content. + + This function parses the HTML content and identifies linked resources such as + style, link, script, img, iframe, form, base, and frame elements. + It then appends the CSP data for these resources to the result dictionary. + + Args: + req_domain (str): The requested domain. + org_domain (str): The original domain. + result (dict): The result dictionary where the CSP data will be appended. + content (str): The HTML content to be parsed. + + Returns: + bool: True if there is a match in the CSP findings, False otherwise. + """ + csp_findings_match = False + regex = ( + r'(?P<(?Pstyle|link|script|img|iframe|form|base|frame)[^>]' + r'*((?Psrc|nonce|action|href)="(?P[^"]+)"[^>]*>))' + ) + + matches = re.finditer(regex, content, re.MULTILINE) + for _, match in enumerate(matches, start=1): + element_name = match.group('type').lower() + attribute_name = match.group('attribute').lower() + attribute_value = match.group('value').lower() + + element_url = url_2_host_source(attribute_value, req_domain) + o = urllib.parse.urlparse(element_url) + element_domain = o.hostname + if element_domain is None and element_url.startswith('data:'): + element_domain = 'data:' + elif element_domain == org_domain: + element_domain = '\'self\'' + + if attribute_name == 'nonce': + key = f'\'nonce-\'|{element_name}' + if key not in result[org_domain]['csp-findings']['quotes']: + result[org_domain]['csp-findings']['quotes'].append(key) + csp_findings_match = True + elif attribute_name == 'src': + if element_domain is not None: + key = f'{element_domain}|{element_name}' + if key not in 
result[org_domain]['csp-findings']['host-sources']: + result[org_domain]['csp-findings']['host-sources'].append(key) + csp_findings_match = True + elif attribute_name == 'action' and element_name == 'form': + key = f'{element_domain}|form-action' + if key not in result[org_domain]['csp-findings']['host-sources']: + result[org_domain]['csp-findings']['host-sources'].append(key) + csp_findings_match = True + return csp_findings_match + +def url_2_host_source(url, domain): + """ + Converts a given URL to a secure (https) URL if it's not already. + + Args: + url (str): The URL to be converted. + domain (str): The domain to be used if the URL doesn't contain a domain. + + Returns: + str: The converted secure URL. + """ + if url.startswith('//'): + return url.replace('//', 'https://') + if 'https://' in url: + return url + if '://' in url: + return url + if ':' in url: + return url + return f'https://{domain}/{url}' From 727f914a78266a60ef993d86657e1b7077618daf Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:22:25 +0200 Subject: [PATCH 02/20] get sri url and type --- helpers/sri_helper.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 949b7de4..62e8e6b0 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -141,8 +141,8 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # https://www.srihash.org/ content = res['content']['text'] regex = ( - r'(?P(?Plink|script)<.*? integrity="(?P[^"]+)".*?>)' - ) + r'(?P<(?Plink|script)[^<]*? 
integrity=["\'](?P[^"\']+)["\'][^>]*?>)' + ) matches = re.finditer(regex, content, re.MULTILINE) for _, match in enumerate(matches, start=1): raw = match.group('raw') @@ -153,6 +153,40 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # - rel="stylesheet" # - rel="preload" # - rel="modulepreload" + print('B', raw) + print('\tname:', name) + print('\tintegrity:', integrity) + + src = None + regex_src = r'(href|src)="(?P[^"\']+)["\']' + group_src = re.search(regex_src, raw, re.IGNORECASE) + if group_src is not None: + src = group_src.group('src') + print('\tsrc/href:', src) + + src_type = None + if name == 'script': + src_type = 'script' + else: + regex_type = r'(as)="(?P[^"\']+)["\']' + group_type = re.search(regex_type, raw, re.IGNORECASE) + if group_type is not None: + tmp = group_type.group('as').lower() + if tmp in ('style', 'font', 'img', 'script'): + src_type = tmp + + if src_type is None: + regex_rel = r'(rel)="(?P[^"\']+)["\']' + group_rel = re.search(regex_rel, raw, re.IGNORECASE) + if group_rel is not None: + tmp = group_rel.group('rel').lower() + if tmp in ('stylesheet'): + src_type = 'style' + + print('\ttype:', src_type) + + + print('') csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( req_domain, From bdbd0c4e03dfc3e6f539c9713cdaed07ce80c729 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Thu, 26 Sep 2024 18:27:09 +0200 Subject: [PATCH 03/20] ensure absolue url in sri --- helpers/sri_helper.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 62e8e6b0..d4b3c32e 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -162,6 +162,7 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): group_src = re.search(regex_src, raw, re.IGNORECASE) if group_src is not None: src = group_src.group('src') + src = url_2_host_source(src, req_domain) print('\tsrc/href:', src) 
src_type = None @@ -293,4 +294,6 @@ def url_2_host_source(url, domain): return url if ':' in url: return url + if url.startswith('/'): + url = url.strip('/') return f'https://{domain}/{url}' From c7d970af832db750eb85c9d7356ab63b31b34365 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:44:01 +0200 Subject: [PATCH 04/20] debug on invalid use --- helpers/sri_helper.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index d4b3c32e..c15c4b18 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -140,6 +140,7 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # Reference: https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity # https://www.srihash.org/ content = res['content']['text'] + # TODO: Should we match all elements and give penalty when used wrong? regex = ( r'(?P<(?Plink|script)[^<]*? 
integrity=["\'](?P[^"\']+)["\'][^>]*?>)' ) @@ -176,16 +177,21 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): if tmp in ('style', 'font', 'img', 'script'): src_type = tmp - if src_type is None: - regex_rel = r'(rel)="(?P[^"\']+)["\']' - group_rel = re.search(regex_rel, raw, re.IGNORECASE) - if group_rel is not None: - tmp = group_rel.group('rel').lower() - if tmp in ('stylesheet'): - src_type = 'style' + link_rel = None + regex_rel = r'(rel)="(?P[^"\']+)["\']' + group_rel = re.search(regex_rel, raw, re.IGNORECASE) + if group_rel is not None: + link_rel = group_rel.group('rel').lower() + if src_type is None and link_rel in ('stylesheet'): + src_type = 'style' print('\ttype:', src_type) + print('\trel:', link_rel) + if name in ('link'): + if link_rel not in ('stylesheet', 'preload', 'modulepreload'): + # TODO: Do something when using it incorrectly + print('WEBSITE WARNING: USING integrity incorrectly!') print('') From 8f7cc5f5934c249eafe67c42b583bda4eefb2456 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 16:50:05 +0200 Subject: [PATCH 05/20] added warning for invalid use --- helpers/sri_helper.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index c15c4b18..9db0169a 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -141,10 +141,13 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # https://www.srihash.org/ content = res['content']['text'] # TODO: Should we match all elements and give penalty when used wrong? + # regex = ( + # r'(?P<(?Plink|script)[^<]*? integrity=["\'](?P[^"\']+)["\'][^>]*?>)' + # ) regex = ( - r'(?P<(?Plink|script)[^<]*? integrity=["\'](?P[^"\']+)["\'][^>]*?>)' + r'(?P<(?P[a-z]+)[^<]*? 
integrity=["\'](?P[^"\']+)["\'][^>]*?>)' ) - matches = re.finditer(regex, content, re.MULTILINE) + matches = re.finditer(regex, content, re.MULTILINE | re.IGNORECASE) for _, match in enumerate(matches, start=1): raw = match.group('raw') name = match.group('name').lower() @@ -193,6 +196,10 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # TODO: Do something when using it incorrectly print('WEBSITE WARNING: USING integrity incorrectly!') + if name not in ('link', 'script'): + # TODO: Do something when using it incorrectly + print('WEBSITE WARNING: USING integrity incorrectly!') + print('') csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( From 4b0952b552c48152642147b516a82d3e3c43d9ca Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:28:51 +0200 Subject: [PATCH 06/20] lets find candidates --- helpers/sri_helper.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 9db0169a..a734b9e9 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -141,6 +141,41 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # https://www.srihash.org/ content = res['content']['text'] # TODO: Should we match all elements and give penalty when used wrong? 
+ regex = ( + r'(?P<(?Plink|script) [^>]*?>)' + ) + + matches = re.finditer(regex, content, re.MULTILINE | re.IGNORECASE) + for _, match in enumerate(matches, start=1): + raw = match.group('raw') + name = match.group('name').lower() + + src = None + regex_src = r'(href|src)="(?P[^"\']+)["\']' + group_src = re.search(regex_src, raw, re.IGNORECASE) + if group_src is not None: + src = group_src.group('src') + src = url_2_host_source(src, req_domain) + + link_rel = None + regex_rel = r'(rel)="(?P[^"\']+)["\']' + group_rel = re.search(regex_rel, raw, re.IGNORECASE) + if group_rel is not None: + link_rel = group_rel.group('rel').lower() + + should_have_integrity = False + if name in ('link'): + if link_rel in ('stylesheet', 'preload', 'modulepreload'): + should_have_integrity = True + elif name in ('script') and src is not None: + should_have_integrity = True + + if should_have_integrity: + print('A', raw) + print('\tname:', name) + print('\tsrc/href:', src) + print('') + # regex = ( # r'(?P<(?Plink|script)[^<]*? integrity=["\'](?P[^"\']+)["\'][^>]*?>)' # ) From 882597aa3c55f84f4d7fb9469c7b67931ca9f6c5 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 20:49:11 +0200 Subject: [PATCH 07/20] lets use lists --- helpers/sri_helper.py | 168 +++++++++++++++++++++++++----------------- 1 file changed, 99 insertions(+), 69 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index a734b9e9..f8d1d2ad 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # pylint: disable=too-many-lines import base64 +import json import re import urllib import urllib.parse @@ -141,44 +142,49 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # https://www.srihash.org/ content = res['content']['text'] # TODO: Should we match all elements and give penalty when used wrong? 
- regex = ( - r'(?P<(?Plink|script) [^>]*?>)' - ) - - matches = re.finditer(regex, content, re.MULTILINE | re.IGNORECASE) - for _, match in enumerate(matches, start=1): - raw = match.group('raw') - name = match.group('name').lower() - - src = None - regex_src = r'(href|src)="(?P[^"\']+)["\']' - group_src = re.search(regex_src, raw, re.IGNORECASE) - if group_src is not None: - src = group_src.group('src') - src = url_2_host_source(src, req_domain) - - link_rel = None - regex_rel = r'(rel)="(?P[^"\']+)["\']' - group_rel = re.search(regex_rel, raw, re.IGNORECASE) - if group_rel is not None: - link_rel = group_rel.group('rel').lower() - - should_have_integrity = False - if name in ('link'): - if link_rel in ('stylesheet', 'preload', 'modulepreload'): - should_have_integrity = True - elif name in ('script') and src is not None: - should_have_integrity = True - - if should_have_integrity: - print('A', raw) - print('\tname:', name) - print('\tsrc/href:', src) - print('') - + candidates = get_sri_candidates(req_domain, content) + nice_candidates = json.dumps(candidates, indent=3) + print('Candidates', nice_candidates) # regex = ( # r'(?P<(?Plink|script)[^<]*? 
integrity=["\'](?P[^"\']+)["\'][^>]*?>)' # ) + found_sris = get_sris(req_domain, content) + nice_found_sris = json.dumps(found_sris, indent=3) + print('SRI', nice_found_sris) + + csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( + req_domain, + org_domain, + result, + content) + + regex = r'<(?Pstyle|script|form)>' + matches = re.finditer(regex, content, re.DOTALL | re.IGNORECASE | re.MULTILINE) + for _, match in enumerate(matches, start=1): + element_name = match.group('type').lower() + if element_name in ('style', 'script'): + key = f'\'unsafe-inline\'|{element_name}' + if key not in result[org_domain]['csp-findings']['quotes']: + result[org_domain]['csp-findings']['quotes'].append(key) + csp_findings_match = True + elif element_name == 'form': + element_url = url_2_host_source(req_url, req_domain) + o = urllib.parse.urlparse(element_url) + element_domain = o.hostname + if element_domain == org_domain: + key = f'\'self\'|{element_name}' + if key not in result[org_domain]['csp-findings']['quotes']: + result[org_domain]['csp-findings']['quotes'].append(key) + csp_findings_match = True + else: + key = f'{element_domain}|{element_name}' + if key not in result[org_domain]['csp-findings']['host-sources']: + result[org_domain]['csp-findings']['host-sources'].append(key) + csp_findings_match = True + return csp_findings_match + +def get_sris(req_domain, content): + sri_list = [] regex = ( r'(?P<(?P[a-z]+)[^<]*? 
integrity=["\'](?P[^"\']+)["\'][^>]*?>)' ) @@ -192,9 +198,14 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # - rel="stylesheet" # - rel="preload" # - rel="modulepreload" - print('B', raw) - print('\tname:', name) - print('\tintegrity:', integrity) + sri = { + 'raw': raw, + 'tag-name': name, + 'integrity': integrity + } + # print('B', raw) + # print('\tname:', name) + # print('\tintegrity:', integrity) src = None regex_src = r'(href|src)="(?P[^"\']+)["\']' @@ -202,7 +213,8 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): if group_src is not None: src = group_src.group('src') src = url_2_host_source(src, req_domain) - print('\tsrc/href:', src) + sri['src'] = src + # print('\tsrc/href:', src) src_type = None if name == 'script': @@ -223,8 +235,10 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): if src_type is None and link_rel in ('stylesheet'): src_type = 'style' - print('\ttype:', src_type) - print('\trel:', link_rel) + sri['type'] = src_type + sri['rel'] = link_rel + # print('\ttype:', src_type) + # print('\trel:', link_rel) if name in ('link'): if link_rel not in ('stylesheet', 'preload', 'modulepreload'): @@ -236,37 +250,53 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): print('WEBSITE WARNING: USING integrity incorrectly!') print('') + sri_list.append(sri) - csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( - req_domain, - org_domain, - result, - content) + return sri_list - regex = r'<(?Pstyle|script|form)>' - matches = re.finditer(regex, content, re.DOTALL | re.IGNORECASE | re.MULTILINE) +def get_sri_candidates(req_domain, content): + candidates = [] + regex = ( + r'(?P<(?Plink|script) [^>]*?>)' + ) + + matches = re.finditer(regex, content, re.MULTILINE | re.IGNORECASE) for _, match in enumerate(matches, start=1): - element_name = match.group('type').lower() - if element_name in ('style', 'script'): - key = 
f'\'unsafe-inline\'|{element_name}' - if key not in result[org_domain]['csp-findings']['quotes']: - result[org_domain]['csp-findings']['quotes'].append(key) - csp_findings_match = True - elif element_name == 'form': - element_url = url_2_host_source(req_url, req_domain) - o = urllib.parse.urlparse(element_url) - element_domain = o.hostname - if element_domain == org_domain: - key = f'\'self\'|{element_name}' - if key not in result[org_domain]['csp-findings']['quotes']: - result[org_domain]['csp-findings']['quotes'].append(key) - csp_findings_match = True - else: - key = f'{element_domain}|{element_name}' - if key not in result[org_domain]['csp-findings']['host-sources']: - result[org_domain]['csp-findings']['host-sources'].append(key) - csp_findings_match = True - return csp_findings_match + raw = match.group('raw') + name = match.group('name').lower() + + src = None + regex_src = r'(href|src)="(?P[^"\']+)["\']' + group_src = re.search(regex_src, raw, re.IGNORECASE) + if group_src is not None: + src = group_src.group('src') + src = url_2_host_source(src, req_domain) + + link_rel = None + regex_rel = r'(rel)="(?P[^"\']+)["\']' + group_rel = re.search(regex_rel, raw, re.IGNORECASE) + if group_rel is not None: + link_rel = group_rel.group('rel').lower() + + should_have_integrity = False + if name in ('link'): + if link_rel in ('stylesheet', 'preload', 'modulepreload'): + should_have_integrity = True + elif name in ('script') and src is not None: + should_have_integrity = True + + if should_have_integrity: + # print('A', raw) + # print('\tname:', name) + # print('\tsrc/href:', src) + # print('') + candidates.append({ + 'raw': raw, + 'tag-name': name, + 'src': src + }) + + return candidates def append_csp_data_for_linked_resources(req_domain, org_domain, result, content): """ From 66a64080d0ee5799bc1db3743fd93a7522dd7f55 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 21:35:17 +0200 Subject: [PATCH 
08/20] how many candidates are missing sri? --- helpers/sri_helper.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index f8d1d2ad..0c64a7bf 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -145,12 +145,24 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): candidates = get_sri_candidates(req_domain, content) nice_candidates = json.dumps(candidates, indent=3) print('Candidates', nice_candidates) - # regex = ( - # r'(?P<(?Plink|script)[^<]*? integrity=["\'](?P[^"\']+)["\'][^>]*?>)' - # ) - found_sris = get_sris(req_domain, content) - nice_found_sris = json.dumps(found_sris, indent=3) - print('SRI', nice_found_sris) + + sri_list = get_sris(req_domain, content) + nice_sri_list = json.dumps(sri_list, indent=3) + print('SRI', nice_sri_list) + + for sri in sri_list: + found_candidate = False + for candidate in candidates: + if candidate['raw'] == sri['raw']: + found_candidate = candidate + break + + if found_candidate is not None: + candidates.remove(found_candidate) + + nice_candidates = json.dumps(candidates, indent=3) + print('Candidates', nice_candidates) + csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( req_domain, @@ -243,10 +255,11 @@ def get_sris(req_domain, content): if name in ('link'): if link_rel not in ('stylesheet', 'preload', 'modulepreload'): # TODO: Do something when using it incorrectly + sri['error'] = 'Using integrity attribute in combination with unallowed rel attribute value.' print('WEBSITE WARNING: USING integrity incorrectly!') - - if name not in ('link', 'script'): + elif name not in ('link', 'script'): # TODO: Do something when using it incorrectly + sri['error'] = 'Using integrity attribute on wrong element type.' 
print('WEBSITE WARNING: USING integrity incorrectly!') print('') From 2efebd6cb7046c56ead2e6a02b48a36d8fc79a63 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 22:32:25 +0200 Subject: [PATCH 09/20] adding sri rating logic --- helpers/sri_helper.py | 282 ++++++++++++---------------------------- tests/http_validator.py | 8 ++ 2 files changed, 94 insertions(+), 196 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 0c64a7bf..c1990fe8 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -1,28 +1,26 @@ # -*- coding: utf-8 -*- # pylint: disable=too-many-lines -import base64 import json import re import urllib import urllib.parse -from helpers.hash_helper import create_sha256_hash +from helpers.data_helper import append_domain_entry from helpers.setting_helper import get_config from models import Rating # pylint: disable=too-many-arguments -def rate_csp(result_dict, global_translation, local_translation, - org_domain, org_www_domain, domain, should_create_recommendation): +def rate_sri(result_dict, global_translation, local_translation, + org_domain, org_www_domain, domain): """ - This function rates the Content Security Policy (CSP) of a given domain. + This function rates the Subresource Integrity (SRI) of a given domain. Parameters: - result_dict (dict): A dictionary containing the results of the CSP checks. + result_dict (dict): A dictionary containing the results of the SRI checks. global_translation (function): A function to translate text to a global language. local_translation (function): A function to translate text to a local language. org_domain (str): The original domain. org_www_domain (str): The original domain with 'www.' prefix. domain (str): The domain to be rated. - should_create_recommendation (bool): A flag indicating whether to create a recommendation. 
Returns: Rating: A Rating object containing the overall rating, @@ -35,62 +33,44 @@ def rate_csp(result_dict, global_translation, local_translation, if domain not in (org_domain, org_www_domain): return rating - if 'CSP-HEADER-FOUND' in result_dict[domain]['features'] or\ - 'CSP-META-FOUND' in result_dict[domain]['features']: - total_number_of_sitespeedruns = result_dict['visits'] - - if 'CSP-UNSUPPORTED-IN-META' in result_dict[domain]['features']: - sub_rating = Rating( - global_translation, - get_config('general.review.improve-only')) - sub_rating.set_overall(1.0) - sub_rating.set_standards(1.0, - local_translation( - 'TEXT_REVIEW_CSP_UNSUPPORTED_IN_META' - ).format(domain)) - rating += sub_rating - - rating += rate_csp_depricated(domain, result_dict, local_translation, global_translation) - - for policy_name in CSP_POLICIES_SUPPORTED_SRC: - policy_object = None - if policy_name in result_dict[domain]['csp-objects']: - policy_object = result_dict[domain]['csp-objects'][policy_name] - else: - continue - - rating += rate_csp_policy( - domain, - total_number_of_sitespeedruns, - policy_object, - local_translation, - global_translation) - - rating += rate_csp_fallbacks(domain, result_dict, local_translation, global_translation) - + if 'SRI-WITH-ERRORS' in result_dict[domain]['features']: + sub_rating = Rating( + global_translation, + get_config('general.review.improve-only')) + sub_rating.set_overall(3.0) + sub_rating.set_standards(3.0, + local_translation( + 'TEXT_REVIEW_SRI_WITH_ERRORS' + ).format(domain)) + sub_rating.set_integrity_and_security(3.0, + local_translation( + 'TEXT_REVIEW_SRI_WITH_ERRORS' + ).format(domain)) + rating += sub_rating + elif 'SRI-COMPLIANT' in result_dict[domain]['features']: + sub_rating = Rating( + global_translation, + get_config('general.review.improve-only')) + sub_rating.set_overall(5.0) + sub_rating.set_standards(5.0, + local_translation( + 'TEXT_REVIEW_SRI_COMPLIANT' + ).format(domain)) + sub_rating.set_integrity_and_security(5.0, 
+ local_translation( + 'TEXT_REVIEW_SRI_COMPLIANT' + ).format(domain)) + rating += sub_rating elif 'HTML-FOUND' in result_dict[domain]['features'] and\ (domain in (org_domain, org_www_domain)): rating = Rating(global_translation, get_config('general.review.improve-only')) rating.set_overall(1.0) rating.set_standards(1.0, - local_translation('TEXT_REVIEW_CSP_NOT_FOUND').format(domain)) + local_translation('TEXT_REVIEW_SRI_NONE_COMPLIANT').format(domain)) rating.set_integrity_and_security(1.0, - local_translation('TEXT_REVIEW_CSP_NOT_FOUND').format(domain)) - - final_rating = create_final_csp_rating(global_translation, local_translation, domain, rating) - - if should_create_recommendation and 'csp-findings' in result_dict[domain]: - rec_rating, text_recommendation = create_csp_recommendation( - domain, - result_dict, - org_domain, - org_www_domain, - local_translation, - global_translation) - if rec_rating.get_integrity_and_security() > final_rating.get_integrity_and_security(): - final_rating.overall_review = text_recommendation + final_rating.overall_review + local_translation('TEXT_REVIEW_SRI_NONE_COMPLIANT').format(domain)) - return final_rating + return rating def append_sri_data(req_url, req_domain, res, org_domain, result): """ @@ -105,14 +85,10 @@ def append_sri_data(req_url, req_domain, res, org_domain, result): res (dict): The response dictionary containing the content. org_domain (str): The original domain. result (dict): The result dictionary where the CSP data will be appended. - - Returns: - bool: True if there is a match in the CSP findings, False otherwise. 
""" - csp_findings_match = False if 'content' in res and 'text' in res['content']: if 'mimeType' in res['content'] and 'text/html' in res['content']['mimeType']: - csp_findings_match = csp_findings_match or append_sri_data_for_html( + append_sri_data_for_html( req_url, req_domain, res, @@ -133,25 +109,27 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): res (dict): The response dictionary containing the HTML content. org_domain (str): The original domain. result (dict): The result dictionary where the SRI data will be appended. - - Returns: - bool: True if there is a match in the CSP findings, False otherwise. """ - csp_findings_match = False # Reference: https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity # https://www.srihash.org/ content = res['content']['text'] - # TODO: Should we match all elements and give penalty when used wrong? + candidates = get_sri_candidates(req_domain, content) - nice_candidates = json.dumps(candidates, indent=3) - print('Candidates', nice_candidates) + # nice_candidates = json.dumps(candidates, indent=3) + # print('Candidates', nice_candidates) sri_list = get_sris(req_domain, content) nice_sri_list = json.dumps(sri_list, indent=3) print('SRI', nice_sri_list) + sri_errors = [] + for sri in sri_list: - found_candidate = False + found_candidate = None + + if 'error' in sri: + sri_errors.append(sri['error']) + for candidate in candidates: if candidate['raw'] == sri['raw']: found_candidate = candidate @@ -160,40 +138,21 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): if found_candidate is not None: candidates.remove(found_candidate) - nice_candidates = json.dumps(candidates, indent=3) - print('Candidates', nice_candidates) - - - csp_findings_match = csp_findings_match or append_csp_data_for_linked_resources( - req_domain, - org_domain, - result, - content) - - regex = r'<(?Pstyle|script|form)>' - matches = re.finditer(regex, content, re.DOTALL | 
re.IGNORECASE | re.MULTILINE) - for _, match in enumerate(matches, start=1): - element_name = match.group('type').lower() - if element_name in ('style', 'script'): - key = f'\'unsafe-inline\'|{element_name}' - if key not in result[org_domain]['csp-findings']['quotes']: - result[org_domain]['csp-findings']['quotes'].append(key) - csp_findings_match = True - elif element_name == 'form': - element_url = url_2_host_source(req_url, req_domain) - o = urllib.parse.urlparse(element_url) - element_domain = o.hostname - if element_domain == org_domain: - key = f'\'self\'|{element_name}' - if key not in result[org_domain]['csp-findings']['quotes']: - result[org_domain]['csp-findings']['quotes'].append(key) - csp_findings_match = True - else: - key = f'{element_domain}|{element_name}' - if key not in result[org_domain]['csp-findings']['host-sources']: - result[org_domain]['csp-findings']['host-sources'].append(key) - csp_findings_match = True - return csp_findings_match + # nice_candidates = json.dumps(candidates, indent=3) + # print('Candidates', nice_candidates) + + if len(sri_errors) > 0: + append_domain_entry( + req_domain, + 'features', + 'SRI-WITH-ERRORS', + result) + elif len(candidates) == 0: + append_domain_entry( + req_domain, + 'features', + 'SRI-COMPLIANT', + result) def get_sris(req_domain, content): sri_list = [] @@ -206,27 +165,19 @@ def get_sris(req_domain, content): name = match.group('name').lower() integrity = match.group('integrity') - # link elements with attributes: - # - rel="stylesheet" - # - rel="preload" - # - rel="modulepreload" sri = { 'raw': raw, 'tag-name': name, 'integrity': integrity } - # print('B', raw) - # print('\tname:', name) - # print('\tintegrity:', integrity) src = None regex_src = r'(href|src)="(?P[^"\']+)["\']' group_src = re.search(regex_src, raw, re.IGNORECASE) if group_src is not None: src = group_src.group('src') - src = url_2_host_source(src, req_domain) sri['src'] = src - # print('\tsrc/href:', src) + sri['src-same-origin'] = 
is_same_domain(src, req_domain) src_type = None if name == 'script': @@ -249,8 +200,6 @@ def get_sris(req_domain, content): sri['type'] = src_type sri['rel'] = link_rel - # print('\ttype:', src_type) - # print('\trel:', link_rel) if name in ('link'): if link_rel not in ('stylesheet', 'preload', 'modulepreload'): @@ -262,7 +211,6 @@ def get_sris(req_domain, content): sri['error'] = 'Using integrity attribute on wrong element type.' print('WEBSITE WARNING: USING integrity incorrectly!') - print('') sri_list.append(sri) return sri_list @@ -279,11 +227,12 @@ def get_sri_candidates(req_domain, content): name = match.group('name').lower() src = None + src_same_origin = False regex_src = r'(href|src)="(?P[^"\']+)["\']' group_src = re.search(regex_src, raw, re.IGNORECASE) if group_src is not None: src = group_src.group('src') - src = url_2_host_source(src, req_domain) + src_same_origin = is_same_domain(src, req_domain) link_rel = None regex_rel = r'(rel)="(?P[^"\']+)["\']' @@ -298,93 +247,34 @@ def get_sri_candidates(req_domain, content): elif name in ('script') and src is not None: should_have_integrity = True + # NOTE: Remove same domain resources + if should_have_integrity and src_same_origin: + should_have_integrity = False + if should_have_integrity: - # print('A', raw) - # print('\tname:', name) - # print('\tsrc/href:', src) - # print('') candidates.append({ 'raw': raw, 'tag-name': name, - 'src': src + 'src': src, + 'src-same-origin': src_same_origin }) return candidates -def append_csp_data_for_linked_resources(req_domain, org_domain, result, content): - """ - Appends Content Security Policy (CSP) data for linked resources in a given HTML content. - - This function parses the HTML content and identifies linked resources such as - style, link, script, img, iframe, form, base, and frame elements. - It then appends the CSP data for these resources to the result dictionary. - - Args: - req_domain (str): The requested domain. - org_domain (str): The original domain. 
- result (dict): The result dictionary where the CSP data will be appended. - content (str): The HTML content to be parsed. - - Returns: - bool: True if there is a match in the CSP findings, False otherwise. - """ - csp_findings_match = False - regex = ( - r'(?P<(?Pstyle|link|script|img|iframe|form|base|frame)[^>]' - r'*((?Psrc|nonce|action|href)="(?P[^"]+)"[^>]*>))' - ) - - matches = re.finditer(regex, content, re.MULTILINE) - for _, match in enumerate(matches, start=1): - element_name = match.group('type').lower() - attribute_name = match.group('attribute').lower() - attribute_value = match.group('value').lower() - - element_url = url_2_host_source(attribute_value, req_domain) - o = urllib.parse.urlparse(element_url) - element_domain = o.hostname - if element_domain is None and element_url.startswith('data:'): - element_domain = 'data:' - elif element_domain == org_domain: - element_domain = '\'self\'' - - if attribute_name == 'nonce': - key = f'\'nonce-\'|{element_name}' - if key not in result[org_domain]['csp-findings']['quotes']: - result[org_domain]['csp-findings']['quotes'].append(key) - csp_findings_match = True - elif attribute_name == 'src': - if element_domain is not None: - key = f'{element_domain}|{element_name}' - if key not in result[org_domain]['csp-findings']['host-sources']: - result[org_domain]['csp-findings']['host-sources'].append(key) - csp_findings_match = True - elif attribute_name == 'action' and element_name == 'form': - key = f'{element_domain}|form-action' - if key not in result[org_domain]['csp-findings']['host-sources']: - result[org_domain]['csp-findings']['host-sources'].append(key) - csp_findings_match = True - return csp_findings_match - -def url_2_host_source(url, domain): - """ - Converts a given URL to a secure (https) URL if it's not already. - - Args: - url (str): The URL to be converted. - domain (str): The domain to be used if the URL doesn't contain a domain. - - Returns: - str: The converted secure URL. 
- """ +def is_same_domain(url, domain): if url.startswith('//'): - return url.replace('//', 'https://') - if 'https://' in url: - return url - if '://' in url: - return url - if ':' in url: - return url - if url.startswith('/'): + url = url.replace('//', 'https://') + elif url.startswith('https://'): + url = url + elif '://' in url: + url = url + elif ':' in url: + url = url + elif url.startswith('/'): url = url.strip('/') - return f'https://{domain}/{url}' + url = f'https://{domain}/{url}' + + o = urllib.parse.urlparse(url) + resource_domain = o.hostname + + return domain == resource_domain diff --git a/tests/http_validator.py b/tests/http_validator.py index f7404714..b107efd3 100644 --- a/tests/http_validator.py +++ b/tests/http_validator.py @@ -13,6 +13,7 @@ from helpers.csp_helper import rate_csp from helpers.data_helper import append_domain_entry, has_domain_entry from helpers.sitespeed_helper import get_data_from_sitespeed +from helpers.sri_helper import rate_sri from helpers.tls_helper import rate_transfer_layers from helpers.setting_helper import get_config from models import Rating @@ -306,6 +307,13 @@ def rate(org_domain, result_dict, global_translation, local_translation): org_www_domain, domain, should_create_recommendation) + rating += rate_sri( + result_dict, + global_translation, + local_translation, + org_domain, + org_www_domain, + domain) return rating From 23552247519d13df6d581010f7c1361f47ea4e23 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 22:34:03 +0200 Subject: [PATCH 10/20] commented out debug code --- helpers/sri_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index c1990fe8..3c8363b3 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -119,8 +119,8 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): # print('Candidates', nice_candidates) sri_list = 
get_sris(req_domain, content) - nice_sri_list = json.dumps(sri_list, indent=3) - print('SRI', nice_sri_list) + # nice_sri_list = json.dumps(sri_list, indent=3) + # print('SRI', nice_sri_list) sri_errors = [] From 86728c0c8ef303e6f429704f48fb4355fa94218d Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Fri, 27 Sep 2024 22:42:33 +0200 Subject: [PATCH 11/20] Add basic translations --- locales/en/LC_MESSAGES/http_validator.mo | Bin 5907 -> 6169 bytes locales/en/LC_MESSAGES/http_validator.po | 9 +++++++++ locales/gov/LC_MESSAGES/http_validator.mo | Bin 5907 -> 6169 bytes locales/gov/LC_MESSAGES/http_validator.po | 9 +++++++++ locales/sv/LC_MESSAGES/http_validator.mo | Bin 5878 -> 6154 bytes locales/sv/LC_MESSAGES/http_validator.po | 11 ++++++++++- 6 files changed, 28 insertions(+), 1 deletion(-) diff --git a/locales/en/LC_MESSAGES/http_validator.mo b/locales/en/LC_MESSAGES/http_validator.mo index 6578f20e50492d78177464b2cb2190b8f8bc6bec..8c1ca6b5a81630f062f4835617d637fc31c7527b 100644 GIT binary patch delta 1266 zcma*lOGs2v9LMp$X==X8*J!5A?WJQmI*L#x7-555sxih%TUbNdXi_N5C}~l%EEEPs zgrZ$RMzj!ZQ;TSo)CfVFs6`nS5mu`pf`Y#P?tymIFf*U?JNMl4_}@ESd({_zRgg9# zeD?4w=U22!)1OaHn#fvWF1oM+*JGnGfSbstQ12hdEjWvAOyV|NG3Kv!@=mlw;&O~e z6$38fX1s}Be2gCahB~2@?p)u87P%jFBbQOHUpM&?>O9N10aG(XHliCVumyGgZgjK0 zjMLB)-2UAmvV>*iPpCI$WQx>bCF;0N)RT^)j+;T<$ei&B){x&}HRi7o*^bRvg?%O` zG_$_kqfv=(P*0MX<>VUF+<{v9v#1LtP&al9^@NM4Cwq&!p_FVVm!sz0CLb|5W^w}K zI^YhCd|WgGUZ6ik#P+!a>YU#7riM(cGUMwKz@~8J%$X+FkT9MPJkKhvO z{CBYd-(xG57CIa1E~NfCae|IIoWoLFMqT*3v8hO87kLcz5zM1jZVCCy2h;z<^t+0k z3e}^|XCq(fF?rPFsW=UNwzJ4u^1$S0CV#{n`hTL9EQ`TfVlOsgC)VNw>PhdLykc@0 z&q=QzK)U3d$x|j5JfxuuKSwXFlq|eVolI}*2-@M+_Ctpc+Is^b3HgtP!a;wh?e7Zf zl|XyI|Ch^Mc4&Xt9}Kn!7rL{2+3}8%o@gXCG!pHNSoYv>q(3?^JZ5>Asam|&dEbQ^ pD-emCj9LFN&>rj^ibf;7!)L~|I&L160AqomLf;xE!1IZ|;QxK97!MYShmt^!q z?GNbCq1cS5jzQO=gC$uEvRfG_ojmk>`FTUb`S`p(-_OtYS0}s6ouzLL(Fexo5Z@hq zS0YOPKVR3HZ6wZP7JuPZj7EbwY$o@j<_}{Fj$;!};~sqD*4(;vX1mNvR;1C&fK!;q 
zt60QIY{U1c1^@W-Ol)k&ks@SJjd<$33IrJJ26!k*6+Y3_P1dgy216; z4znk?jre7gJDh9BddO9N10aG(XHliCVumyGgZgjK0 zjMLB)-2UAmvV>*iPpCI$WQx>bCF;0N)RT^)j+;T<$ei&B){x&}HRi7o*^bRvg?%O` zG_$_kqfv=(P*0MX<>VUF+<{v9v#1LtP&al9^@NM4Cwq&!p_FVVm!sz0CLb|5W^w}K zI^YhCd|WgGUZ6ik#P+!a>YU#7riM(cGUMwKz@~8J%$X+FkT9MPJkKhvO z{CBYd-(xG57CIa1E~NfCae|IIoWoLFMqT*3v8hO87kLcz5zM1jZVCCy2h;z<^t+0k z3e}^|XCq(fF?rPFsW=UNwzJ4u^1$S0CV#{n`hTL9EQ`TfVlOsgC)VNw>PhdLykc@0 z&q=QzK)U3d$x|j5JfxuuKSwXFlq|eVolI}*2-@M+_Ctpc+Is^b3HgtP!a;wh?e7Zf zl|XyI|Ch^Mc4&Xt9}Kn!7rL{2+3}8%o@gXCG!pHNSoYv>q(3?^JZ5>Asam|&dEbQ^ pD-emCj9LFN&>rj^ibf;7!)L~|I&L160AqomLf;xE!1IZ|;QxK97!MYShmt^!q z?GNbCq1cS5jzQO=gC$uEvRfG_ojmk>`FTUb`S`p(-_OtYS0}s6ouzLL(Fexo5Z@hq zS0YOPKVR3HZ6wZP7JuPZj7EbwY$o@j<_}{Fj$;!};~sqD*4(;vX1mNvR;1C&fK!;q zt60QIY{U1c1^@W-Ol)k&ks@SJjd<$33IrJJ26!k*6+Y3_P1dgy216; z4znk?jre7gJDh9BddFvI}u-A*A=pv%%BI^6N1Kml3?Q?$nKRf3+=j?|w>Wa+R)5e6? zR^G+D3+8G1_sUKaSxC&m#aMz%u+Hek738C+zmMW7ynzmUj%)Cih{Z$wKZA_p0i zQE&lQ;&pW5Q!K}ys1sW0$>+^zk$X@ZxrF-sn#q%>^Ss4nm}(PQjt(rrEvWN%qJ#A% z%0PE;YxWC~7r2!C1@(uv43QcvMXhT`-RT+Bx-ry7?irt975O7pVBP|eN^HO~>^6Bs zGwaJ^2Br88btf5_$y|k+TTzexH0nYlsEyr3-Qg7K&OV?vl#-Rq#i+U7xYU71=k!Exuy~+UUi4CJJIF2sqq-@=KH&-JO73HAILAH33TzHSFr>2 zs=AOKWeBxCifme9CdW}v{AGjz&B+_g#&0J7G1<;+^dt_{6IhRW#O=5Zhmlfwf~)Zr z>J|MqRu?66069b%MXkSTvi%_gU0@PtkE$p>ojRJnv(@JbG_~y8zt_{~^^4zqz#s6r z+db}%fIjiIc-?b6?(q0`1>8Pgi!a`p>CcLsb)B!WS_is9!Tys2p~FGT(-#i*gnGk6 zRyosEh|`)a*%L~{27{shDQ)Z8;VtSil(90x>&QR@WbTSl7^)wU9d2cCM>$3T}Um=!vE>_CQfGN_dVx)=exY;%&q)Fw)m|+de1oa z@Lj{V5>fhpd|hI;oLI&r{=`)njRsqA9k~Z}e*s(ZGN$kmw&81c#;vI`OPdufOCv*v zBe)(fVHO|aM*N5xaNe&sRxjRYJ5dWcj=Fxq^L^AfFK{(ZV++pVT1?c0@w+g^{8pf$ z9Skm>Fnf&67`IqOqc<7(OmE70s`6w3mN6gOC$Z%m| zsTozWGUo9+YG+N$LdkNNCZEJkyn=1`6gS}~+=hQ~3+5A{f~T>Yd<}bX9CzcN1ohue zV=HgiUObC>cF*t~3!6gibeLuD!!cy7Hjk=Q1vO#1K0Kmh*h9XK2k|X-V{JoN$S%~Q zIe{AYMg#TN#G{;$KkyKCG=`mDM7`B_u?fdeFV%b0zoCpP@HeVbRdix=mO|yt$QrF5 zS&J2rqS`sngQxt&9b|4BK|c1JpA9&PdK9y6GqWlmMv7xYsQ$M-KS7Q23Ul}&S&Bu6 Pt4njSzIbUe*%$c-DaT?u diff --git 
a/locales/sv/LC_MESSAGES/http_validator.po b/locales/sv/LC_MESSAGES/http_validator.po index 4198812d..966100e0 100644 --- a/locales/sv/LC_MESSAGES/http_validator.po +++ b/locales/sv/LC_MESSAGES/http_validator.po @@ -190,4 +190,13 @@ msgid "TEXT_REVIEW_CSP_RECOMMENDED_RATING" msgstr "###### Rekommenderat CSP-betyg:{0}" msgid "TEXT_REVIEW_CSP_RECOMMENDED_TEXT" -msgstr "\r\n##### Vill du förbättra din Content Security Policy (CSP)?\r\nVarför inte prova följande CSP-svarshuvud för att komma igång med CSP?\r\nKom ihåg, nedanstående rekommendation är endast baserad på ett urval (läs: {0} sida(or)) av alla sidor på din webbplats och baseras utan att klicka på något.\r\n\r\n###### Rekommenderade policyer:\r\n{1}\r\n{2}\r\nLäs mer: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy\r\n\r\n" \ No newline at end of file +msgstr "\r\n##### Vill du förbättra din Content Security Policy (CSP)?\r\nVarför inte prova följande CSP-svarshuvud för att komma igång med CSP?\r\nKom ihåg, nedanstående rekommendation är endast baserad på ett urval (läs: {0} sida(or)) av alla sidor på din webbplats och baseras utan att klicka på något.\r\n\r\n###### Rekommenderade policyer:\r\n{1}\r\n{2}\r\nLäs mer: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Content-Security-Policy\r\n\r\n" + +msgid "TEXT_REVIEW_SRI_COMPLIANT" +msgstr "- {0}, Subresource Integrity (SRI)" + +msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT" +msgstr "- {0}, Kräver Subresource Integrity (SRI)" + +msgid "TEXT_REVIEW_SRI_WITH_ERRORS" +msgstr "- {0}, Använder Subresource Integrity (SRI) på felaktigt sätt" From c4020d33250a6f1526b82722e94408007916ea62 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Sat, 28 Sep 2024 16:50:55 +0200 Subject: [PATCH 12/20] pylint --- helpers/sitespeed_helper.py | 2 +- helpers/sri_helper.py | 249 ++++++++++++++++++++++++------------ 2 files changed, 170 insertions(+), 81 deletions(-) diff --git 
a/helpers/sitespeed_helper.py b/helpers/sitespeed_helper.py index 0b9b91bf..c7fa8b82 100644 --- a/helpers/sitespeed_helper.py +++ b/helpers/sitespeed_helper.py @@ -106,7 +106,7 @@ def get_data_from_sitespeed(filename, org_domain): result) append_csp_data(req_url, req_domain, res, org_domain, result) - append_sri_data(req_url, req_domain, res, org_domain, result) + append_sri_data(req_domain, res, result) result['visits'] = 1 return result diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 3c8363b3..997c3f2a 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -1,6 +1,4 @@ # -*- coding: utf-8 -*- -# pylint: disable=too-many-lines -import json import re import urllib import urllib.parse @@ -8,7 +6,7 @@ from helpers.setting_helper import get_config from models import Rating -# pylint: disable=too-many-arguments +# pylint: disable=too-many-arguments,too-many-positional-arguments def rate_sri(result_dict, global_translation, local_translation, org_domain, org_www_domain, domain): """ @@ -72,7 +70,7 @@ def rate_sri(result_dict, global_translation, local_translation, return rating -def append_sri_data(req_url, req_domain, res, org_domain, result): +def append_sri_data(req_domain, res, result): """ Appends Subresource Integrity (SRI) data for various types of content. @@ -80,22 +78,18 @@ def append_sri_data(req_url, req_domain, res, org_domain, result): calls the appropriate function to append the SRI data to the result dictionary. Args: - req_url (str): The requested URL. req_domain (str): The requested domain. res (dict): The response dictionary containing the content. - org_domain (str): The original domain. result (dict): The result dictionary where the CSP data will be appended. 
""" if 'content' in res and 'text' in res['content']: if 'mimeType' in res['content'] and 'text/html' in res['content']['mimeType']: append_sri_data_for_html( - req_url, req_domain, res, - org_domain, result) -def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): +def append_sri_data_for_html(req_domain, res, result): """ Appends Subresource Integrity (SRI) data for HTML content and linked resources. @@ -104,10 +98,8 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): It then appends the SRI data for these resources to the result dictionary. Args: - req_url (str): The requested URL. req_domain (str): The requested domain. res (dict): The response dictionary containing the HTML content. - org_domain (str): The original domain. result (dict): The result dictionary where the SRI data will be appended. """ # Reference: https://developer.mozilla.org/en-US/docs/Web/Security/Subresource_Integrity @@ -155,6 +147,21 @@ def append_sri_data_for_html(req_url, req_domain, res, org_domain, result): result) def get_sris(req_domain, content): + """ + Extracts Subresource Integrity (SRI) information from HTML content. + + This function searches for HTML tags with 'integrity' attributes within the provided content, + extracts relevant SRI details, and processes them using helper functions to append additional + information. The results are returned as a list of dictionaries. + + Args: + req_domain (str): The domain from which the request originated. + content (str): The HTML content to be parsed. + + Returns: + list: A list of dictionaries, + each containing SRI information and additional processed data. + """ sri_list = [] regex = ( r'(?P<(?P[a-z]+)[^<]*? 
integrity=["\'](?P[^"\']+)["\'][^>]*?>)' @@ -171,51 +178,143 @@ def get_sris(req_domain, content): 'integrity': integrity } - src = None - regex_src = r'(href|src)="(?P[^"\']+)["\']' - group_src = re.search(regex_src, raw, re.IGNORECASE) - if group_src is not None: - src = group_src.group('src') - sri['src'] = src - sri['src-same-origin'] = is_same_domain(src, req_domain) - - src_type = None - if name == 'script': - src_type = 'script' - else: - regex_type = r'(as)="(?P[^"\']+)["\']' - group_type = re.search(regex_type, raw, re.IGNORECASE) - if group_type is not None: - tmp = group_type.group('as').lower() - if tmp in ('style', 'font', 'img', 'script'): - src_type = tmp - - link_rel = None - regex_rel = r'(rel)="(?P[^"\']+)["\']' - group_rel = re.search(regex_rel, raw, re.IGNORECASE) - if group_rel is not None: - link_rel = group_rel.group('rel').lower() - if src_type is None and link_rel in ('stylesheet'): - src_type = 'style' - - sri['type'] = src_type - sri['rel'] = link_rel + append_with_src(req_domain, raw, sri) + src_type = append_with_src_type(raw, name) + link_rel = append_with_rel(raw, sri, src_type) + append_sri_errors(name, sri, link_rel) - if name in ('link'): - if link_rel not in ('stylesheet', 'preload', 'modulepreload'): + sri_list.append(sri) + + return sri_list + +def append_with_src_type(raw, name): + """ + Determines the source type of an HTML tag based on its attributes. + + This function checks the tag name and + its attributes to identify the type of resource it represents. + If the tag is a 'script', it directly assigns 'script' as the source type. + For other tags, it searches for the 'as' attribute and + assigns the corresponding type if it matches known resource types. + + Args: + raw (str): The raw HTML tag string. + name (str): The name of the HTML tag. + + Returns: + str or None: The determined source type ('script', 'style', 'font', 'img') + or None if not identified. 
+ """ + src_type = None + if name == 'script': + src_type = 'script' + else: + regex_type = r'(as)="(?P[^"\']+)["\']' + group_type = re.search(regex_type, raw, re.IGNORECASE) + if group_type is not None: + tmp = group_type.group('as').lower() + if tmp in ('style', 'font', 'img', 'script'): + src_type = tmp + return src_type + +def append_with_rel(raw, sri, src_type): + """ + Extracts and processes the 'rel' attribute from an HTML tag. + + This function searches for the 'rel' attribute within the provided HTML tag string. + It assigns the corresponding source type if the 'rel' attribute indicates a stylesheet and + updates the SRI dictionary with the determined type and 'rel' attribute. + + Args: + raw (str): The raw HTML tag string. + sri (dict): The dictionary containing SRI information to be updated. + src_type (str or None): The current source type, + which may be updated based on the 'rel' attribute. + + Returns: + str or None: The value of the 'rel' attribute if found, otherwise None. + """ + link_rel = None + regex_rel = r'(rel)="(?P[^"\']+)["\']' + group_rel = re.search(regex_rel, raw, re.IGNORECASE) + if group_rel is not None: + link_rel = group_rel.group('rel').lower() + if src_type is None and link_rel in ('stylesheet'): + src_type = 'style' + + sri['type'] = src_type + sri['rel'] = link_rel + return link_rel + +def append_sri_errors(name, sri, link_rel): + """ + Validates the use of the integrity attribute in HTML tags and logs errors. + + This function checks if the integrity attribute is used correctly based on the tag name and + its 'rel' attribute. + It updates the SRI dictionary with error messages if the integrity attribute is + used incorrectly and logs warnings. + + Args: + name (str): The name of the HTML tag. + sri (dict): The dictionary containing SRI information to be updated. + link_rel (str or None): The value of the 'rel' attribute of the HTML tag. 
+ + Returns: + None + """ + if name in ('link'): + if link_rel not in ('stylesheet', 'preload', 'modulepreload'): # TODO: Do something when using it incorrectly - sri['error'] = 'Using integrity attribute in combination with unallowed rel attribute value.' - print('WEBSITE WARNING: USING integrity incorrectly!') - elif name not in ('link', 'script'): - # TODO: Do something when using it incorrectly - sri['error'] = 'Using integrity attribute on wrong element type.' + sri['error'] = ( + 'Using integrity attribute in combination ' + 'with unallowed rel attribute value.') print('WEBSITE WARNING: USING integrity incorrectly!') + elif name not in ('link', 'script'): + # TODO: Do something when using it incorrectly + sri['error'] = 'Using integrity attribute on wrong element type.' + print('WEBSITE WARNING: USING integrity incorrectly!') - sri_list.append(sri) +def append_with_src(req_domain, raw, obj): + """ + Extracts the source URL from an HTML tag and updates the SRI object. - return sri_list + This function searches for 'href' or 'src' attributes within the provided HTML tag string, + extracts the URL, and updates the SRI dictionary with the source URL and + a flag indicating if the source is from the same domain. + + Args: + req_domain (str): The domain from which the request originated. + raw (str): The raw HTML tag string. + obj (dict): The dictionary containing SRI information to be updated. + + Returns: + None + """ + src = None + regex_src = r'(href|src)="(?P[^"\']+)["\']' + group_src = re.search(regex_src, raw, re.IGNORECASE) + if group_src is not None: + src = group_src.group('src') + obj['src'] = src + obj['src-same-origin'] = is_same_domain(src, req_domain) def get_sri_candidates(req_domain, content): + """ + Identifies HTML tags that should have Subresource Integrity (SRI) attributes. 
+ + This function searches for 'link' and 'script' tags within the provided HTML content, + determines if they should have SRI attributes based on their attributes and origin, + and returns a list of candidate tags. + + Args: + req_domain (str): The domain from which the request originated. + content (str): The HTML content to be parsed. + + Returns: + list: A list of dictionaries, + each representing a candidate tag that should have an SRI attribute. + """ candidates = [] regex = ( r'(?P<(?Plink|script) [^>]*?>)' @@ -226,55 +325,45 @@ def get_sri_candidates(req_domain, content): raw = match.group('raw') name = match.group('name').lower() - src = None - src_same_origin = False - regex_src = r'(href|src)="(?P[^"\']+)["\']' - group_src = re.search(regex_src, raw, re.IGNORECASE) - if group_src is not None: - src = group_src.group('src') - src_same_origin = is_same_domain(src, req_domain) - - link_rel = None - regex_rel = r'(rel)="(?P[^"\']+)["\']' - group_rel = re.search(regex_rel, raw, re.IGNORECASE) - if group_rel is not None: - link_rel = group_rel.group('rel').lower() + candidate = { + 'raw': raw, + 'tag-name': name + } + append_with_src(req_domain, raw, candidate) + link_rel = append_with_rel(raw, candidate, None) should_have_integrity = False if name in ('link'): if link_rel in ('stylesheet', 'preload', 'modulepreload'): should_have_integrity = True - elif name in ('script') and src is not None: + elif name in ('script') and candidate['src'] is not None: should_have_integrity = True # NOTE: Remove same domain resources - if should_have_integrity and src_same_origin: + if should_have_integrity and candidate['src-same-origin']: should_have_integrity = False if should_have_integrity: - candidates.append({ - 'raw': raw, - 'tag-name': name, - 'src': src, - 'src-same-origin': src_same_origin - }) + candidates.append(candidate) return candidates def is_same_domain(url, domain): + """ + Check if given url is using same domain. + + Args: + url (str): URL to check. 
+ domain (str): Domain to compare with. + + Returns: + bool: True if URL uses same domain, otherwise False. + """ if url.startswith('//'): url = url.replace('//', 'https://') - elif url.startswith('https://'): - url = url - elif '://' in url: - url = url - elif ':' in url: - url = url elif url.startswith('/'): url = url.strip('/') url = f'https://{domain}/{url}' - o = urllib.parse.urlparse(url) - resource_domain = o.hostname - - return domain == resource_domain + parsed_url = urllib.parse.urlparse(url) + return parsed_url.hostname == domain From 8af48c78f933dc92572f7177e25991e6a9f73ddf Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Sat, 28 Sep 2024 16:52:11 +0200 Subject: [PATCH 13/20] removed TODO --- helpers/sri_helper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 997c3f2a..c8cfcf21 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -265,13 +265,11 @@ def append_sri_errors(name, sri, link_rel): """ if name in ('link'): if link_rel not in ('stylesheet', 'preload', 'modulepreload'): - # TODO: Do something when using it incorrectly sri['error'] = ( 'Using integrity attribute in combination ' 'with unallowed rel attribute value.') print('WEBSITE WARNING: USING integrity incorrectly!') elif name not in ('link', 'script'): - # TODO: Do something when using it incorrectly sri['error'] = 'Using integrity attribute on wrong element type.' 
print('WEBSITE WARNING: USING integrity incorrectly!') From 09c16b5701c66ec71e2869d5c03e2d940e2a68d0 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Sat, 28 Sep 2024 16:54:31 +0200 Subject: [PATCH 14/20] removed commented out code --- helpers/sri_helper.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index c8cfcf21..4db3ed20 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -107,12 +107,7 @@ def append_sri_data_for_html(req_domain, res, result): content = res['content']['text'] candidates = get_sri_candidates(req_domain, content) - # nice_candidates = json.dumps(candidates, indent=3) - # print('Candidates', nice_candidates) - sri_list = get_sris(req_domain, content) - # nice_sri_list = json.dumps(sri_list, indent=3) - # print('SRI', nice_sri_list) sri_errors = [] From 732e9a6ed94af3f788710ef1c4711363350e51d2 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Sat, 28 Sep 2024 16:56:30 +0200 Subject: [PATCH 15/20] removed commented out code --- helpers/sri_helper.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 4db3ed20..54da8922 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -125,9 +125,6 @@ def append_sri_data_for_html(req_domain, res, result): if found_candidate is not None: candidates.remove(found_candidate) - # nice_candidates = json.dumps(candidates, indent=3) - # print('Candidates', nice_candidates) - if len(sri_errors) > 0: append_domain_entry( req_domain, From c74029a68131c11bb7bb5a81c199fd4c1cdfea9c Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Sat, 28 Sep 2024 16:57:37 +0200 Subject: [PATCH 16/20] removed debug print --- helpers/sri_helper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 54da8922..9d04b1ff 
100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -260,10 +260,8 @@ def append_sri_errors(name, sri, link_rel): sri['error'] = ( 'Using integrity attribute in combination ' 'with unallowed rel attribute value.') - print('WEBSITE WARNING: USING integrity incorrectly!') elif name not in ('link', 'script'): sri['error'] = 'Using integrity attribute on wrong element type.' - print('WEBSITE WARNING: USING integrity incorrectly!') def append_with_src(req_domain, raw, obj): """ From 4ef9617d8b0cd47033b877ec49c562f0130744d7 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:31:32 +0200 Subject: [PATCH 17/20] list sri candidate in details mode --- helpers/sri_helper.py | 43 +++++++++++++++++++--- locales/en/LC_MESSAGES/http_validator.mo | Bin 6169 -> 6301 bytes locales/en/LC_MESSAGES/http_validator.po | 7 +++- locales/gov/LC_MESSAGES/http_validator.mo | Bin 6169 -> 6177 bytes locales/gov/LC_MESSAGES/http_validator.po | 7 +++- locales/sv/LC_MESSAGES/http_validator.po | 7 +++- 6 files changed, 52 insertions(+), 12 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index 9d04b1ff..f98cbd8d 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -2,7 +2,8 @@ import re import urllib import urllib.parse -from helpers.data_helper import append_domain_entry +from helpers.data_helper import append_domain_entry,\ + append_domain_entry_with_key, has_domain_entry from helpers.setting_helper import get_config from models import Rating @@ -61,12 +62,26 @@ def rate_sri(result_dict, global_translation, local_translation, rating += sub_rating elif 'HTML-FOUND' in result_dict[domain]['features'] and\ (domain in (org_domain, org_www_domain)): + rating = Rating(global_translation, get_config('general.review.improve-only')) rating.set_overall(1.0) - rating.set_standards(1.0, - local_translation('TEXT_REVIEW_SRI_NONE_COMPLIANT').format(domain)) - 
rating.set_integrity_and_security(1.0, - local_translation('TEXT_REVIEW_SRI_NONE_COMPLIANT').format(domain)) + + if get_config('general.review.details') and \ + has_domain_entry(domain, 'sri-findings', 'sri-candidates', result_dict): + candidates_str_list = '' + candidates = result_dict[domain]['sri-findings']['sri-candidates'] + for candidate in candidates: + candidates_str_list += f" - '{candidate}'\r\n" + + rating.set_integrity_and_security(1.0, + local_translation( + 'TEXT_REVIEW_SRI_NONE_COMPLIANT_DETAILS' + ).format(domain)) + rating.integrity_and_security_review = rating.integrity_and_security_review +\ + candidates_str_list + else: + rating.set_integrity_and_security(1.0, + local_translation('TEXT_REVIEW_SRI_NONE_COMPLIANT').format(domain)) return rating @@ -131,12 +146,28 @@ def append_sri_data_for_html(req_domain, res, result): 'features', 'SRI-WITH-ERRORS', result) + for sri_error in sri_errors: + append_domain_entry_with_key( + req_domain, + 'sri-findings', + 'sri-errors', + sri_error, + result) + elif len(candidates) == 0: append_domain_entry( req_domain, 'features', 'SRI-COMPLIANT', result) + else: + for candidate in candidates: + append_domain_entry_with_key( + req_domain, + 'sri-findings', + 'sri-candidates', + candidate['raw'], + result) def get_sris(req_domain, content): """ @@ -324,7 +355,7 @@ def get_sri_candidates(req_domain, content): if name in ('link'): if link_rel in ('stylesheet', 'preload', 'modulepreload'): should_have_integrity = True - elif name in ('script') and candidate['src'] is not None: + elif name in ('script') and ('src' in candidate and candidate['src'] is not None): should_have_integrity = True # NOTE: Remove same domain resources diff --git a/locales/en/LC_MESSAGES/http_validator.mo b/locales/en/LC_MESSAGES/http_validator.mo index 8c1ca6b5a81630f062f4835617d637fc31c7527b..d4215bef80b30bc12beeff9f41b7d5163c921fd9 100644 GIT binary patch delta 1175 zcmYk)OGs2v9LMp$lld6Uw=<@r-tm!@jgASBF(QPx7KMNq`dSOe2cbJeWXw5vr# 
za8XiFB52d5XrQD;L}1h+gcdz4Y|$d3O+l>+>HF`Vg~QzW+~5D6*F9%u!}C4a)vEky z;j@n`pqGcAb)P`KNIAI?eHg;c7%`s0Ao&t%|2b^M$JmS?u^s;ygB#s^5G|3coad&4 z1~;$;A7KbT;8rXwa0d*dJ|9I3r%)Gi%RGN*@>|q)Kd=F-3q_i+8yoQ`>i9`)W`3FD zM&IDc|ANRTtSA3P?dUI}Gxnjj8$*55YpCt+qAui_aRqmff8jO^7K?1h!`OirO*~C2-h-N>s9S#(b)p5-g)O4?dx!d_Yp8EnU*ayTA2pAdoH6;j$@eg; z4VFyd3s&ktCRdb-4CRS*VjK0$CJ}a3W>E7y`tdn-;tJ|`1!eBhwV}2f#t_=rgSSz4 z^i3K2uV>}q9~Q=T9Khqa8>g`smvJw8D%>O3kDpms46CR|_|bkRkhRMU>W;-S`#j&|l+D95!CSe(DcUkK!BZe_%tc2)ELNTHlFU zA4c7wbExB8LB29;^8KtSyhJ_YRb+1YZE`V1%@%SBvI}*`2C)lc7(oa7@V%**GpoMw z0BXCm$qObgBTc1c%@k@`o)#jg6P`ppvvgg~oBz7NS@f0_osJBR4jwu(l6&Dzl{l5Y zhEgjrZrSn7WWtWeoMK;VaDBlVOWRz@WO^!*I%~yK6Luo*6#3d}wAJKzJZ2?Q(X?&H RqZ7%?&P{(;Zqaup?;qTagdP9@ delta 1102 zcmYMyO-NKx7{>9}@+-%gn$c!-(&qN1Rx|m5P$t--T8Q8#a-|8`Xi^wBMOxG>3xx>@ zp@=pG8PP(THnoUWNsSS-i8f|bL|CnY2nzZ?-2)9X^E=PE=bn%E-kYu0Te3@$;#uRf zk6#_X>QzerKb6I1Yl%UOVm;PkyR#QJk&mP9pTaFThY2j;He7LrOZ>bWJ+rJ0&}d}9 zMcj;cFon;t34fv{^h*8f2hbyrpf++Db^Vsh&rtI$;|5$^X0{O%SdU$(`TH@!`Zh&F zC%E^o!)y`bm!%4|UQB)VNvHM&_L_FiHN1+cCVxYzKB?BM!TqQ)Yd8 zM56)UqfSy@;pZeOA3~M>ENY<~YGZd%CtN_C>;q~;MFBt8q4Hjr54)UkIfq#dxKAUD z3vR$$JXmD*1Gj2|pc&P)K~x^b7*69(yp7txD{R7VsBzJdU+PY5C6A&ic?0!`3L)y> zLgPCfU6|la>%b$}hSyLpW&u_Dz&f*cY%GNla*#jW=OKHwK~zOfpdP^`)cg;z4L{+2 ztf}%h)L%vYHF1uPW}L?wTt+SY%h^$Fwud~4dIS$pm0Lu<_SyCScKy-yeuY|5^K~O% zJL>X;%QIOTdbV@OTK2@{*Dim-O8WnxN>;&Ol{kg%*oV7t8gmD0AH9npmI(>4| QJChk58`0H#D0ZdjFGK8brT_o{ diff --git a/locales/en/LC_MESSAGES/http_validator.po b/locales/en/LC_MESSAGES/http_validator.po index f83e46b7..2835f3ca 100644 --- a/locales/en/LC_MESSAGES/http_validator.po +++ b/locales/en/LC_MESSAGES/http_validator.po @@ -196,7 +196,10 @@ msgid "TEXT_REVIEW_SRI_COMPLIANT" msgstr "- {0}, Subresource Integrity (SRI)" msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT" -msgstr "- {0}, Needs Subresource Integrity (SRI)" +msgstr "- {0}, Subresource Integrity (SRI) is required" + +msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT_DETAILS" +msgstr "- {0}, Subresource Integrity (SRI) is required for following 
entries" msgid "TEXT_REVIEW_SRI_WITH_ERRORS" -msgstr "- {0}, Incorrectly using Subresource Integrity (SRI)" +msgstr "- {0}, Subresource Integrity (SRI) is used incorrectly" diff --git a/locales/gov/LC_MESSAGES/http_validator.mo b/locales/gov/LC_MESSAGES/http_validator.mo index 8c1ca6b5a81630f062f4835617d637fc31c7527b..d983dae182bf7721f9236505504d04ecc8936ccb 100644 GIT binary patch delta 157 zcmbPfu+U(`8s=C%1_lOQF$M-RARPmwLHx-;+5ku&g7QB@`AXst`CuRo6k=$C@@GQ% zCxEmtQ2aiS)&OR3r!Xjljc##ELJE=EiBC}N==!}DW)s}5-2TBO;O0qOU^GU TN=+`wshm7PTyOIZu|!4yo82E= delta 147 zcmZ2zFwp!|tY z{sAB@3>3c(q;-Mx>&=`j+Cr>;si`T&lLf?71U>VT^NWg7lS^_c6-tXU^U^`0lNX4q KZ9X8D$Or&xr5nEh diff --git a/locales/gov/LC_MESSAGES/http_validator.po b/locales/gov/LC_MESSAGES/http_validator.po index f83e46b7..2835f3ca 100644 --- a/locales/gov/LC_MESSAGES/http_validator.po +++ b/locales/gov/LC_MESSAGES/http_validator.po @@ -196,7 +196,10 @@ msgid "TEXT_REVIEW_SRI_COMPLIANT" msgstr "- {0}, Subresource Integrity (SRI)" msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT" -msgstr "- {0}, Needs Subresource Integrity (SRI)" +msgstr "- {0}, Subresource Integrity (SRI) is required" + +msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT_DETAILS" +msgstr "- {0}, Subresource Integrity (SRI) is required for following entries" msgid "TEXT_REVIEW_SRI_WITH_ERRORS" -msgstr "- {0}, Incorrectly using Subresource Integrity (SRI)" +msgstr "- {0}, Subresource Integrity (SRI) is used incorrectly" diff --git a/locales/sv/LC_MESSAGES/http_validator.po b/locales/sv/LC_MESSAGES/http_validator.po index 966100e0..880c59d6 100644 --- a/locales/sv/LC_MESSAGES/http_validator.po +++ b/locales/sv/LC_MESSAGES/http_validator.po @@ -196,7 +196,10 @@ msgid "TEXT_REVIEW_SRI_COMPLIANT" msgstr "- {0}, Subresource Integrity (SRI)" msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT" -msgstr "- {0}, Kräver Subresource Integrity (SRI)" +msgstr "- {0}, Subresource Integrity (SRI) krävs" + +msgid "TEXT_REVIEW_SRI_NONE_COMPLIANT_DETAILS" +msgstr "- {0}, Subresource Integrity (SRI) krävs för 
följande HTML-element" msgid "TEXT_REVIEW_SRI_WITH_ERRORS" -msgstr "- {0}, Använder Subresource Integrity (SRI) på felaktigt sätt" +msgstr "- {0}, Subresource Integrity (SRI) används på felaktigt sätt" From 303d89be7af4276322fab7c79326636cbdeead34 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:40:58 +0200 Subject: [PATCH 18/20] list sri errors in details mode --- helpers/sri_helper.py | 30 +++++++++++++++++------ locales/en/LC_MESSAGES/http_validator.po | 3 +++ locales/gov/LC_MESSAGES/http_validator.po | 3 +++ locales/sv/LC_MESSAGES/http_validator.po | 3 +++ 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index f98cbd8d..d066a48d 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -37,14 +37,28 @@ def rate_sri(result_dict, global_translation, local_translation, global_translation, get_config('general.review.improve-only')) sub_rating.set_overall(3.0) - sub_rating.set_standards(3.0, - local_translation( - 'TEXT_REVIEW_SRI_WITH_ERRORS' - ).format(domain)) - sub_rating.set_integrity_and_security(3.0, - local_translation( - 'TEXT_REVIEW_SRI_WITH_ERRORS' - ).format(domain)) + + if get_config('general.review.details') and \ + has_domain_entry(domain, 'sri-findings', 'sri-errors', result_dict): + errors_str_list = '' + errors = result_dict[domain]['sri-findings']['sri-errors'] + for error in errors: + errors_str_list += f" - '{error}'\r\n" + + sub_rating.set_standards(3.0, + local_translation( + 'TEXT_REVIEW_SRI_WITH_ERRORS_DETAILS' + ).format(domain)) + + rating.standards_review = rating.standards_review +\ + errors_str_list + else: + sub_rating.set_standards(3.0, + local_translation( + 'TEXT_REVIEW_SRI_WITH_ERRORS' + ).format(domain)) + + rating += sub_rating elif 'SRI-COMPLIANT' in result_dict[domain]['features']: sub_rating = Rating( diff --git a/locales/en/LC_MESSAGES/http_validator.po 
b/locales/en/LC_MESSAGES/http_validator.po index 2835f3ca..1c5341d3 100644 --- a/locales/en/LC_MESSAGES/http_validator.po +++ b/locales/en/LC_MESSAGES/http_validator.po @@ -203,3 +203,6 @@ msgstr "- {0}, Subresource Integrity (SRI) is required for following entries" msgid "TEXT_REVIEW_SRI_WITH_ERRORS" msgstr "- {0}, Subresource Integrity (SRI) is used incorrectly" + +msgid "TEXT_REVIEW_SRI_WITH_ERRORS_DETAILS" +msgstr "- {0}, Subresource Integrity (SRI) has following errors" diff --git a/locales/gov/LC_MESSAGES/http_validator.po b/locales/gov/LC_MESSAGES/http_validator.po index 2835f3ca..1c5341d3 100644 --- a/locales/gov/LC_MESSAGES/http_validator.po +++ b/locales/gov/LC_MESSAGES/http_validator.po @@ -203,3 +203,6 @@ msgstr "- {0}, Subresource Integrity (SRI) is required for following entries" msgid "TEXT_REVIEW_SRI_WITH_ERRORS" msgstr "- {0}, Subresource Integrity (SRI) is used incorrectly" + +msgid "TEXT_REVIEW_SRI_WITH_ERRORS_DETAILS" +msgstr "- {0}, Subresource Integrity (SRI) has following errors" diff --git a/locales/sv/LC_MESSAGES/http_validator.po b/locales/sv/LC_MESSAGES/http_validator.po index 880c59d6..5578d4cd 100644 --- a/locales/sv/LC_MESSAGES/http_validator.po +++ b/locales/sv/LC_MESSAGES/http_validator.po @@ -203,3 +203,6 @@ msgstr "- {0}, Subresource Integrity (SRI) krävs för följande HTML-element" msgid "TEXT_REVIEW_SRI_WITH_ERRORS" msgstr "- {0}, Subresource Integrity (SRI) används på felaktigt sätt" + +msgid "TEXT_REVIEW_SRI_WITH_ERRORS_DETAILS" +msgstr "- {0}, Subresource Integrity (SRI) har följande fel" From c25f32b126cb3ffc823e4908d795f07035bd42bc Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:43:39 +0200 Subject: [PATCH 19/20] fixed sri review candidate output --- helpers/sri_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helpers/sri_helper.py b/helpers/sri_helper.py index d066a48d..c833d660 100644 --- a/helpers/sri_helper.py +++ b/helpers/sri_helper.py @@ -43,7
+43,7 @@ def rate_sri(result_dict, global_translation, local_translation, errors_str_list = '' errors = result_dict[domain]['sri-findings']['sri-errors'] for error in errors: - errors_str_list += f" - '{error}'\r\n" + errors_str_list += f" - {error}\r\n" sub_rating.set_standards(3.0, local_translation( @@ -85,7 +85,7 @@ def rate_sri(result_dict, global_translation, local_translation, candidates_str_list = '' candidates = result_dict[domain]['sri-findings']['sri-candidates'] for candidate in candidates: - candidates_str_list += f" - '{candidate}'\r\n" + candidates_str_list += f" - `{candidate}`\r\n" rating.set_integrity_and_security(1.0, local_translation( From b04acec1234a3705cf2477fa5c367edb6e0caec2 Mon Sep 17 00:00:00 2001 From: cockroacher <163405488+cockroacher@users.noreply.github.com> Date: Mon, 30 Sep 2024 16:51:06 +0200 Subject: [PATCH 20/20] fix missing translations --- locales/en/LC_MESSAGES/http_validator.mo | Bin 6301 -> 6302 bytes locales/gov/LC_MESSAGES/http_validator.mo | Bin 6177 -> 6302 bytes locales/sv/LC_MESSAGES/http_validator.mo | Bin 6154 -> 6266 bytes 3 files changed, 0 insertions(+), 0 deletions(-) diff --git a/locales/en/LC_MESSAGES/http_validator.mo b/locales/en/LC_MESSAGES/http_validator.mo index d4215bef80b30bc12beeff9f41b7d5163c921fd9..b72be62d1a8b5984827dc5d026f7179ea30cfe07 100644 GIT binary patch delta 124 zcmbPhIL~my5#~^H1_p*3Vhjv=Kw3bYfx!Ss+XHD3zW~ag59MC~(m7GnSa delta 123 zcmbPdIM;B)5#~@c1_p*3Vhjv=K$>5ifx!Ss+W~11KOf4U2j!m!(m sNDBkSqk*(8NZ)2jmPbOunZ*jF#i=O@nR&_iMMbH}B{`Lw`NjJg0f6Tf=>Px# diff --git a/locales/gov/LC_MESSAGES/http_validator.mo b/locales/gov/LC_MESSAGES/http_validator.mo index d983dae182bf7721f9236505504d04ecc8936ccb..b72be62d1a8b5984827dc5d026f7179ea30cfe07 100644 GIT binary patch delta 1151 zcmYk)OGs2<7{>8eGcRMAsm)C0WwbFXZ=*#SL}radl(kT56NTiMf_XVswwrdfh&F>_ zLUw^RZHk1Fh>*ajMHDT%SlFUPM4N(I71jUgd<%y;^E=ObzT5e}xgMKpOfD2;j2WMu zTqU~F`C0TS$uL_%F2x|$;cARKk75P+EUNz`uEqOUj_p3b`gspV%#wDJn`#h-A&X&W}Wj`Oa6fyup-B7Bksd$Jmd0|GV|Lr 
zZr0&<)YgRqe%_AC$5C5<9yQSvYGE^|es553`Umxf#ku~%no+sKPp6?&QF8s*Cy0L(IoFDZ&jI7u4is^=HA9iCXLs2 zK8<|p7AgIIRhF1l6RR+eO}HF4xqEOG`3UOy30#d+n8YHk#V>AjNszZ-$}DI5X|yun z3^wC!OyhHG!=I=LQ>DT0J26E*jM~U~)bBSvKSj+mk1KF-nb}HAViWeF=Fei1^=*QN zPH^{MhuJJPkiViHEGswbz!ucFeW;U;p~g+3HZtwLz;^NnT!+!6X6vyBTXD#9UYYgn z5semnhdN1lMUdN3xerzP)2M~=sEyr0op1(qviGPBg~CB@MCEOs_j(@noX4C7+@}%6 z86WTlcZJM;;2KR(WkxlvAC-qNffsNC-a>8Q6}I6w)VO#gD0L5J$it{gUPE0`F+%-2 zX?&-n7nA(cy72&Z;T6;$vxq8vxW;UbjioV0uHsA2rI5W^KdK@}QCDydHUC5G!jHHE z>uZAzWoxOwCeG8*fzw!z^QeV?x!rYUTgl_7D|mpa+${38Pu~CA`{T=k3T069?MB{q z(DRt*OF0_4+bLu%d+hnO=g(M4{~uJzDj2*G)3_P4o-dd=pv$&GgXB}F&rji6oW*)vz!3g22AA4-Ck8|^lIEt7 z4wrBZ&R_)J;Cd`6wg#Xmeo54ooU(^-WmDz=Lpyoc4M@*hD`3`2Z z!xPi+9^E>S$rZ~*qD3N2sFjbDi?FLQi5A{KE$Agi@Dm2G)@84FGls|sY{CoJhO^j; zpIz*~uB4tfY$I;NEYLCw@$W7V-o2?5bDV19zcLyw7+O_Oizxwp#+p`I%9@q=p8D4d delta 1109 zcmYMyT}abW7{~F)G^b82x2a^>{*`H&FVl=lp*JnCpqtXGB9hds5Yc4?(efr@kObjP zR}r%!B7#-amC!)x#rx{QUN3^8iwL41>ihG5puzSzzyHq8dCqh8JC=+k=0iD?#%mkz zO5Tx0O8;MlIc7_UMOcPaxC~pJ`*0=s80z;)T#dIdiZ5^tesPv8_VXt6%o28pK`j%` z<0`y~4fqV#;!o6qUatRrCwk-oR3Vp8-`{X~2DQ#RT#nfRvlSS{D%^@%{~$)$-;xY; zf;$Tj%wA#z`77#&fjqM&tU=A|L7nsrYThKOko(T(SWo_h>#!u>tPb0;7W-YEP-cI7 z!k`A$d-7b&1Jb?*KxXYjf zXWfL?xGT%-2dd(>A~U*ay%@tmR3X=}0Uw};AFvLyi~T!p#P#F@*oqgh1s|dg@VS`& zH!}$E58H`bupQ6gMtqDN_ziWZElbU&DXbe+cxlM26JtoNb_&(8any!W*o1FzGZuu+ zy08Pa@8vN4S0&Rt(4Ebr77j-Ijcc68aSzY$p-%n_^&beAQUu2M&{gb4T~#kqqn$;~ zPa=o5l*?&U$LA6ZD9+wuA%1uHugf7;qb7@@ITp?g{*IxsOls)pLaK~- b!