From 71a5fd3522c3ca6bf0ff3b5f5246f0fc8c8bca5d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Ml=C3=A1dek?=
Date: Thu, 23 May 2019 14:36:12 +0200
Subject: [PATCH] unicode normalize in sanitize function

---
Review notes (free-text area after the '---' marker; not part of the commit
message):

- sanitize() now NFKD-normalizes the path and then encodes it to ASCII with
  the 'ignore' error handler, so any character that has no ASCII
  decomposition is dropped (not replaced by '_') before the regex
  substitution runs. A name consisting only of such characters keeps just
  its ASCII parts (for example the extension).
- The Django URL in the added comment points at the master branch with a
  line anchor; presumably it refers to the text-sanitizing helper this code
  was modelled on. TODO confirm, and pin the link to a commit, since a line
  anchor on a moving branch can drift.
- This patch was recovered from a copy whose line breaks and indentation had
  been collapsed. The leading whitespace of the context lines (notably the
  two 'return' lines at the top of the second hunk) is reconstructed;
  verify it against delojza.py before applying.

 delojza.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/delojza.py b/delojza.py
index 507dc1a..004d42b 100755
--- a/delojza.py
+++ b/delojza.py
@@ -8,6 +8,7 @@ import re
 import shutil
 import sys
 import tempfile
+import unicodedata
 from configparser import ConfigParser
 from datetime import datetime, timedelta
 from glob import glob
@@ -92,10 +93,12 @@ class DelojzaBot:
                 return True
         return False
 
+    # https://github.com/django/django/blob/master/django/utils/text.py#L393
     @staticmethod
     def sanitize(filepath):
         if filepath is None:
             return None
+        filepath = unicodedata.normalize('NFKD', filepath).encode('ascii', 'ignore').decode('ascii')
         return re.sub(r'[^\w.()\[\]{}#-]', '_', filepath)
 
     def tag_file(self, filepath, message, info=None):