Unicode-normalize file paths in the sanitize function

This commit is contained in:
Tomáš Mládek 2019-05-23 14:36:12 +02:00
parent 89db7e885a
commit 71a5fd3522

View file

@ -8,6 +8,7 @@ import re
import shutil import shutil
import sys import sys
import tempfile import tempfile
import unicodedata
from configparser import ConfigParser from configparser import ConfigParser
from datetime import datetime, timedelta from datetime import datetime, timedelta
from glob import glob from glob import glob
@ -92,10 +93,12 @@ class DelojzaBot:
return True return True
return False return False
# https://github.com/django/django/blob/master/django/utils/text.py#L393
@staticmethod
def sanitize(filepath):
    """Return an ASCII-only version of *filepath* that is safe to use as a name.

    Accented characters are reduced to their base letter (``'á'`` becomes
    ``'a'``) by NFKD-decomposing the text and discarding the combining
    marks.  Every character that is still outside the allowed set -- ASCII
    letters, digits, ``_`` and ``.()[]{}#-`` -- is replaced with ``'_'``.

    Characters that have no ASCII decomposition (CJK, Cyrillic, emoji, ...)
    are therefore replaced with ``'_'`` rather than silently removed, so a
    name made up only of such characters does not collapse to an empty
    string.

    :param filepath: the text to sanitize, or ``None``.
    :return: the sanitized string, or ``None`` if *filepath* is ``None``.
    """
    if filepath is None:
        return None
    # NFKD splits e.g. 'š' into 's' + U+030C (combining caron).
    decomposed = unicodedata.normalize('NFKD', filepath)
    # Drop only the combining marks; everything else is left for the regex.
    base_chars = ''.join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    # re.ASCII restricts \w to [a-zA-Z0-9_], so leftover non-ASCII
    # characters are replaced with '_' like any other disallowed character.
    return re.sub(r'[^\w.()\[\]{}#-]', '_', base_chars, flags=re.ASCII)
def tag_file(self, filepath, message, info=None): def tag_file(self, filepath, message, info=None):