Update custom_recognizers.py
This commit is contained in:
@@ -10,19 +10,16 @@ class BelgianNrnRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# Format standard : 12.34.56-789.01
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="NRN format standard",
|
name="NRN format standard",
|
||||||
regex=r"\b\d{2}\.\d{2}\.\d{2}-\d{3}\.\d{2}\b",
|
regex=r"\b\d{2}\.\d{2}\.\d{2}-\d{3}\.\d{2}\b",
|
||||||
score=1.0
|
score=1.0
|
||||||
),
|
),
|
||||||
# Format compact : 12345678901
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="NRN format compact",
|
name="NRN format compact",
|
||||||
regex=r"\b\d{11}\b",
|
regex=r"\b\d{11}\b",
|
||||||
score=0.7
|
score=0.7
|
||||||
),
|
),
|
||||||
# Format avec espaces : 12 34 56 789 01
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="NRN format espacé",
|
name="NRN format espacé",
|
||||||
regex=r"\b\d{2}\s\d{2}\s\d{2}\s\d{3}\s\d{2}\b",
|
regex=r"\b\d{2}\s\d{2}\s\d{2}\s\d{3}\s\d{2}\b",
|
||||||
@@ -42,13 +39,11 @@ class BelgianEnterpriseRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# Format standard : BE0123.456.789
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="BTW/TVA format standard",
|
name="BTW/TVA format standard",
|
||||||
regex=r"\bBE\s?0\d{3}\.\d{3}\.\d{3}\b",
|
regex=r"\bBE\s?0\d{3}\.\d{3}\.\d{3}\b",
|
||||||
score=0.95
|
score=0.95
|
||||||
),
|
),
|
||||||
# Format sans points : BE0123456789
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="BTW/TVA format compact",
|
name="BTW/TVA format compact",
|
||||||
regex=r"\bBE\s?0\d{9}\b",
|
regex=r"\bBE\s?0\d{9}\b",
|
||||||
@@ -68,7 +63,6 @@ class BelgianBankAccountRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# Format belge : 123-4567890-12
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="Compte bancaire belge",
|
name="Compte bancaire belge",
|
||||||
regex=r"\b\d{3}-\d{7}-\d{2}\b",
|
regex=r"\b\d{3}-\d{7}-\d{2}\b",
|
||||||
@@ -88,13 +82,11 @@ class ImprovedIbanRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# IBAN avec espaces
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="IBAN avec espaces",
|
name="IBAN avec espaces",
|
||||||
regex=r"\b[A-Z]{2}\d{2}(?:\s\d{4}){3,7}(?:\s\d{1,4})?\b",
|
regex=r"\b[A-Z]{2}\d{2}(?:\s\d{4}){3,7}(?:\s\d{1,4})?\b",
|
||||||
score=0.95
|
score=0.95
|
||||||
),
|
),
|
||||||
# IBAN sans espaces
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="IBAN compact",
|
name="IBAN compact",
|
||||||
regex=r"\b[A-Z]{2}\d{2}[A-Z0-9]{4,32}\b",
|
regex=r"\b[A-Z]{2}\d{2}[A-Z0-9]{4,32}\b",
|
||||||
@@ -114,19 +106,16 @@ class ImprovedPhoneRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# Format international : +32 1 23 45 67 89
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="Téléphone international",
|
name="Téléphone international",
|
||||||
regex=r"\b(?:\+|00)(?:32|33|352)\s?[1-9](?:[\s.-]?\d{2}){3,4}\b",
|
regex=r"\b(?:\+|00)(?:32|33|352)\s?[1-9](?:[\s.-]?\d{2}){3,4}\b",
|
||||||
score=0.9
|
score=0.9
|
||||||
),
|
),
|
||||||
# Format national : 01 23 45 67 89
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="Téléphone national",
|
name="Téléphone national",
|
||||||
regex=r"\b0[1-9](?:[\s.-]?\d{2}){4}\b",
|
regex=r"\b0[1-9](?:[\s.-]?\d{2}){4}\b",
|
||||||
score=0.8
|
score=0.8
|
||||||
),
|
),
|
||||||
# Format mobile belge : 04xx xx xx xx
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="Mobile belge",
|
name="Mobile belge",
|
||||||
regex=r"\b04\d{2}[\s.-]?\d{2}[\s.-]?\d{2}[\s.-]?\d{2}\b",
|
regex=r"\b04\d{2}[\s.-]?\d{2}[\s.-]?\d{2}[\s.-]?\d{2}\b",
|
||||||
@@ -146,13 +135,11 @@ class FrenchNIRRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# Format avec espaces : 1 23 04 75 123 456 78
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="NIR avec espaces",
|
name="NIR avec espaces",
|
||||||
regex=r"\b[12]\s?\d{2}\s?(?:0[1-9]|1[0-2])\s?(?:2[ABab]|[0-9]{2})\s?\d{3}\s?\d{3}\s?\d{2}\b",
|
regex=r"\b[12]\s?\d{2}\s?(?:0[1-9]|1[0-2])\s?(?:2[ABab]|[0-9]{2})\s?\d{3}\s?\d{3}\s?\d{2}\b",
|
||||||
score=1.0
|
score=1.0
|
||||||
),
|
),
|
||||||
# Format compact : 12304751234567
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="NIR compact",
|
name="NIR compact",
|
||||||
regex=r"\b[12]\d{2}(?:0[1-9]|1[0-2])(?:2[ABab]|[0-9]{2})\d{6}\d{2}\b",
|
regex=r"\b[12]\d{2}(?:0[1-9]|1[0-2])(?:2[ABab]|[0-9]{2})\d{6}\d{2}\b",
|
||||||
@@ -172,7 +159,6 @@ class ImprovedEmailRecognizer(PatternRecognizer):
|
|||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
patterns = [
|
patterns = [
|
||||||
# Email standard avec domaines courants
|
|
||||||
Pattern(
|
Pattern(
|
||||||
name="Email standard",
|
name="Email standard",
|
||||||
regex=r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
|
regex=r"\b[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}\b",
|
||||||
@@ -187,8 +173,10 @@ class ImprovedEmailRecognizer(PatternRecognizer):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# Liste des reconnaisseurs à charger
|
# Fonction pour créer les instances des reconnaisseurs
|
||||||
custom_recognizers = [
|
def get_custom_recognizers():
|
||||||
|
"""Retourne la liste des reconnaisseurs personnalisés instanciés."""
|
||||||
|
return [
|
||||||
BelgianNrnRecognizer(),
|
BelgianNrnRecognizer(),
|
||||||
BelgianEnterpriseRecognizer(),
|
BelgianEnterpriseRecognizer(),
|
||||||
BelgianBankAccountRecognizer(),
|
BelgianBankAccountRecognizer(),
|
||||||
@@ -196,4 +184,8 @@ custom_recognizers = [
|
|||||||
ImprovedPhoneRecognizer(),
|
ImprovedPhoneRecognizer(),
|
||||||
FrenchNIRRecognizer(),
|
FrenchNIRRecognizer(),
|
||||||
ImprovedEmailRecognizer()
|
ImprovedEmailRecognizer()
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
# Variable pour la compatibilité avec la configuration YAML
|
||||||
|
custom_recognizers = get_custom_recognizers()
|
||||||
|
|||||||
Reference in New Issue
Block a user