# -*- coding: utf-8 -*-
import re
# Input file that is scanned for addresses, and output file that collects the
# valid ones ("a" appends, so content from earlier runs is kept).  Both handles
# stay open because the filtering loop at the end of the script uses them.
archivo = open("mail.txt", "r")
resultado = open("destino.txt", "a")
# NOTE(review): the right-hand side of this assignment was missing in the
# source (a bare `emails =` is a SyntaxError).  Splitting the file contents on
# whitespace is an assumed reconstruction; it yields candidates without
# trailing newlines, which fits the loop appending '\n' itself when writing.
# Confirm against the intended input format.
emails = archivo.read().split()
# Matches the local part (everything before the "@") of an address.
# NOTE(review): the last two pattern lines were garbled in the source
# ("(? )+" and "(? ") and did not compile.  They are reconstructed here as
# "dots only one at a time" and "may not end with a dot", mirroring the
# leading-dot rule that survived intact -- confirm against the upstream pattern.
# A raw string keeps the backslashes from being read as string escapes.
body_regex = re.compile(r'''
    ^(?!\.)                           # name may not begin with a dot
    (
      [-a-z0-9!\#$%&'*+/=?^_`{|}~]    # all legal characters except dot
      |
      (?<!\.)\.                       # single dots only
    )+
    (?<!\.)$                          # name may not end with a dot
    ''', re.VERBOSE | re.IGNORECASE)
# Matches the domain part (everything after the last "@") of an address:
# either the literal "localhost" or one or more dot-terminated labels followed
# by an alphabetic TLD of at least two letters.  There is no leading "^"; the
# pattern is applied with .match(), which anchors at the start of the string.
# A raw string keeps "\w" and "\." from being read as (invalid) string escapes.
domain_regex = re.compile(r'''
    (
      localhost
      |
      (
        [a-z0-9]            # [sub]domain label begins with alphanumeric
        (
          [-\w]*            # word characters (incl. underscore) and hyphen
          [a-z0-9]          # ending alphanumeric
        )?
        \.                  # ending dot
      )+
      [a-z]{2,}             # TLD alpha-only
    )$
    ''', re.VERBOSE | re.IGNORECASE)
def is_valid_email(email):
    """Return True when *email* is a syntactically acceptable address.

    The value is split at its last "@".  The part before it must match
    ``body_regex``; the part after it must match ``domain_regex``, either
    as given or after IDNA encoding.

    Args:
        email: Candidate value.  Anything that is not a non-empty ``str``
            containing "@" is rejected without raising.

    Returns:
        bool: True if both parts match, False otherwise.
    """
    if not isinstance(email, str) or not email or '@' not in email:
        return False

    body, domain = email.rsplit('@', 1)

    # A bad local part can never validate, so stop before any IDNA work.
    if body_regex.match(body) is None:
        return False

    if domain_regex.match(domain) is not None:
        return True

    # Fall back to Internationalized Domain Names: retry with the ASCII
    # (punycode) form of the domain.
    # see https://docs.python.org/3/library/codecs.html#module-encodings.idna
    try:
        domain_encoded = domain.encode('idna').decode('ascii')
    except UnicodeError:
        # The domain cannot be expressed as IDNA, so it is not valid.
        return False
    return domain_regex.match(domain_encoded) is not None
# Write every valid address to the output file exactly once, in first-seen
# order.  The record of already-written addresses lives in its own set: the
# previous code rebound `emails` to an empty list at the top of each iteration,
# so the "already seen" check could never succeed and repeats were written.
seen = set()
# File objects are context managers, so both handles are closed on exit even
# if a write fails part-way through.
# NOTE(review): the source ended in a truncated `archivo.` statement, assumed
# to have been `archivo.close()`; the `with` block covers that.
with archivo, resultado:
    for email in emails:
        if email in seen or not is_valid_email(email):
            continue
        seen.add(email)
        resultado.write(email + '\n')