Facebook
From crisyelit, 3 Years ago, written in Plain Text.
This paste is a reply to valueEmail con python from crisyelit - view diff
Embed
Download Paste or View Raw
Hits: 307
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. import re
  4.  
  5. archivo = open("mail.txt", "r")
  6. resultado = open("destino.txt","a")
  7. emails = list(set([linea.strip() for linea in archivo]))
  8.  
  9. body_regex = re.compile('''
  10.     ^(?!\.)                            # name may not begin with a dot
  11.     (
  12.       [-a-z0-9!\#$%&'*+/=?^_`{|}~]     # all legal characters except dot
  13.       |
  14.       (?<!\.)\.                        # single dots only
  15.     )+
  16.     (?<!\.)$                            # name may not end with a dot
  17. ''', re.VERBOSE | re.IGNORECASE)
  18. domain_regex = re.compile('''
  19.     (
  20.       localhost
  21.       |
  22.       (
  23.         [a-z0-9]
  24.             # [sub]domain begins with alphanumeric
  25.         (
  26.           [-\w]*                         # alphanumeric, underscore, dot, hyphen
  27.           [a-z0-9]                       # ending alphanumeric
  28.         )?
  29.       \.                               # ending dot
  30.       )+
  31.       [a-z]{2,}                        # TLD alpha-only
  32.    )$
  33. ''', re.VERBOSE | re.IGNORECASE)
  34.  
  35. def is_valid_email(email):
  36.     if not isinstance(email, str) or not email or '@' not in email:
  37.         return False
  38.    
  39.     body, domain = email.rsplit('@', 1)
  40.  
  41.     match_body = body_regex.match(body)
  42.     match_domain = domain_regex.match(domain)
  43.  
  44.     if not match_domain:
  45.         # check for Internationalized Domain Names
  46.         # see https://docs.python.org/2/library/codecs.html#module-encodings.idna
  47.         try:
  48.             domain_encoded = domain.encode('idna').decode('ascii')
  49.         except UnicodeError:
  50.             return False
  51.         match_domain = domain_regex.match(domain_encoded)
  52.  
  53.     return (match_body is not None) and (match_domain is not None)
  54.  
  55. for email in emails:
  56.   emails = list()
  57.   if is_valid_email(email)  and not email in emails:
  58.     emails.append(email)
  59.     resultado.write(email + '\n')
  60.  
  61. resultado.close()
  62. archivo.close()