Issue
I have two files
main.py
#!/usr/bin/env python3
# coding=utf-8
import logging
import mailbox
import module
import os
import sys
from mailbox import Maildir, MaildirMessage
from kafka import KafkaProducer
from kafka.errors import KafkaError
from kafka.producer.future import FutureRecordMetadata
# Name of the log level (e.g. 'DEBUG'); logging.basicConfig accepts level names.
LOG_LEVEL: str = os.environ.get('CATCHALL_LOG_LEVEL', 'WARNING').upper()
logging.basicConfig(
    level=LOG_LEVEL,
    format='%(asctime)s %(levelname)s %(name)s %(lineno)d: %(message)s',
    filename='/var/log/email-catcher.log',
)
# The kafka client logger is capped at WARNING regardless of LOG_LEVEL.
logging.getLogger('kafka').setLevel(logging.WARNING)


def _require_env(name: str) -> str:
    """Return the value of environment variable *name*, or exit with status 1.

    A missing or empty variable is reported at CRITICAL level, naming the
    variable that was actually looked up, before the process exits.
    """
    value = os.environ.get(name)
    if not value:
        logging.critical("%s env var not found. Exiting ...", name)
        sys.exit(1)
    return value


# Maildir that is read for incoming mails.
MAILBOX_DIRECTORY: str = _require_env('CATCHALL_MAILBOX_DIRECTORY')
# Maildir that receives a copy of every published mail.
BACKUP_MAILBOX_DIRECTORY: str = _require_env('CATCHALL_BACKUP_MAILBOX_DIRECTORY')
# Comma-separated list of Kafka bootstrap servers.
KAFKA_SERVERS: str = _require_env('CATCHALL_KAFKA_SERVERS')
# Tolerate "host1:9092, host2:9092" and trailing commas.
KAFKA_SERVERS_LIST: list = [
    server.strip() for server in KAFKA_SERVERS.split(',') if server.strip()
]
KAFKA_TOPIC: str = os.environ.get('CATCHALL_KAFKA_TOPIC', 'best-booking-emails')
def put_mail_in_kafka_topic(
        from_address: str,
        mail: str,
        kafka_producer: KafkaProducer,
        timeout: float = 60,
) -> bool:
    """Publish one mail to ``KAFKA_TOPIC`` and wait for the send result.

    :param from_address: sender address; encoded to bytes and used as the record key
    :param mail: mail text; encoded to bytes and used as the record value
    :param kafka_producer: producer used to send the record
    :param timeout: seconds to wait for the send result (defaults to 60)
    :rtype: bool
    :return: True when the send completed, False when a KafkaError was raised
        (the error is logged with its traceback)
    """
    logging.debug('connected to kafka producer')
    try:
        future: FutureRecordMetadata = kafka_producer.send(
            topic=KAFKA_TOPIC,
            key=from_address.encode(),
            value=mail.encode(),
        )
        # Wait for the result here so that a failed send is reported to the
        # caller instead of being lost in the background.
        future.get(timeout=timeout)
    except KafkaError:
        logging.exception('failed to publish mail on kafka topic')
        return False
    return True
def backup_mail(mail: mailbox.MaildirMessage) -> None:
    """Add *mail* to the backup mailbox.

    The mailbox is the module-level ``backup_mbox``, which is created in the
    ``__main__`` guard of this file. A failure to add the message is logged
    with its traceback and not re-raised; the mailbox is flushed and
    unlocked in every case.

    :rtype: None
    :param mail: mailbox.MaildirMessage
    """
    backup_mbox.lock()
    try:
        backup_mbox.add(mail)
    except Exception:
        # logging.exception keeps the traceback, which a bare str(ex) loses.
        logging.exception('failed to add mail to backup mailbox')
    finally:
        backup_mbox.flush()
        backup_mbox.unlock()
def read_mails() -> None:
    """Filter, publish, back up and delete the mails found in the mailbox.

    Every mail in ``MAILBOX_DIRECTORY`` is passed to ``module.filter_mail``.
    A mail that was filtered and published to Kafka is copied to the backup
    mailbox and, once the whole mailbox has been walked, removed from it.
    Mails that could not be filtered or published stay in the mailbox.

    :rtype: None
    """
    mbox: Maildir = mailbox.Maildir(MAILBOX_DIRECTORY)
    # len(mbox) counts the messages without loading and parsing each of them,
    # which len(mbox.items()) would do.
    if len(mbox) == 0:
        logging.info('no new emails found in mbox')
        return
    processed_email_count: int = 0
    kafka_producer: KafkaProducer = KafkaProducer(
        bootstrap_servers=KAFKA_SERVERS_LIST,
        compression_type='gzip',
    )
    try:
        mbox.lock()
        try:
            # Keys are collected first and removed after the loop so the
            # mailbox is not modified while it is being iterated.
            item_keys_to_remove: list = []
            for key, mail in mbox.iteritems():
                logging.info(f"new mail from {mail['From']} with subject: {mail['Subject']}")
                # NOTE: annotations on local variables are not evaluated at
                # runtime (PEP 526), so this line cannot raise on its own.
                filtered_mail: tuple[object, MaildirMessage] or None = module.filter_mail(mail, logging)
                if filtered_mail is not None:
                    logging.info(f"Filtered mail from {mail['From']} with subject: {mail['Subject']}")
                    if put_mail_in_kafka_topic(filtered_mail[0], filtered_mail[1].as_string(), kafka_producer):
                        item_keys_to_remove.append(key)
                        backup_mail(mail)
                        processed_email_count += 1
                        continue
                # Reached when either filtering or publishing failed, which
                # is what the message below states.
                logging.critical("Failed to filter mail or publish mail on kafka topic")
            for key in item_keys_to_remove:
                mbox.remove(key)
        finally:
            mbox.flush()
            mbox.unlock()
            mbox.close()
    finally:
        # Runs even when mailbox cleanup raises, so the producer is always
        # flushed and its resources released.
        kafka_producer.flush()
        kafka_producer.close()
    logging.info(f"mail processed: {processed_email_count}")
if __name__ == '__main__':
    # Kept as a module-level name: backup_mail() looks up ``backup_mbox``
    # as a global.
    backup_mbox: Maildir = Maildir(BACKUP_MAILBOX_DIRECTORY)
    try:
        logging.info("Job started...")
        read_mails()
        logging.info("Job finished")
    finally:
        # Close the backup mailbox whether or not the run succeeded.
        backup_mbox.close()
module.py
# coding=utf-8
import logging
import re
import unicodedata
from mailbox import MaildirMessage
from bs4 import BeautifulSoup, ResultSet, Tag
# NOTE: the annotations in this signature are evaluated when the function is
# defined; subscripting the builtin ``tuple`` there requires Python 3.9+.
def filter_mail(mail: MaildirMessage, log: logging) -> tuple[object, MaildirMessage] or None:
    """Rewrite *mail* so that it carries the sender found inside its body.

    The first e-mail address in the ``From`` header is remembered and
    returned. The text around the first ``<br>`` of the HTML payload is
    searched for an e-mail address, which becomes the new ``From`` header;
    every "Fw: " is stripped from the subject, and the ``div`` blocks matched
    by the selector below are removed from the payload.

    *mail* is modified in place. If a later step fails, headers changed by an
    earlier step stay changed.

    :rtype: tuple[object, MaildirMessage] or None
    :param mail: MaildirMessage
    :param log: logging
    :return: ``(address from the original From header, mail)``, or None when
        any step raised; the failure is logged at CRITICAL level
    """
    try:
        parsed_email: BeautifulSoup = BeautifulSoup(mail._payload, 'html.parser')
        original_from_email_id: object = \
            re.findall(r"([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)", mail['From'])[0]
        # Extracting the original mail sender
        from_element: str = parsed_email.find_all("br")[0].parent.text
        from_element = unicodedata.normalize("NFKD", from_element)
        from_element = from_element.replace("\n", " ")
        # "=2E" is the quoted-printable encoding of "."
        from_element = from_element.replace("=2E", ".")
        emails_from: list = re.findall(r"([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)", from_element)
        # Modifying email headers
        email_subject: str = mail['Subject'].replace("Fw: ", "")
        mail.replace_header("From", emails_from[0])
        mail.replace_header("Subject", email_subject)
        # Removing html blocks from email content. Each matched block is
        # removed on its own: stringifying the whole result set yields
        # "[<div>, <div>]", which never occurs in the payload once there is
        # more than one match.
        html_blocks_to_be_removed: ResultSet[Tag] = parsed_email.select('div[style=\'3D"margin:0\']')
        payload: str = str(parsed_email)
        for html_block in html_blocks_to_be_removed:
            payload = payload.replace(str(html_block), '')
        mail._payload = payload
        return original_from_email_id, mail
    except Exception as e:
        log.critical(f"Failed to Filter email, Exception Message - {e}")
        return None
I am unable to resolve the following error:
Traceback (most recent call last):
File "main.py", line 5, in <module>
import module
File "/Users/manav.mehta/Documents/Projects/email-catcher/module.py", line 9, in <module>
def filter_mail(mail: MaildirMessage, log: logging) -> tuple[object, MaildirMessage] or None:
TypeError: 'type' object is not subscriptable
I am using Python 3.8. I have tried every type hint I could think of, but I cannot understand what the issue is. I believe it could be caused by this line:
filtered_mail: tuple[object, MaildirMessage] or None = module.filter_mail(mail, logging)
and by the fact that I then index into the returned object to get its elements, which may be what creates the issue:
if put_mail_in_kafka_topic(filtered_mail[0], filtered_mail[1].as_string(), kafka_producer):
But I am not sure what it is. It could also be caused by the line below:
original_from_email_id: object = \
re.findall(r"([a-zA-Z0-9._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)", mail['From'])[0]
Solution
Your Python version does not support subscripting built-in types such as tuple; that feature was introduced in Python 3.9.
In your case, you may replace tuple[...]
with
import typing
typing.Tuple[...]
or just use a special import keeping your original code, as brilliantly noted by @Taavi:
from __future__ import annotations
tuple[...]
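Another thing is that you can't write
tuple[object, MaildirMessage] or None
because `or` is evaluated as an ordinary boolean expression and simply yields tuple[object, MaildirMessage], so the None alternative is lost. Instead, you must write
tuple[object, MaildirMessage] | None
(before Python 3.10 the `|` form only works together with `from __future__ import annotations`; otherwise use typing.Optional[typing.Tuple[object, MaildirMessage]]).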
Answered By - enzo
0 comments:
Post a Comment
Note: Only a member of this blog may post a comment.