Add extract_email.py
This commit is contained in:
commit
c7ac1ea6a2
112
extract_email.py
Normal file
112
extract_email.py
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import email
|
||||||
|
from email import policy
|
||||||
|
from email.parser import BytesParser
|
||||||
|
from email.utils import getaddresses
|
||||||
|
|
||||||
|
def _decode_text_part(part):
    """Return the decoded text of a single MIME part, or None.

    The transfer encoding (base64 / quoted-printable) is undone first and the
    resulting bytes are decoded with the charset declared on the part.  When
    no charset is declared, or the declared one is unknown to Python, UTF-8 is
    used.  Undecodable bytes are dropped rather than raising, so extraction
    stays best-effort.

    Returns None when the part carries no byte payload (container parts).
    """
    raw = part.get_payload(decode=True)
    if raw is None:
        return None
    charset = part.get_content_charset() or "utf-8"
    try:
        return raw.decode(charset, errors="ignore")
    except (LookupError, ValueError):
        # Unknown or malformed charset label: fall back to UTF-8.
        return raw.decode("utf-8", errors="ignore")


def _extract_text_body(msg):
    """Return the text of the first text/plain body part, or None.

    Sub-parts explicitly marked as attachments are skipped so that an
    attached .txt file is not mistaken for the message body.  The root
    message itself is never skipped on that ground.
    """
    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        if part is not msg and part.get_content_disposition() == "attachment":
            continue
        return _decode_text_part(part)
    return None


def _safe_extension(filename):
    """Return a filesystem-safe extension (with leading dot) for filename.

    The filename comes from the email and is untrusted, so only letters,
    digits, '-' and '_' are kept.  Returns "" when nothing usable remains.
    """
    _, ext = os.path.splitext(filename)
    cleaned = "".join(ch for ch in ext[1:] if ch.isalnum() or ch in "-_")
    return f".{cleaned}" if cleaned else ""


def _save_attachments(msg, base):
    """Write every attachment of msg to '<base>.attachmentNN<ext>'.

    NN is a 1-based, two-digit counter over the attachments actually written.
    Returns the list of paths written.
    """
    written = []
    counter = 1
    for part in msg.walk():
        if part.get_content_disposition() != "attachment":
            continue

        data = part.get_payload(decode=True)
        if data is None:
            # Container parts have no byte payload.  An attached message
            # (message/rfc822) is serialised back to bytes; anything else
            # cannot be written meaningfully and is skipped.
            nested = part.get_payload()
            if (
                part.get_content_maintype() == "message"
                and isinstance(nested, list)
                and nested
            ):
                data = nested[0].as_bytes()
            else:
                print(
                    f"Warning: skipping attachment without payload "
                    f"({part.get_content_type()})",
                    file=sys.stderr,
                )
                continue

        filename = part.get_filename() or f"attachment{counter:02d}.bin"
        outname = f"{base}.attachment{counter:02d}{_safe_extension(filename)}"
        with open(outname, "wb") as f:
            f.write(data)

        written.append(outname)
        counter += 1
    return written


def _collect_addresses(msg):
    """Return the sender and recipient addresses of msg as a dict.

    Only the bare addresses are kept (display names are dropped).  The BCC
    list is stored under the key "ccoAddresses".
    """
    def addresses_of(header):
        return [addr for _, addr in getaddresses(msg.get_all(header, []))]

    sender = getaddresses([msg.get("from", "")])
    return {
        "fromAddress": sender[0][1] if sender else "",
        "toAddresses": addresses_of("to"),
        "ccAddresses": addresses_of("cc"),
        "ccoAddresses": addresses_of("bcc"),  # BCC
    }


def main():
    """Split the .eml file named in sys.argv[1] into separate files.

    With <base> being the input path without its extension, writes:
      <base>.subject.txt      the Subject header
      <base>.txt              the first text/plain body (or a placeholder)
      <base>.attachmentNN...  one file per attachment
      <base>.addresses.json   from / to / cc / bcc addresses

    Exits with status 1 when no argument is given or the file is missing.
    """
    if len(sys.argv) < 2:
        print("Usage: extract_email.py <file.eml>", file=sys.stderr)
        sys.exit(1)

    eml_file = sys.argv[1]

    if not os.path.isfile(eml_file):
        print(f"Error: file not found: {eml_file}", file=sys.stderr)
        sys.exit(1)

    base = os.path.splitext(eml_file)[0]

    # Read the email
    with open(eml_file, "rb") as f:
        msg = BytesParser(policy=policy.default).parse(f)

    # Subject
    subject = msg.get("subject", "").strip()
    with open(f"{base}.subject.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(subject)

    # Text body
    body_text = _extract_text_body(msg)
    if body_text is None:
        body_text = "(No text/plain body found)"
    with open(f"{base}.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(body_text)

    # Attachments
    _save_attachments(msg, base)

    # Addresses
    with open(f"{base}.addresses.json", "w", encoding="utf-8") as f:
        json.dump(_collect_addresses(msg), f, indent=4)

    print("Done.")
|
||||||
|
|
||||||
|
# Script entry point: run the extractor only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Loading…
x
Reference in New Issue
Block a user