Add extract_email.py
This commit is contained in:
commit
c7ac1ea6a2
112
extract_email.py
Normal file
112
extract_email.py
Normal file
@ -0,0 +1,112 @@
|
||||
#!/usr/bin/env python3
|
||||
import sys
|
||||
import os
|
||||
import json
|
||||
import email
|
||||
from email import policy
|
||||
from email.parser import BytesParser
|
||||
from email.utils import getaddresses
|
||||
|
||||
_NO_BODY_PLACEHOLDER = "(No text/plain body found)"


def _decode_text(part):
    """Return the text of a non-multipart *part*, or None if it has no payload.

    The charset declared on the part is tried first. If that label is
    unknown, or the bytes do not match it, the payload is decoded as UTF-8
    with replacement characters so the extraction stays best-effort.
    """
    payload = part.get_payload(decode=True)
    if payload is None:
        return None

    charset = part.get_content_charset() or "utf-8"
    try:
        return payload.decode(charset)
    except (LookupError, UnicodeDecodeError):
        # Mislabelled or corrupt text: keep what can be read instead of failing.
        return payload.decode("utf-8", errors="replace")


def _find_plain_text_body(msg):
    """Return the first inline text/plain part of *msg* as str, or None."""
    for part in msg.walk():
        if part.get_content_type() != "text/plain":
            continue
        # A text file sent as an attachment is not the message body.
        if part.get_content_disposition() == "attachment":
            continue
        text = _decode_text(part)
        if text is not None:
            return text
    return None


def _attachment_bytes(part):
    """Return the bytes to store for an attachment *part*.

    get_payload(decode=True) returns None for container parts (for example
    an attached message/rfc822), so those are serialised instead of being
    passed to write() as None.
    """
    data = part.get_payload(decode=True)
    if data is not None:
        return data
    if part.get_content_maintype() == "message":
        # Store the enclosed message(s) themselves, without the wrapper headers.
        return b"".join(inner.as_bytes() for inner in part.get_payload())
    return part.as_bytes()


def _save_attachments(msg, base):
    """Write every attachment part of *msg* to '<base>.attachmentNN<ext>'."""
    counter = 1
    for part in msg.walk():
        if part.get_content_disposition() != "attachment":
            continue

        filename = part.get_filename() or f"attachment{counter:02d}.bin"
        # Only the extension of the sender-supplied name is reused; the
        # output name itself is derived from the input path.
        _, ext = os.path.splitext(filename)
        outname = f"{base}.attachment{counter:02d}{ext}"

        with open(outname, "wb") as f:
            f.write(_attachment_bytes(part))

        counter += 1


def _parse_addresses(header_values):
    """Return the non-empty e-mail addresses found in *header_values*."""
    return [addr for _name, addr in getaddresses(header_values) if addr]


def main():
    """Split the .eml file named in sys.argv[1] into separate files.

    Using the input path without its extension as <base>, this writes:

    * <base>.subject.txt      - the Subject header
    * <base>.txt              - the first inline text/plain body, or a
                                placeholder line when there is none
    * <base>.attachmentNN.ext - one file per attachment part
    * <base>.addresses.json   - From / To / Cc / Bcc addresses

    Exits with status 1 when no argument is given or the file does not exist.
    """
    if len(sys.argv) < 2:
        print("Usage: extract_email.py <file.eml>", file=sys.stderr)
        sys.exit(1)

    eml_file = sys.argv[1]

    if not os.path.isfile(eml_file):
        print(f"Error: file not found: {eml_file}", file=sys.stderr)
        sys.exit(1)

    base = os.path.splitext(eml_file)[0]

    # Read the email
    with open(eml_file, "rb") as f:
        msg = BytesParser(policy=policy.default).parse(f)

    # ---------------------------
    # Extract Subject
    # ---------------------------
    subject = msg.get("subject", "").strip()

    with open(f"{base}.subject.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(subject)

    # ---------------------------
    # Extract Text Body
    # ---------------------------
    body_text = _find_plain_text_body(msg)
    if body_text is None:
        body_text = _NO_BODY_PLACEHOLDER

    with open(f"{base}.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(body_text)

    # ---------------------------
    # Extract Attachments
    # ---------------------------
    _save_attachments(msg, base)

    # ---------------------------
    # Extract Addresses
    # ---------------------------
    senders = _parse_addresses([msg.get("from", "")])

    addresses_json = {
        "fromAddress": senders[0] if senders else "",
        "toAddresses": _parse_addresses(msg.get_all("to", [])),
        "ccAddresses": _parse_addresses(msg.get_all("cc", [])),
        "ccoAddresses": _parse_addresses(msg.get_all("bcc", [])),  # BCC
    }

    with open(f"{base}.addresses.json", "w", encoding="utf-8") as f:
        json.dump(addresses_json, f, indent=4)

    print("Done.")
|
||||
|
||||
# Run the extraction only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user