#!/usr/bin/env python3
"""Split an .eml message into sidecar files written next to it.

For ``path/to/message.eml`` the script writes:

* ``message.subject.txt``    - the Subject header
* ``message.txt``            - the first text/plain body part
* ``message.attachmentNN*``  - one file per attachment, numbered from 01
* ``message.addresses.json`` - From / To / Cc / Bcc e-mail addresses
"""
import sys
import os
import json
import email
from email import policy
from email.parser import BytesParser
from email.utils import getaddresses


def _decode_text(part):
    """Return the text of *part* as ``str``, or ``None`` if it has no payload.

    The part's declared charset is tried first; UTF-8 is the fallback when
    the charset is missing, unknown to Python, or does not match the bytes.
    """
    raw = part.get_payload(decode=True)
    if raw is None:
        return None

    declared = part.get_content_charset()
    for codec in (declared, "utf-8"):
        if not codec:
            continue
        try:
            return raw.decode(codec)
        except (LookupError, UnicodeDecodeError):
            # Unknown codec name or mislabelled bytes: try the next candidate.
            continue

    # Last resort: keep whatever is valid UTF-8 and drop the rest.
    return raw.decode("utf-8", errors="ignore")


def _find_body_text(msg):
    """Return the first text/plain body of *msg*, or ``None`` if there is none.

    In a multipart message, parts whose disposition is ``attachment`` are
    skipped so that an attached .txt file is not mistaken for the body.
    """
    if msg.is_multipart():
        candidates = (
            part
            for part in msg.walk()
            if part.get_content_disposition() != "attachment"
        )
    else:
        candidates = [msg]

    for part in candidates:
        if part.get_content_type() == "text/plain":
            return _decode_text(part)
    return None


def _attachment_bytes(part):
    """Return the bytes to store on disk for the attachment *part*."""
    data = part.get_payload(decode=True)
    if data is not None:
        return data
    if part.is_multipart():
        # Container parts (e.g. an attached message/rfc822) have no decodable
        # payload of their own; serialise the embedded message(s) instead.
        return b"".join(child.as_bytes() for child in part.get_payload())
    return b""


def _save_attachments(msg, base):
    """Write every attachment of *msg* to ``{base}.attachmentNN{ext}``.

    Only the extension of the sender-supplied filename is reused; the rest of
    the name is generated from a running counter starting at 01.
    """
    attachments = (
        part
        for part in msg.walk()
        if part.get_content_disposition() == "attachment"
    )
    for number, part in enumerate(attachments, start=1):
        filename = part.get_filename() or f"attachment{number:02d}.bin"
        ext = os.path.splitext(filename)[1]
        with open(f"{base}.attachment{number:02d}{ext}", "wb") as f:
            f.write(_attachment_bytes(part))


def _collect_addresses(msg):
    """Return the sender and recipient addresses of *msg* as a JSON-ready dict."""
    from_parsed = getaddresses([msg.get("from", "")])
    to_parsed = getaddresses(msg.get_all("to", []))
    cc_parsed = getaddresses(msg.get_all("cc", []))
    bcc_parsed = getaddresses(msg.get_all("bcc", []))

    return {
        "fromAddress": from_parsed[0][1] if from_parsed else "",
        "toAddresses": [addr for _, addr in to_parsed],
        "ccAddresses": [addr for _, addr in cc_parsed],
        "ccoAddresses": [addr for _, addr in bcc_parsed],  # BCC
    }


def main():
    """Extract subject, text body, attachments and addresses from an .eml file.

    The path of the .eml file is read from ``sys.argv[1]``. All output files
    are written next to it, sharing its name without the extension. Exits
    with status 1 when the argument is missing or is not an existing file.
    """
    if len(sys.argv) < 2:
        print("Usage: extract_eml.py <file.eml>")
        sys.exit(1)

    eml_file = sys.argv[1]
    if not os.path.isfile(eml_file):
        print(f"Error: file not found: {eml_file}")
        sys.exit(1)

    base = os.path.splitext(eml_file)[0]

    # Read the email
    with open(eml_file, "rb") as f:
        msg = BytesParser(policy=policy.default).parse(f)

    # Subject
    subject = msg.get("subject", "").strip()
    with open(f"{base}.subject.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(subject)

    # Text body
    body_text = _find_body_text(msg)
    if body_text is None:
        body_text = "(No text/plain body found)"
    with open(f"{base}.txt", "w", encoding="utf-8", errors="ignore") as f:
        f.write(body_text)

    # Attachments
    _save_attachments(msg, base)

    # Addresses
    with open(f"{base}.addresses.json", "w", encoding="utf-8") as f:
        json.dump(_collect_addresses(msg), f, indent=4)

    print("Done.")


if __name__ == "__main__":
    main()