From f07e7a77a8cf1671f85f7b82a939575d1915585c Mon Sep 17 00:00:00 2001
From: "Frederico @ VilaRosa02"
Date: Wed, 27 Aug 2025 08:49:21 +0000
Subject: [PATCH] init

---
Reviewer notes (free text here is not part of the commit message):
 * Line structure restored; the source of this patch had been collapsed onto
   four lines.
 * The e-mail address of the From: header and roughly 16 lines at the top of
   src/mail-route were missing from the mangled source. The From: header is
   left as found; the mail-route preamble is a reconstruction and is marked
   NOTE(review) inside the file.
 * The index hashes below are those of the original blobs.

 .gitignore                   |  2 +
 .gitmodules                  |  3 ++
 make-llm-files               |  1 +
 src/Makefile                 | 72 +++++++++++++++++++++++++++++++
 src/extract_text_from_eml.sh | 48 +++++++++++++++++++++
 src/mail-route               | 82 ++++++++++++++++++++++++++++++++++++
 6 files changed, 208 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 .gitmodules
 create mode 160000 make-llm-files
 create mode 100644 src/Makefile
 create mode 100755 src/extract_text_from_eml.sh
 create mode 100755 src/mail-route

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..24f9f8d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+.env
+data/
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..3ae1348
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "make-llm-files"]
+	path = make-llm-files
+	url = https://git.fredericofalcao.com/git/make-llm-files
diff --git a/make-llm-files b/make-llm-files
new file mode 160000
index 0000000..5a24d49
--- /dev/null
+++ b/make-llm-files
@@ -0,0 +1 @@
+Subproject commit 5a24d4946d3c53105b191bd9c43c5754bc71d0f1
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..fc87226
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,72 @@
+include ../.env
+export
+
+.SECONDARY:
+
+
+SHELL=/bin/bash -x
+ROOT_DIR=/mnt/servers/aws.fredericofalcao.com/api.fredericofalcao.com/data/emails
+#SRC_EML_FILES=$(notdir $(shell find "$(ROOT_DIR)" -type f -name "*.eml" | head -n 2))
+LATEST_FILE:=$(shell find "$(ROOT_DIR)" -name "*.eml" -printf '%T@ %p\n' | sort -n | tail -1)
+LATEST_FILE_MODIFIED_TIME=$(shell echo "$(LATEST_FILE)" | cut -d' ' -f1)
+LATEST_FILE_FILE_PATH=$(shell echo "$(LATEST_FILE)" | cut -d' ' -f2-)
+
+SRC_EML_FILES=../data/$(notdir $(LATEST_FILE_FILE_PATH))
+SRC_TXT_FILES=$(subst .eml,.email.txt,$(SRC_EML_FILES))
+SRC_SCORE_FILES=$(subst .email.txt,.score.txt,$(SRC_TXT_FILES))
+SRC_FILES=$(subst .score.txt,.notification.txt,$(SRC_SCORE_FILES))
+
+.PHONY: all clean
+all: $(SRC_FILES)
+
+# 6. Run the MAIL-ROUTE script to either notify the user of a new message, or ignore
+%.notification.txt: %.score.txt %.header.txt %.summary.txt
+	TELEGRAM_BOT_TOKEN=$(TELEGRAM_BOT_TOKEN) TELEGRAM_CHAT_ID=$(TELEGRAM_CHAT_ID) ./mail-route $^ > $@
+
+# 5. Build a short "sender-domain -> recipient-name" header from the From:/To: lines
+%.header.txt: %.eml
+	echo > $@
+	cat $< | grep -i -e '^From:' | grep -o -E -e '@[a-zA-Z0-9_.-]+' | tr -d '@' | tr -d '\n' >> $@
+	printf " -> " >> $@
+	cat $< | grep -i -e '^To:' | grep -o -E -e '[a-zA-Z0-9_.-]+@' | tr -d '@' | tr -d '\n' >> $@
+	printf "\n" >> $@
+
+
+# 4. Generate a summary of the email
+%.summary.txt: %.email.txt
+	cd ../make-llm-files && make clean
+	echo "You are a machine executive assistant tasked with parsing emails. User will send the content of a received email, and you (agent) must send a one phrase (max 12 words) sentence as an executive summary of the email." > ../make-llm-files/01_root.sys
+	cp $< ../make-llm-files/user_message.txt
+	echo '{}' > ../make-llm-files/settings.json
+	cd ../make-llm-files && make
+	mv ../make-llm-files/agent_message.txt $@
+
+
+# 3. Convert a text-only email to a score (something a script/email-router can react to)
+%.score.txt: %.email.txt
+	cd ../make-llm-files && make clean
+	echo "You are a machine executive assistant tasked with parsing emails urgency/importance. User will send the content of a received email, and you (agent) must REPLY ONLY with a single score [0.00-1.00] where you assess how important/urgent the email is." > ../make-llm-files/01_root.sys
+	cp $< ../make-llm-files/user_message.txt
+	echo '{"temperature": 0.8,"max_output_tokens":4}' > ../make-llm-files/settings.json
+	cd ../make-llm-files && make
+	mv ../make-llm-files/agent_message.txt $@
+
+# 2. Convert an eml to text-only (something that an llm can parse)
+%.email.txt: %.eml
+	./extract_text_from_eml.sh $< > $@
+
+# 1. Copy the email file (eml) locally; the target directory is git-ignored, so create it on demand
+%.eml:
+	mkdir -p $(@D) && cp "$(shell find "$(ROOT_DIR)" -name "$(notdir $@)" | head -n 1)" $@
+
+
+telegram_notification.log: email_message.eml
+	curl -s -X POST "https://api.telegram.org/bot$(TELEGRAM_BOT_TOKEN)/sendDocument" \
+		-F chat_id="$(TELEGRAM_CHAT_ID)" \
+		-F caption="Here is your file 📎" \
+		-F document="@$<" > $@
+
+
+
+clean:
+	rm -f *.sys *.txt *.log *.eml
diff --git a/src/extract_text_from_eml.sh b/src/extract_text_from_eml.sh
new file mode 100755
index 0000000..8f15a66
--- /dev/null
+++ b/src/extract_text_from_eml.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+# Print a text-only rendering of an .eml file on stdout.
+# Usage: extract_text_from_eml.sh FILE.eml
+#
+# REQUIRES: munpack, qprint and w3m, e.g. on Debian/Ubuntu (munpack ships in mpack):
+#	sudo apt-get -y install mpack qprint w3m
+EMAIL_FILENAME=$1
+
+find_text() {
+
+# Non-multipart fallback (single-part message)
+if [[ "$ATTACHMENTS" == "Did not find anything to unpack from standard input" ]]; then
+	grep -A999999 -i -e '^From:' "$EMAIL_FILENAME"
+	return
+fi
+
+# Prefer the first TEXT/PLAIN part, unless it is empty
+TEXT_PLAIN_FILE=$(echo "$ATTACHMENTS" | grep -m 1 "text/plain" | awk '{print $1}')
+if [[ -n "$TEXT_PLAIN_FILE" && -n "$(cat "$T_DIR/$TEXT_PLAIN_FILE")" ]]
+then
+	# Anchored so that "Reply-To:", "Delivered-To:" or body text are not picked up
+	grep -m 1 -i -e '^From:' "$EMAIL_FILENAME"
+	grep -m 1 -i -e '^To:' "$EMAIL_FILENAME"
+	grep -m 1 -i -e '^Subject:' "$EMAIL_FILENAME"
+	qprint -d "$T_DIR/$TEXT_PLAIN_FILE"
+	return
+fi
+
+# Otherwise fall back to the first TEXT/HTML part, converted to text
+HTML_FILE=$(echo "$ATTACHMENTS" | grep -m 1 "text/html" | awk '{print $1}')
+if [[ -n "$HTML_FILE" ]]
+then
+	qprint -d "$T_DIR/$HTML_FILE" | w3m -dump -I UTF-8 -O UTF-8 -T text/html -
+	return
+fi
+}
+
+if [[ -z "$EMAIL_FILENAME" ]]; then
+	echo "usage: $0 FILE.eml" >&2
+	exit 1
+fi
+
+T_DIR=$(mktemp -d)
+ATTACHMENTS=$(munpack -q -t -C "$T_DIR" < "$EMAIL_FILENAME")
+
+find_text
+
+rm -rf "$T_DIR"
diff --git a/src/mail-route b/src/mail-route
new file mode 100755
index 0000000..d09c2fe
--- /dev/null
+++ b/src/mail-route
@@ -0,0 +1,82 @@
+#!/usr/bin/env php
+<?php
+// mail-route: send an email's header and summary to Telegram, tagged by score.
+//
+// Usage: mail-route SCORE_FILE HEADER_FILE SUMMARY_FILE
+// Env:   TELEGRAM_BOT_TOKEN, TELEGRAM_CHAT_ID
+//
+// NOTE(review): the original lines between the shebang and the array_map()
+// call were lost when this patch was mangled. The argument handling below is
+// reconstructed from the Makefile call (`./mail-route $^`) and from the
+// variables used further down ($score, $message) - confirm against the
+// original commit.
+
+array_shift($argv); // drop the script name
+if (count($argv) < 3) {
+    fwrite(STDERR, "usage: mail-route SCORE_FILE HEADER_FILE SUMMARY_FILE" . PHP_EOL);
+    exit(2);
+}
+[$score, $header, $summary] = array_map(fn($o) => trim(file_get_contents($o)), $argv);
+$score = is_numeric($score) ? (float) $score : 1.0; // unparsable score: treat as urgent
+$message = $header . PHP_EOL . $summary;
+$telegram_bot_token = getenv()["TELEGRAM_BOT_TOKEN"] ?? "";
+$telegram_chat_id = getenv()["TELEGRAM_CHAT_ID"] ?? "";
+
+/**
+ * Send a text message to a Telegram chat through the Bot API sendMessage call.
+ *
+ * @param string $message            Text to send.
+ * @param string $telegram_bot_token Bot token, embedded in the request URL.
+ * @param string $telegram_chat_id   Chat that receives the message.
+ * @return string Response body, or "cURL error: ..." if the request failed.
+ */
+function sendTelegramMessage($message, $telegram_bot_token, $telegram_chat_id) {
+
+    // Build URL
+    $url = "https://api.telegram.org/bot{$telegram_bot_token}/sendMessage";
+
+    // cURL request
+    $ch = curl_init($url);
+    curl_setopt_array($ch, [
+        CURLOPT_POST => true,
+        CURLOPT_RETURNTRANSFER => true,
+        CURLOPT_HTTPHEADER => ['Content-Type: application/json'],
+        CURLOPT_POSTFIELDS => json_encode([
+            'chat_id' => $telegram_chat_id,
+            'text' => $message,
+        ]),
+    ]);
+
+    $response = curl_exec($ch);
+    if ($response === false) {
+        $response = "cURL error: " . curl_error($ch) . PHP_EOL;
+    }
+
+    // Close on every path; the original returned before reaching curl_close().
+    curl_close($ch);
+
+    return $response;
+}
+
+// Pick a label for the score bucket. Every bucket is still sent to Telegram;
+// the branch comments are the ones from the original script.
+if ($score < 0.2) {
+    // DELETE / REJECT
+    $label = "_message rejected_";
+} elseif ($score < 0.4) {
+    // NEVER NOTIFY
+    $label = "_message would be archived_";
+} elseif ($score < 0.6) {
+    // NOTIFY AT THE END-OF-WEEK
+    $label = "_message is low-priority_";
+} elseif ($score < 0.8) {
+    // NOTIFY AT THE END-OF-DAY
+    $label = "_message is medium-priority_";
+} else {
+    // NOTIFY THE USER IMMEDIATELY
+    $label = "_message is high-priority_";
+}
+
+sendTelegramMessage($label, $telegram_bot_token, $telegram_chat_id);
+sendTelegramMessage($message, $telegram_bot_token, $telegram_chat_id);
+