Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
ftm-types = "0.4"
rusqlite = { version = "0.31", features = ["bundled"] }
sha2 = "0.10"
duckdb = { version = "1.10502", features = ["bundled"] }
ureq = { version = "3", features = ["json"] }

Expand Down
164 changes: 156 additions & 8 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ use ratatui::{
};
use rusqlite::{Connection, params};
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::{io, path::PathBuf, rc::Rc, time::Instant};

use ftm_types::generated::entities::{Email as FtmEmail, Folder as FtmFolder};
Expand Down Expand Up @@ -1295,6 +1296,7 @@ fn create_export_schema(conn: &Connection) -> Result<(), rusqlite::Error> {
id INTEGER PRIMARY KEY,
folder_id INTEGER NOT NULL REFERENCES folders(id),
message_class TEXT NOT NULL,
message_hash TEXT NOT NULL UNIQUE,
subject TEXT,
sender TEXT,
to_recipients TEXT,
Expand Down Expand Up @@ -1335,6 +1337,36 @@ fn filetime_to_iso(ticks: i64) -> Option<String> {
}
}

/// Computes a stable SHA-256 content hash over the identifying fields of a
/// message; the hex digest feeds the `message_hash ... UNIQUE` column so that
/// duplicate messages are dropped by `INSERT OR IGNORE` during export.
///
/// Each field is framed as `(little-endian u64 length, bytes)` before being
/// fed to the hasher. Length-prefixing makes the encoding injective: a bare
/// separator byte (the previous `\x00` scheme) is ambiguous because the body
/// fields — RTF in particular, which is raw bytes — may themselves contain
/// the separator, so two different messages could produce the same digest
/// and one of them would be silently discarded as a "duplicate".
///
/// A missing field (`None`) hashes the same as an empty one; this is a
/// deliberate normalization so a message exported with an absent field and
/// one exported with an empty field dedup together.
fn message_hash(
    sender: Option<&str>,
    subject: Option<&str>,
    submit_time: Option<&str>,
    to_recipients: Option<&str>,
    body_text: Option<&str>,
    body_html: Option<&str>,
    body_rtf: Option<&[u8]>,
) -> String {
    let mut h = Sha256::new();
    let fields: [&[u8]; 7] = [
        sender.unwrap_or("").as_bytes(),
        subject.unwrap_or("").as_bytes(),
        submit_time.unwrap_or("").as_bytes(),
        to_recipients.unwrap_or("").as_bytes(),
        body_text.unwrap_or("").as_bytes(),
        body_html.unwrap_or("").as_bytes(),
        body_rtf.unwrap_or(b""),
    ];
    for field in fields {
        // Length prefix frames each field unambiguously, regardless of its
        // contents.
        h.update((field.len() as u64).to_le_bytes());
        h.update(field);
    }
    format!("{:x}", h.finalize())
}

fn export_folder(
store: Rc<UnicodeStore>,
folder: &UnicodeFolder,
Expand Down Expand Up @@ -1440,14 +1472,25 @@ fn export_folder(
})
.unwrap_or(0);

conn.execute(
"INSERT INTO messages (folder_id, message_class, subject, sender,
let hash = message_hash(
sender.as_deref(),
subject.as_deref(),
submit_time.as_deref(),
to_recipients.as_deref(),
body_text.as_deref(),
body_html.as_deref(),
body_rtf.as_deref(),
);

let inserted = conn.execute(
"INSERT OR IGNORE INTO messages (folder_id, message_class, message_hash, subject, sender,
to_recipients, cc_recipients, submit_time, delivery_time,
body_text, body_html, body_rtf, attachment_count)
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
params![
folder_id,
&message_class,
&hash,
&subject,
&sender,
&to_recipients,
Expand All @@ -1460,7 +1503,9 @@ fn export_folder(
attachment_count,
],
)?;
counts.1 += 1;
if inserted > 0 {
counts.1 += 1;
}
}
}

Expand Down Expand Up @@ -1557,6 +1602,7 @@ fn create_duckdb_schema(conn: &DuckConnection) -> Result<(), duckdb::Error> {
id BIGINT PRIMARY KEY DEFAULT nextval('messages_id_seq'),
folder_id BIGINT NOT NULL,
message_class VARCHAR NOT NULL,
message_hash VARCHAR NOT NULL UNIQUE,
subject VARCHAR,
sender VARCHAR,
to_recipients VARCHAR,
Expand Down Expand Up @@ -1693,14 +1739,25 @@ fn export_folder_duckdb(
})
.unwrap_or(0);

conn.execute(
"INSERT INTO messages (folder_id, message_class, subject, sender,
let hash = message_hash(
sender.as_deref(),
subject.as_deref(),
submit_time.as_deref(),
to_recipients.as_deref(),
body_text.as_deref(),
body_html.as_deref(),
body_rtf.as_deref(),
);

let inserted = conn.execute(
"INSERT OR IGNORE INTO messages (folder_id, message_class, message_hash, subject, sender,
to_recipients, cc_recipients, submit_time, delivery_time,
body_text, body_html, body_rtf, attachment_count)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
duckdb::params![
folder_id,
&message_class,
&hash,
&subject,
&sender,
&to_recipients,
Expand All @@ -1713,7 +1770,9 @@ fn export_folder_duckdb(
attachment_count,
],
)?;
counts.1 += 1;
if inserted > 0 {
counts.1 += 1;
}
}
}

Expand Down Expand Up @@ -4151,6 +4210,95 @@ mod tests {
let _ = std::fs::remove_file(&db_path);
}

// ── duplicate message_hash tests ─────────────────────────────────────────

#[test]
fn test_export_sqlite_duplicate_hash_skipped() {
    // A second row carrying an already-seen message_hash must be silently
    // dropped by INSERT OR IGNORE, leaving the first row untouched.
    let db_path = std::env::temp_dir().join("pstexplorer_test_export_dedup.db");
    let _ = std::fs::remove_file(&db_path);

    let conn = Connection::open(&db_path).unwrap();
    create_export_schema(&conn).unwrap();

    conn.execute(
        "INSERT INTO folders (parent_id, name, path) VALUES (NULL, 'Inbox', 'Inbox')",
        [],
    )
    .unwrap();
    let folder = conn.last_insert_rowid();

    // Returns the number of rows actually written.
    let try_insert = |h: &str, subj: &str| -> usize {
        conn.execute(
            "INSERT OR IGNORE INTO messages
             (folder_id, message_class, message_hash, subject, attachment_count)
             VALUES (?1, 'IPM.NOTE', ?2, ?3, 0)",
            params![folder, h, subj],
        )
        .unwrap()
    };

    try_insert("deadbeef", "Original");
    let rows_changed = try_insert("deadbeef", "Duplicate with same hash");
    assert_eq!(rows_changed, 0, "duplicate insert should be a no-op");

    let total: i64 = conn
        .query_row("SELECT COUNT(*) FROM messages", [], |r| r.get(0))
        .unwrap();
    assert_eq!(total, 1, "only one row should exist after duplicate insert");

    let kept: String = conn
        .query_row("SELECT subject FROM messages", [], |r| r.get(0))
        .unwrap();
    assert_eq!(kept, "Original", "first message should be kept");

    let _ = std::fs::remove_file(&db_path);
}

#[test]
fn test_export_duckdb_duplicate_hash_skipped() {
    // DuckDB counterpart of the SQLite dedup test: the UNIQUE(message_hash)
    // constraint plus INSERT OR IGNORE must drop the second, identical hash.
    let db_path = std::env::temp_dir().join("pstexplorer_test_export_dedup.duckdb");
    let _ = std::fs::remove_file(&db_path);

    let conn = DuckConnection::open(&db_path).unwrap();
    create_duckdb_schema(&conn).unwrap();

    // DuckDB has no last_insert_rowid; use RETURNING to learn the folder id.
    let folder: i64 = conn
        .query_row(
            "INSERT INTO folders (parent_id, name, path) VALUES (NULL, 'Inbox', 'Inbox') RETURNING id",
            [],
            |r| r.get(0),
        )
        .unwrap();

    // Returns the number of rows actually written.
    let try_insert = |h: &str, subj: &str| -> usize {
        conn.execute(
            "INSERT OR IGNORE INTO messages
             (folder_id, message_class, message_hash, subject, attachment_count)
             VALUES (?, 'IPM.NOTE', ?, ?, 0)",
            duckdb::params![folder, h, subj],
        )
        .unwrap()
    };

    try_insert("deadbeef", "Original");
    let rows_changed = try_insert("deadbeef", "Duplicate with same hash");
    assert_eq!(rows_changed, 0, "duplicate insert should be a no-op");

    let total: i64 = conn
        .query_row("SELECT COUNT(*) FROM messages", [], |r| r.get(0))
        .unwrap();
    assert_eq!(total, 1, "only one row should exist after duplicate insert");

    let kept: String = conn
        .query_row("SELECT subject FROM messages", [], |r| r.get(0))
        .unwrap();
    assert_eq!(kept, "Original", "first message should be kept");

    let _ = std::fs::remove_file(&db_path);
}

// ── FTM output tests ─────────────────────────────────────────────────────

#[test]
Expand Down
Loading