From aaea693a8862739a889a2316f32094f5efa00ffd Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Sat, 7 Mar 2026 11:17:16 +0000
Subject: [PATCH 1/7] Add top_features functions

---
 Project.toml         |  2 ++
 docs/src/features.md | 19 +++++++++++++++++++
 src/TextAnalysis.jl  |  2 ++
 src/corpus.jl        | 11 +++++++++++
 src/document.jl      |  9 +++++++++
 src/dtm.jl           | 13 +++++++++++++
 test/corpus.jl       |  3 +++
 test/document.jl     |  7 +++++++
 test/dtm.jl          | 10 ++++++++++
 test/runtests.jl     |  1 +
 10 files changed, 77 insertions(+)

diff --git a/Project.toml b/Project.toml
index 441da011..731365f4 100644
--- a/Project.toml
+++ b/Project.toml
@@ -12,6 +12,7 @@ InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 Languages = "8ef0a80b-9436-5d2c-a485-80b904378c43"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
@@ -29,6 +30,7 @@ DelimitedFiles = "1"
 DocStringExtensions = "0.9"
 JSON = "0.21, 1"
 Languages = "0.4"
+OrderedCollections = "1.8.1"
 ProgressMeter = "1"
 Snowball = "0.1"
 Statistics = "1"
diff --git a/docs/src/features.md b/docs/src/features.md
index eb95e6db..a6b8bcba 100644
--- a/docs/src/features.md
+++ b/docs/src/features.md
@@ -102,6 +102,25 @@ julia> hash_dtv(crps[1])
  0  0  0  0  0  0  0  0  0  0  0  0  0  …  0  0  0  0  0  0  0  0  0  0  0  0
 ```
 
+## Top Features
+
+We can use the function `top_features()` to quickly view the top features of a `Document`, `DocumentTermMatrix` or `Corpus`.
+
+```julia
+julia> top_features(m) # or `top_features(crps)`
+OrderedCollections.OrderedDict{String, Int64} with 6 entries:
+  "To"     => 2
+  "be"     => 2
+  "become" => 2
+  "not"    => 2
+  "or"     => 2
+  "to"     => 2
+julia> top_features(m, 2)
+2-element Vector{String}:
+ "To"
+ "be"
+```
+
 ## TF (Term Frequency)
 
 Often we need to find out what proportion of a document is contributed by each term. This can be done using the term frequency function:
diff --git a/src/TextAnalysis.jl b/src/TextAnalysis.jl
index 2d763b6b..59bc26b9 100644
--- a/src/TextAnalysis.jl
+++ b/src/TextAnalysis.jl
@@ -3,6 +3,7 @@ using SparseArrays
 using Printf
 using LinearAlgebra
 using StatsBase: countmap, addcounts!
+using OrderedCollections: OrderedDict
 using Languages
 using WordTokenizers
 using Snowball
@@ -54,6 +55,7 @@ export tf, tf_idf, bm_25, lsa, lda, summarize, cos_similarity
 export tf!, tf_idf!, bm_25!, lda!
 export remove_patterns!, remove_patterns
 export prune!
+export top_features
 
 export strip_patterns, strip_corrupt_utf8, strip_case, stem_words, tag_part_of_speech, strip_whitespace, strip_punctuation
 export strip_numbers, strip_non_letters, strip_indefinite_articles, strip_definite_articles, strip_articles
diff --git a/src/corpus.jl b/src/corpus.jl
index 9d3b273b..9fd2f639 100644
--- a/src/corpus.jl
+++ b/src/corpus.jl
@@ -298,3 +298,14 @@ function standardize!(crps::Corpus, ::Type{T}) where {T<:AbstractDocument}
         crps.documents[i] = convert(T, crps.documents[i])
     end
 end
+
+##############################################################################
+#
+# top_features() methods
+#
+##############################################################################
+
+top_features(lx::Dict{String,Int}) = sort!(OrderedDict(lx); byvalue=true, rev=true)
+top_features(lx::Dict{String,Int}, n::Int) = first(keys(top_features(lx)), n)
+top_features(crps::Corpus) = top_features(lexicon(crps))
+top_features(crps::Corpus, n::Int) = top_features(lexicon(crps), n)
\ No newline at end of file
diff --git a/src/document.jl b/src/document.jl
index e933f9a7..cb2de50a 100644
--- a/src/document.jl
+++ b/src/document.jl
@@ -398,3 +398,12 @@ Base.convert(::Type{NGramDocument}, d::NGramDocument) = d
 ##############################################################################
 
 Base.getindex(d::AbstractDocument, term::AbstractString) = ngrams(d)[term]
+
+##############################################################################
+#
+# top_features() methods
+#
+##############################################################################
+
+top_features(d::AbstractDocument) = sort!(OrderedDict(countmap(tokens(d))); byvalue=true, rev=true)
+top_features(d::AbstractDocument, n::Int) = first(keys(top_features(d)), n)
\ No newline at end of file
diff --git a/src/dtm.jl b/src/dtm.jl
index 35c9cc7c..4e812050 100644
--- a/src/dtm.jl
+++ b/src/dtm.jl
@@ -440,3 +440,16 @@ function merge!(dtm1::DocumentTermMatrix{T}, dtm2::DocumentTermMatrix{T}) where
 
     dtm1
 end
+
+"""
+    top_features(x)
+    top_features(x, n)
+
+Return terms sorted in descending frequency. With `n`, return only the top `n` terms.
+Accepts a `Corpus`, `AbstractDocument`, lexicon `Dict`, or `DocumentTermMatrix`.
+"""
+top_features(D::DocumentTermMatrix, n::Int) = first(keys(top_features(D)), n)
+function top_features(D::DocumentTermMatrix)
+    counts = vec(sum(D.dtm; dims=1))
+    return sort!(OrderedDict(zip(D.terms, counts)); byvalue=true, rev=true)
+end
\ No newline at end of file
diff --git a/test/corpus.jl b/test/corpus.jl
index 044c89a6..d2fe0e93 100644
--- a/test/corpus.jl
+++ b/test/corpus.jl
@@ -39,6 +39,9 @@
     update_lexicon!(crps)
     answer = Dict("1" => 2, "2" => 1, "4" => 1)
 
+    @test top_features(crps) == top_features(crps[1])
+    @test top_features(crps, 1) == top_features(crps[1], 1)
+
     @test answer == lexicon(crps)
 end
 
diff --git a/test/document.jl b/test/document.jl
index 8ffa3ef3..e936f813 100644
--- a/test/document.jl
+++ b/test/document.jl
@@ -66,6 +66,13 @@
     @test isa(ngd, NGramDocument)
     @test "To" in keys(ngrams(ngd))
 
+    # Test top features
+    top = top_features(sd)
+    @test top isa OrderedDict
+    @test collect(keys(top)) == ["be", "or", "not", "to", "To"]
+    @test collect(values(top)) == [2, 1, 1, 1, 1]
+    @test top_features(sd, 2) == ["be", "or"]
+
     sd = StringDocument(hamlet_text)
     td = TokenDocument(hamlet_text)
     ngd = NGramDocument(hamlet_text)
diff --git a/test/dtm.jl b/test/dtm.jl
index 0a2f01fd..f6fdba76 100644
--- a/test/dtm.jl
+++ b/test/dtm.jl
@@ -109,4 +109,14 @@
     @test dtm2.terms == ["five", "four", "three", "two"]
     @test size(dtm2.dtm) == (2, 4)
     @test sum(dtm2.dtm, dims=(1,)) == [1 2 2 1]
+
+    # Test top_features
+    crps3 = Corpus([FileDocument(sample_file)])
+    update_lexicon!(crps3)
+    m3 = DocumentTermMatrix(crps3)
+    top = top_features(m3)
+    top5 = top_features(m3, 5)
+    @test top isa OrderedDict
+    @test top5 == first(keys(top), 5) == [",", "thou", "And", "and", ";"] 
+    @test first(values(top), 5) == [29, 6, 5, 5, 3]
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 3ea9e016..471b95d6 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -4,6 +4,7 @@ using Languages
 using TextAnalysis
 using WordTokenizers
 using Serialization
+using OrderedCollections: OrderedDict
 
 tests = [
     "coom.jl"

From be5838732ec42c93f50ebcf5c6d08836ef4357ac Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Sat, 7 Mar 2026 12:11:23 +0000
Subject: [PATCH 2/7] OrderedCollections version compatibility for Julia 1.6

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 731365f4..6294e2e8 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,7 +30,7 @@ DelimitedFiles = "1"
 DocStringExtensions = "0.9"
 JSON = "0.21, 1"
 Languages = "0.4"
-OrderedCollections = "1.8.1"
+OrderedCollections = "1.7.0"
 ProgressMeter = "1"
 Snowball = "0.1"
 Statistics = "1"

From dbe1f57d3ec623014cdc05eb46097df58433805b Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Sat, 7 Mar 2026 12:34:36 +0000
Subject: [PATCH 3/7] enforce alphabetic sorting

---
 src/corpus.jl    | 2 +-
 src/document.jl  | 2 +-
 src/dtm.jl       | 2 +-
 test/document.jl | 4 ++--
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/corpus.jl b/src/corpus.jl
index 9fd2f639..7ca4c6af 100644
--- a/src/corpus.jl
+++ b/src/corpus.jl
@@ -305,7 +305,7 @@ end
 #
 ##############################################################################
 
-top_features(lx::Dict{String,Int}) = sort!(OrderedDict(lx); byvalue=true, rev=true)
+top_features(lx::Dict{String,Int}) = sort!(sort!(OrderedDict(lx)); byvalue=true, rev=true) # double sort for key then value order
 top_features(lx::Dict{String,Int}, n::Int) = first(keys(top_features(lx)), n)
 top_features(crps::Corpus) = top_features(lexicon(crps))
 top_features(crps::Corpus, n::Int) = top_features(lexicon(crps), n)
\ No newline at end of file
diff --git a/src/document.jl b/src/document.jl
index cb2de50a..f5dc73c4 100644
--- a/src/document.jl
+++ b/src/document.jl
@@ -405,5 +405,5 @@ Base.getindex(d::AbstractDocument, term::AbstractString) = ngrams(d)[term]
 #
 ##############################################################################
 
-top_features(d::AbstractDocument) = sort!(OrderedDict(countmap(tokens(d))); byvalue=true, rev=true)
+top_features(d::AbstractDocument) = sort!(sort!(OrderedDict(countmap(tokens(d)))); byvalue=true, rev=true) # double sort for key and value order
 top_features(d::AbstractDocument, n::Int) = first(keys(top_features(d)), n)
\ No newline at end of file
diff --git a/src/dtm.jl b/src/dtm.jl
index 4e812050..1f66633e 100644
--- a/src/dtm.jl
+++ b/src/dtm.jl
@@ -451,5 +451,5 @@ Accepts a `Corpus`, `AbstractDocument`, lexicon `Dict`, or `DocumentTermMatrix`.
 top_features(D::DocumentTermMatrix, n::Int) = first(keys(top_features(D)), n)
 function top_features(D::DocumentTermMatrix)
     counts = vec(sum(D.dtm; dims=1))
-    return sort!(OrderedDict(zip(D.terms, counts)); byvalue=true, rev=true)
+    return sort!(sort!(OrderedDict(zip(D.terms, counts))); byvalue=true, rev=true) # double sort for key and value order
 end
\ No newline at end of file
diff --git a/test/document.jl b/test/document.jl
index e936f813..22523a2c 100644
--- a/test/document.jl
+++ b/test/document.jl
@@ -69,9 +69,9 @@
     # Test top features
     top = top_features(sd)
     @test top isa OrderedDict
-    @test collect(keys(top)) == ["be", "or", "not", "to", "To"]
+    @test collect(keys(top)) == ["be", "To", "not", "or", "to"]
     @test collect(values(top)) == [2, 1, 1, 1, 1]
-    @test top_features(sd, 2) == ["be", "or"]
+    @test top_features(sd, 2) == ["be", "To"]
 
     sd = StringDocument(hamlet_text)
     td = TokenDocument(hamlet_text)

From 7cf6d2b9c7256c489638c8360424238e533b8243 Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Sat, 7 Mar 2026 12:35:57 +0000
Subject: [PATCH 4/7] improve docs

---
 src/dtm.jl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/dtm.jl b/src/dtm.jl
index 1f66633e..54d6a27a 100644
--- a/src/dtm.jl
+++ b/src/dtm.jl
@@ -447,6 +447,7 @@ end
 
 Return terms sorted in descending frequency. With `n`, return only the top `n` terms.
 Accepts a `Corpus`, `AbstractDocument`, lexicon `Dict`, or `DocumentTermMatrix`.
+Ties are sorted alphabetically.
 """
 top_features(D::DocumentTermMatrix, n::Int) = first(keys(top_features(D)), n)
 function top_features(D::DocumentTermMatrix)

From a3bc21f8b911b784a4f9bab09193deb29545d310 Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Tue, 17 Mar 2026 12:47:16 +0000
Subject: [PATCH 5/7] incorporate comments

---
 docs/src/features.md |  9 ++-------
 src/corpus.jl        | 13 +++++++++----
 src/document.jl      |  9 +++++++--
 src/dtm.jl           | 12 +++++++-----
 test/corpus.jl       |  1 -
 test/document.jl     |  5 ++---
 test/dtm.jl          |  7 +++----
 7 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/docs/src/features.md b/docs/src/features.md
index a6b8bcba..b50adb52 100644
--- a/docs/src/features.md
+++ b/docs/src/features.md
@@ -104,21 +104,16 @@ julia> hash_dtv(crps[1])
 
 ## Top Features
 
-We can use the function `top_features()` to quickly view the top features of a `Document`, `DocumentTermMatrix` or `Corpus`.
+We can use the function `top_features(x, n)` to quickly view the top features of a `Document`, `DocumentTermMatrix` or `Corpus`.
 
 ```julia
-julia> top_features(m) # or `top_features(crps)`
+julia> top_features(m, 5)
 OrderedCollections.OrderedDict{String, Int64} with 6 entries:
   "To"     => 2
   "be"     => 2
   "become" => 2
   "not"    => 2
   "or"     => 2
-  "to"     => 2
-julia> top_features(m, 2)
-2-element Vector{String}:
- "To"
- "be"
 ```
 
 ## TF (Term Frequency)
diff --git a/src/corpus.jl b/src/corpus.jl
index 7ca4c6af..53f69eb1 100644
--- a/src/corpus.jl
+++ b/src/corpus.jl
@@ -305,7 +305,12 @@ end
 #
 ##############################################################################
 
-top_features(lx::Dict{String,Int}) = sort!(sort!(OrderedDict(lx)); byvalue=true, rev=true) # double sort for key then value order
-top_features(lx::Dict{String,Int}, n::Int) = first(keys(top_features(lx)), n)
-top_features(crps::Corpus) = top_features(lexicon(crps))
-top_features(crps::Corpus, n::Int) = top_features(lexicon(crps), n)
\ No newline at end of file
+function top_features(lx::Dict{String,Int}, ::Val{N}) where {N}
+    D_pairs = collect(pairs(lx))
+    n = min(N, length(D_pairs))
+    idx = partialsortperm(D_pairs, 1:n, by = p -> (-p.second, p.first))
+    OrderedDict(D_pairs[idx])
+end
+top_features(lx::Dict{String,Int}, n::Int) = first.(top_features(lx), Val(n))
+top_features(crps::Corpus, n::Int) = top_features(lexicon(crps), Val(n))
+#top_features(crps::Corpus) = top_features(lexicon(crps))
\ No newline at end of file
diff --git a/src/document.jl b/src/document.jl
index f5dc73c4..58604d78 100644
--- a/src/document.jl
+++ b/src/document.jl
@@ -405,5 +405,10 @@ Base.getindex(d::AbstractDocument, term::AbstractString) = ngrams(d)[term]
 #
 ##############################################################################
 
-top_features(d::AbstractDocument) = sort!(sort!(OrderedDict(countmap(tokens(d)))); byvalue=true, rev=true) # double sort for key and value order
-top_features(d::AbstractDocument, n::Int) = first(keys(top_features(d)), n)
\ No newline at end of file
+function top_features(d::AbstractDocument, ::Val{N})  where {N}
+    D_pairs = collect(pairs(countmap(tokens(d))))
+    n = min(N, length(D_pairs))
+    idx = partialsortperm(D_pairs, 1:n; by = p -> (-p.second, p.first))
+    OrderedDict(D_pairs[idx])
+end
+top_features(d::AbstractDocument, n::Int) = top_features(d, Val(n))
\ No newline at end of file
diff --git a/src/dtm.jl b/src/dtm.jl
index 54d6a27a..702fc45c 100644
--- a/src/dtm.jl
+++ b/src/dtm.jl
@@ -449,8 +449,10 @@ Return terms sorted in descending frequency. With `n`, return only the top `n` t
 Accepts a `Corpus`, `AbstractDocument`, lexicon `Dict`, or `DocumentTermMatrix`.
 Ties are sorted alphabetically.
 """
-top_features(D::DocumentTermMatrix, n::Int) = first(keys(top_features(D)), n)
-function top_features(D::DocumentTermMatrix)
-    counts = vec(sum(D.dtm; dims=1))
-    return sort!(sort!(OrderedDict(zip(D.terms, counts))); byvalue=true, rev=true) # double sort for key and value order
-end
\ No newline at end of file
+function top_features(D::DocumentTermMatrix, ::Val{N}) where {N}
+    counts = @view(sum(D.dtm; dims=1)[1, :])
+    n = min(N, length(counts))
+    idx = partialsortperm(counts, 1:n; rev=true)
+    OrderedDict(zip(D.terms[idx], counts[idx]))
+end
+top_features(D::DocumentTermMatrix, n::Int) = top_features(D, Val(n))
\ No newline at end of file
diff --git a/test/corpus.jl b/test/corpus.jl
index d2fe0e93..212ffd12 100644
--- a/test/corpus.jl
+++ b/test/corpus.jl
@@ -39,7 +39,6 @@
     update_lexicon!(crps)
     answer = Dict("1" => 2, "2" => 1, "4" => 1)
 
-    @test top_features(crps) == top_features(crps[1])
     @test top_features(crps, 1) == top_features(crps[1], 1)
 
     @test answer == lexicon(crps)
diff --git a/test/document.jl b/test/document.jl
index 22523a2c..f3955070 100644
--- a/test/document.jl
+++ b/test/document.jl
@@ -67,11 +67,10 @@
     @test "To" in keys(ngrams(ngd))
 
     # Test top features
-    top = top_features(sd)
-    @test top isa OrderedDict
+    top = top_features(sd, 5)
     @test collect(keys(top)) == ["be", "To", "not", "or", "to"]
     @test collect(values(top)) == [2, 1, 1, 1, 1]
-    @test top_features(sd, 2) == ["be", "To"]
+    @test top_features(sd, 2) == OrderedDict("be" => 2, "To" => 1)
 
     sd = StringDocument(hamlet_text)
     td = TokenDocument(hamlet_text)
diff --git a/test/dtm.jl b/test/dtm.jl
index f6fdba76..45f31527 100644
--- a/test/dtm.jl
+++ b/test/dtm.jl
@@ -114,9 +114,8 @@
     crps3 = Corpus([FileDocument(sample_file)])
     update_lexicon!(crps3)
     m3 = DocumentTermMatrix(crps3)
-    top = top_features(m3)
     top5 = top_features(m3, 5)
-    @test top isa OrderedDict
-    @test top5 == first(keys(top), 5) == [",", "thou", "And", "and", ";"] 
-    @test first(values(top), 5) == [29, 6, 5, 5, 3]
+    @test top5 isa OrderedDict
+    @test collect(keys(top5)) == [",", "thou", "And", "and", ";"] 
+    @test collect(values(top5)) == [29, 6, 5, 5, 3]
 end

From 7ba28a81ae0edf1b5bc5fa12b09708266cc1defe Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Tue, 17 Mar 2026 12:58:08 +0000
Subject: [PATCH 6/7] rename top_features to top_terms

---
 docs/src/features.md |  4 ++--
 src/TextAnalysis.jl  |  2 +-
 src/corpus.jl        | 12 ++++++------
 src/document.jl      |  7 ++++---
 src/dtm.jl           |  8 ++++----
 test/corpus.jl       |  2 +-
 test/document.jl     |  4 ++--
 test/dtm.jl          |  4 ++--
 8 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/docs/src/features.md b/docs/src/features.md
index b50adb52..2e2ba289 100644
--- a/docs/src/features.md
+++ b/docs/src/features.md
@@ -104,10 +104,10 @@ julia> hash_dtv(crps[1])
 
 ## Top Features
 
-We can use the function `top_features(x, n)` to quickly view the top features of a `Document`, `DocumentTermMatrix` or `Corpus`.
+We can use the function `top_terms(x, n)` to quickly view the `n` most frequent terms of a `Document`, `DocumentTermMatrix` or `Corpus`, together with their counts.
 
 ```julia
-julia> top_features(m, 5)
+julia> top_terms(m, 5)
-OrderedCollections.OrderedDict{String, Int64} with 6 entries:
+OrderedCollections.OrderedDict{String, Int64} with 5 entries:
   "To"     => 2
   "be"     => 2
diff --git a/src/TextAnalysis.jl b/src/TextAnalysis.jl
index 59bc26b9..99d3a3d3 100644
--- a/src/TextAnalysis.jl
+++ b/src/TextAnalysis.jl
@@ -55,7 +55,7 @@ export tf, tf_idf, bm_25, lsa, lda, summarize, cos_similarity
 export tf!, tf_idf!, bm_25!, lda!
 export remove_patterns!, remove_patterns
 export prune!
-export top_features
+export top_terms
 
 export strip_patterns, strip_corrupt_utf8, strip_case, stem_words, tag_part_of_speech, strip_whitespace, strip_punctuation
 export strip_numbers, strip_non_letters, strip_indefinite_articles, strip_definite_articles, strip_articles
diff --git a/src/corpus.jl b/src/corpus.jl
index 53f69eb1..4374ada7 100644
--- a/src/corpus.jl
+++ b/src/corpus.jl
@@ -301,16 +301,16 @@ end
 
 ##############################################################################
 #
-# top_features() methods
+# top_terms() methods
 #
 ##############################################################################
 
-function top_features(lx::Dict{String,Int}, ::Val{N}) where {N}
+function top_terms(lx::Dict{String,Int}, n::Integer)
     D_pairs = collect(pairs(lx))
-    n = min(N, length(D_pairs))
-    idx = partialsortperm(D_pairs, 1:n, by = p -> (-p.second, p.first))
+    k = min(n, length(D_pairs))  # never request more entries than exist
+    # Count decreasing, break ties alphabetically
+    idx = partialsortperm(D_pairs, 1:k; by = p -> (-p.second, p.first))
-    OrderedDict(D_pairs[idx])
+    return OrderedDict(D_pairs[idx])
 end
-top_features(lx::Dict{String,Int}, n::Int) = first.(top_features(lx), Val(n))
-top_features(crps::Corpus, n::Int) = top_features(lexicon(crps), Val(n))
-#top_features(crps::Corpus) = top_features(lexicon(crps))
\ No newline at end of file
+top_terms(lx::Dict{String,Int}, ::Val{N}) where {N} = top_terms(lx, N)  # backward-compatible `Val` form
+top_terms(crps::Corpus, n::Integer) = top_terms(lexicon(crps), n)  # ranks the corpus lexicon
\ No newline at end of file
diff --git a/src/document.jl b/src/document.jl
index 58604d78..bf14373f 100644
--- a/src/document.jl
+++ b/src/document.jl
@@ -401,14 +401,15 @@ Base.getindex(d::AbstractDocument, term::AbstractString) = ngrams(d)[term]
 
 ##############################################################################
 #
-# top_features() methods
+# top_terms() methods
 #
 ##############################################################################
 
-function top_features(d::AbstractDocument, ::Val{N})  where {N}
+function top_terms(d::AbstractDocument, n::Integer)
     D_pairs = collect(pairs(countmap(tokens(d))))
-    n = min(N, length(D_pairs))
-    idx = partialsortperm(D_pairs, 1:n; by = p -> (-p.second, p.first))
-    OrderedDict(D_pairs[idx])
+    k = min(n, length(D_pairs))  # never request more entries than exist
+    # Count decreasing, break ties alphabetically
+    idx = partialsortperm(D_pairs, 1:k; by = p -> (-p.second, p.first))
+    return OrderedDict(D_pairs[idx])
 end
-top_features(d::AbstractDocument, n::Int) = top_features(d, Val(n))
\ No newline at end of file
+top_terms(d::AbstractDocument, ::Val{N}) where {N} = top_terms(d, N)  # backward-compatible `Val` form
\ No newline at end of file
diff --git a/src/dtm.jl b/src/dtm.jl
index 702fc45c..5df36261 100644
--- a/src/dtm.jl
+++ b/src/dtm.jl
@@ -442,17 +442,17 @@ function merge!(dtm1::DocumentTermMatrix{T}, dtm2::DocumentTermMatrix{T}) where
 end
 
 """
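-    top_features(x)
-    top_features(x, n)
+    top_terms(x, n)
 
-Return terms sorted in descending frequency. With `n`, return only the top `n` terms.
+Return an `OrderedDict` mapping the `n` most frequent terms of `x` to their counts, in
+descending order of count (all terms are returned if there are fewer than `n`).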
 Accepts a `Corpus`, `AbstractDocument`, lexicon `Dict`, or `DocumentTermMatrix`.
 Ties are sorted alphabetically.
 """
-function top_features(D::DocumentTermMatrix, ::Val{N}) where {N}
+function top_terms(D::DocumentTermMatrix, ::Val{N}) where {N}
     counts = @view(sum(D.dtm; dims=1)[1, :])
     n = min(N, length(counts))
     idx = partialsortperm(counts, 1:n; rev=true)
     OrderedDict(zip(D.terms[idx], counts[idx]))
 end
-top_features(D::DocumentTermMatrix, n::Int) = top_features(D, Val(n))
\ No newline at end of file
+top_terms(D::DocumentTermMatrix, n::Int) = top_terms(D, Val(n))
\ No newline at end of file
diff --git a/test/corpus.jl b/test/corpus.jl
index 212ffd12..3495697b 100644
--- a/test/corpus.jl
+++ b/test/corpus.jl
@@ -39,7 +39,7 @@
     update_lexicon!(crps)
     answer = Dict("1" => 2, "2" => 1, "4" => 1)
 
-    @test top_features(crps, 1) == top_features(crps[1], 1)
+    @test top_terms(crps, 1) == top_terms(crps[1], 1)
 
     @test answer == lexicon(crps)
 end
diff --git a/test/document.jl b/test/document.jl
index f3955070..4af83db3 100644
--- a/test/document.jl
+++ b/test/document.jl
@@ -67,10 +67,10 @@
     @test "To" in keys(ngrams(ngd))
 
     # Test top features
-    top = top_features(sd, 5)
+    top = top_terms(sd, 5)
     @test collect(keys(top)) == ["be", "To", "not", "or", "to"]
     @test collect(values(top)) == [2, 1, 1, 1, 1]
-    @test top_features(sd, 2) == OrderedDict("be" => 2, "To" => 1)
+    @test top_terms(sd, 2) == OrderedDict("be" => 2, "To" => 1)
 
     sd = StringDocument(hamlet_text)
     td = TokenDocument(hamlet_text)
diff --git a/test/dtm.jl b/test/dtm.jl
index 45f31527..8292152b 100644
--- a/test/dtm.jl
+++ b/test/dtm.jl
@@ -110,11 +110,11 @@
     @test size(dtm2.dtm) == (2, 4)
     @test sum(dtm2.dtm, dims=(1,)) == [1 2 2 1]
 
-    # Test top_features
+    # Test top_terms
     crps3 = Corpus([FileDocument(sample_file)])
     update_lexicon!(crps3)
     m3 = DocumentTermMatrix(crps3)
-    top5 = top_features(m3, 5)
+    top5 = top_terms(m3, 5)
     @test top5 isa OrderedDict
     @test collect(keys(top5)) == [",", "thou", "And", "and", ";"] 
     @test collect(values(top5)) == [29, 6, 5, 5, 3]

From fc76728629f15614a19d5526d93d06d68d9761bc Mon Sep 17 00:00:00 2001
From: James Alster <jamesalster2@gmail.com>
Date: Tue, 17 Mar 2026 13:09:41 +0000
Subject: [PATCH 7/7] Fix dtm method for top_terms

---
 src/dtm.jl | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/src/dtm.jl b/src/dtm.jl
index 5df36261..b2ab553e 100644
--- a/src/dtm.jl
+++ b/src/dtm.jl
@@ -451,8 +451,10 @@ Ties are sorted alphabetically.
 """
 function top_terms(D::DocumentTermMatrix, ::Val{N}) where {N}
     counts = @view(sum(D.dtm; dims=1)[1, :])
-    n = min(N, length(counts))
-    idx = partialsortperm(counts, 1:n; rev=true)
-    OrderedDict(zip(D.terms[idx], counts[idx]))
+    D_pairs = D.terms .=> counts
+    n = min(N, length(D_pairs))
+    # Count decreasing, break ties alphabetically
+    idx = partialsortperm(D_pairs, 1:n; by = p -> (-p.second, p.first))
+    OrderedDict(D_pairs[idx])
 end
 top_terms(D::DocumentTermMatrix, n::Int) = top_terms(D, Val(n))
\ No newline at end of file