From beaa0c6751209964a1045a931b0be0a92bb728fe Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Fri, 3 Oct 2025 07:39:07 +0200 Subject: [PATCH 01/11] Update toolchain --- lean-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lean-toolchain b/lean-toolchain index c7e310c..142d469 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.23.0-rc1 +leanprover/lean4:nightly-2025-10-02 From aec5fa88bc4295c36769fdaec9a03d1b26ea55cf Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Fri, 3 Oct 2025 07:39:12 +0200 Subject: [PATCH 02/11] Fix --- BibtexQuery/String.lean | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/BibtexQuery/String.lean b/BibtexQuery/String.lean index 1b7c453..9041f43 100644 --- a/BibtexQuery/String.lean +++ b/BibtexQuery/String.lean @@ -121,13 +121,13 @@ def Char.asciify : Char → Char def String.asciify (s : String) : String := s.map Char.asciify ---#eval "Dès Noël où un zéphyr haï me vêt de glaçons würmiens, je dîne d'exquis rôtis de +--#eval "Dès Noël où un zéphyr haï me vêt de glaçons würmiens, je dîne d'exquis rôtis de --bœuf au kir à l'aÿ d'âge mûr & cætera".asciify --#eval "Testfile aisdfjoai".foldl (fun s c => s ++ "A") "" --#eval '{'.asciify.toLower -def String.flattenWords (s : String) : String := s.foldl +def String.flattenWords (s : String) : String := s.foldl (fun s c => s ++ (if c.asciify.toLower.isAlphanum then c.asciify.toLower.toString else "")) "" --#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords @@ -160,9 +160,9 @@ partial def Substring.containsSubstrStartingAt (s : Substring) (q : String) : Bo if s.toString.length = 0 then q.length = 0 else if q.isPrefixOf s.toString then true else (s.drop 1).containsSubstrStartingAt q - + def String.containsSubstr (s : String) (q : String) : Bool := s.toSubstring.containsSubstrStartingAt q def String.pad (s : String) (c : Char) (n : Nat) : String := - (s ++ ⟨List.replicate n c⟩).take n + (s ++ (List.replicate n c).asString).take n From b2701230f0ea034881d2cb5228d1706de2e5b67c Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Fri, 3 Oct 2025 07:41:11 +0200 Subject: [PATCH 03/11] Fix --- BibtexQuery/ParsecExtra.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BibtexQuery/ParsecExtra.lean b/BibtexQuery/ParsecExtra.lean index 7702090..bc45430 100644 --- a/BibtexQuery/ParsecExtra.lean +++ b/BibtexQuery/ParsecExtra.lean @@ -27,7 +27,7 @@ def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α | .success _ x => some x | .error _ _ => none -def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos) := +def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos.Raw) := match p s.iter with | .success pos x => some ⟨x, pos.i⟩ | .error _ _ => none From f7567957b70829bdad6cf62f52f9f6dfbc6ae169 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 18 Nov 2025 15:24:03 +1100 Subject: [PATCH 04/11] bump to nightly-2025-11-11 --- lean-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lean-toolchain b/lean-toolchain index 1d4c9d4..dd1a8dd 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.25.0 \ No newline at end of file +leanprover/lean4:nightly-2025-11-11 \ No newline at end of file From 7e99c004d012587e269abdc1660cf04514e9127d Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 18 Nov 2025 15:57:54 +1100 Subject: [PATCH 05/11] chore: bump to nightly-2025-11-17 This diff is entirely written by Claude. It looks mostly sane, but without any test framework it's hard to be sure. :-) --- BibtexQuery/Format.lean | 6 +++--- BibtexQuery/Main.lean | 20 ++++++++++++-------- BibtexQuery/Name.lean | 10 +++++----- BibtexQuery/ParsecExtra.lean | 23 ++++++++++++++++------- BibtexQuery/String.lean | 19 ++++++++++++------- BibtexQuery/TexDiacritics.lean | 18 +++++++++--------- lake-manifest.json | 4 ++-- lakefile.toml | 2 +- lean-toolchain | 2 +- 9 files changed, 61 insertions(+), 43 deletions(-) diff --git a/BibtexQuery/Format.lean b/BibtexQuery/Format.lean index 0287810..a38f933 100644 --- a/BibtexQuery/Format.lean +++ b/BibtexQuery/Format.lean @@ -58,7 +58,7 @@ def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Conten if let .some yearTex := tags["year"]? then let yearHtml := TexContent.toHtmlArray yearTex if let .some year := (TexContent.toPlaintextArray yearTex).toList.filter - Char.isDigit |> String.mk |>.toNat? then + Char.isDigit |> String.ofList |>.toNat? then let month : Nat := if let .some monthTex := tags["month"]? then let monthStr := (TexContent.toPlaintextArray monthTex).trim.toLower @@ -123,9 +123,9 @@ def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) : if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then .ok (x.name, #[.normal s]) else - match texContents s.iter with + match texContents ⟨s, s.startValidPos⟩ with | .success _ arr => .ok (x.name, arr) - | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {it.2}: {err}" + | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {Input.pos it}: {err}" let tags := Std.HashMap.ofList lst let authors := processNames (tags.getD "author" #[]) let editors := processNames (tags.getD "editor" #[]) diff --git a/BibtexQuery/Main.lean b/BibtexQuery/Main.lean index 203ff76..5d44a47 100644 --- a/BibtexQuery/Main.lean +++ b/BibtexQuery/Main.lean @@ -97,25 +97,29 @@ def main : List String → IO Unit | ["--help", _] => printHelp | ["d", fname] => do IO.println s!"Reading {fname} to find doubled keys" - let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter + let s := ← IO.FS.readFile fname + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ match parsed with | .success _pos res => let lst := listDoublons res IO.println lst - | .error pos err => IO.eprintln s!"Parse error at line {pos.lineNumber}: {err}" + | .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfValidPos pos}: {err}" | ["l", fname] => do - let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter + let s := ← IO.FS.readFile fname + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ match parsed with | .success _pos res => printEntries res - | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" | "q" :: (fname :: queries) => do - let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter + let s := ← IO.FS.readFile fname + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ match parsed with | .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString - | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" | "c" :: (fname :: queries) => do - let parsed := BibtexQuery.Parser.bibtexFile (←IO.FS.readFile fname).iter + let s := ← IO.FS.readFile fname + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ match parsed with | .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString - | .error pos err => IO.eprint s!"Parse error at line {pos.lineNumber}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" | _ => do IO.eprintln "Invalid command-line arguments"; printHelp diff --git a/BibtexQuery/Name.lean b/BibtexQuery/Name.lean index 07b5784..2846d63 100644 --- a/BibtexQuery/Name.lean +++ b/BibtexQuery/Name.lean @@ -92,7 +92,7 @@ def stripDiacritics (c : Char) : Char := /-- Strip diacritics from a string. -/ def stripDiacriticsFromString (s : String) : String := s.toList.toArray.map stripDiacritics |>.filter (not <| GeneralCategory.isMark ·) - |>.toList |> String.mk + |>.toList |> String.ofList /-- Get the array of alphabets of a string after stripping diacritics. -/ def getAlphabets (s : String) : Array Char := @@ -136,16 +136,16 @@ def getLastNameAbbr (arr : Array String) : String × String := let arr : Array Nat := s.zipIdx.filterMap fun x => if isUppercase x.1 then .some x.2 else .none if arr.size = 2 ∧ arr[0]! + 2 = arr[1]! then - let s := s.toSubarray.drop arr[0]! |>.take 3 |>.toArray.toList |> String.mk + let s := s.toSubarray.drop arr[0]! |>.take 3 |>.toArray.toList |> String.ofList (s, s) else if arr.size ≥ 2 then - let s := arr.map (s[·]!) |>.toList |> String.mk + let s := arr.map (s[·]!) |>.toList |> String.ofList (s, s) else - let s := String.mk s.toList + let s := String.ofList s.toList (s.take 1, s.take 3) | _ => - let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.mk + let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.ofList (s, s) /-- Represents the name of a person in bibtex author field. -/ diff --git a/BibtexQuery/ParsecExtra.lean b/BibtexQuery/ParsecExtra.lean index bc45430..af929d4 100644 --- a/BibtexQuery/ParsecExtra.lean +++ b/BibtexQuery/ParsecExtra.lean @@ -23,13 +23,13 @@ open Lean Std.Internal.Parsec.String Std.Internal.Parsec namespace BibtexQuery.ParsecExtra def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α := - match p s.iter with + match p ⟨s, s.startValidPos⟩ with | .success _ x => some x | .error _ _ => none def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos.Raw) := - match p s.iter with - | .success pos x => some ⟨x, pos.i⟩ + match p ⟨s, s.startValidPos⟩ with + | .success pos x => some ⟨x, Input.pos pos⟩ | .error _ _ => none @[inline] @@ -110,10 +110,19 @@ def natNum : Parser Nat := attempt do return n def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it => - let out := - it.foldUntil "" fun acc c => + let ⟨res, pos⟩ := go it.1 it.2 "" + .success ⟨_, pos⟩ res +where + go {s : String} (str : String) (it : s.ValidPos) (acc : String) : String × s.ValidPos := + if h : ¬it.IsAtEnd then + let c := it.get h let prev : Option Char := if acc == "" then none else acc.back - if test prev c then none else some (acc ++ c.toString) - .success out.2 out.1 + if test prev c then + (acc, it) + else + go str (it.next h) (acc ++ c.toString) + else + (acc, it) + termination_by it end BibtexQuery.ParsecExtra diff --git a/BibtexQuery/String.lean b/BibtexQuery/String.lean index 7861ec9..bf39fab 100644 --- a/BibtexQuery/String.lean +++ b/BibtexQuery/String.lean @@ -11,8 +11,13 @@ This file contains various string processing functions. -/ /-- Get the line number of the current position of the iterator. -/ -def String.Iterator.lineNumber (it : String.Iterator) : Nat := - let s : Substring := ⟨it.toString, 0, it.pos⟩ +def String.Legacy.Iterator.lineNumber (it : String.Legacy.Iterator) : Nat := + let s : Substring.Raw := ⟨it.toString, 0, it.pos⟩ + s.foldl (fun n c => if c = '\n' then n+1 else n) 1 + +/-- Get the line number of the current position for ValidPos. -/ +def lineNumberOfValidPos (it : Sigma String.ValidPos) : Nat := + let s : Substring.Raw := ⟨it.1, 0, it.2.offset⟩ s.foldl (fun n c => if c = '\n' then n+1 else n) 1 /-- Strip diacritics from a string. -/ @@ -156,13 +161,13 @@ def String.toFirstnameLastname (s : String) : String := def String.toFullNames (s : String) : String := String.join $ (s.splitIntoNames.map String.toFirstnameLastname).map String.flattenWords -partial def Substring.containsSubstrStartingAt (s : Substring) (q : String) : Bool := - if s.toString.length = 0 then q.length = 0 - else if q.isPrefixOf s.toString then true - else (s.drop 1).containsSubstrStartingAt q +partial def Substring.Raw.containsSubstrStartingAt (s : Substring.Raw) (q : String) : Bool := + if (Substring.Raw.toString s).length = 0 then q.length = 0 + else if q.isPrefixOf (Substring.Raw.toString s) then true + else (Substring.Raw.drop s 1).containsSubstrStartingAt q def String.containsSubstr (s : String) (q : String) : Bool := s.toSubstring.containsSubstrStartingAt q def String.pad (s : String) (c : Char) (n : Nat) : String := - (s ++ (List.replicate n c).asString).take n + (s ++ String.ofList (List.replicate n c)).take n diff --git a/BibtexQuery/TexDiacritics.lean b/BibtexQuery/TexDiacritics.lean index 9d84726..b4a1a97 100644 --- a/BibtexQuery/TexDiacritics.lean +++ b/BibtexQuery/TexDiacritics.lean @@ -255,8 +255,8 @@ partial def mathContentAux : Parser String := do | '\\' | '$' | '{' | '}' => false | _ => true let doOne : Parser (Option String) := fun it => - if it.hasNext then - match it.curr with + if Input.hasNext it then + match Input.curr it with | '{' => (.some <$> bracedContent mathContentAux) it | '\\' => match texCommand' #["\\(", "\\)", "\\[", "\\]"] it with @@ -272,14 +272,14 @@ partial def mathContentAux : Parser String := do def mathContent : Parser (Option TexContent) := fun it => let aux (beginning ending : String) : Parser String := pstring beginning *> mathContentAux <* pstring ending - let substr := it.extract (it.forward 2) + let substr := it.2.extract (it.2.nextn 2) if substr = "\\[" then ((.some <| .math "$$" ·) <$> aux "\\[" "\\]") it else if substr = "\\(" then ((.some <| .math "$" ·) <$> aux "\\(" "\\)") it else if substr = "$$" then ((.some <| .math "$$" ·) <$> aux "$$" "$$") it - else if it.curr = '$' then + else if Input.curr it = '$' then ((.some <| .math "$" ·) <$> aux "$" "$") it else .success it .none @@ -289,8 +289,8 @@ partial def rawContentAux : Parser String := do | '\\' | '{' | '}' => false | _ => true let doOne : Parser (Option String) := fun it => - if it.hasNext then - match it.curr with + if Input.hasNext it then + match Input.curr it with | '{' => (.some <$> bracedContent rawContentAux) it | '\\' => (.some <$> texCommand) it | '}' => .success it .none @@ -360,10 +360,10 @@ partial def texContent : Parser (Option TexContent) := fun it => match ret with | .some ret => .success it (.some ret) | .none => - if it.hasNext then - match it.curr with + if Input.hasNext it then + match Input.curr it with | ' ' | '\t' | '\r' | '\n' => ((fun _ => .some (.char ' ')) <$> ws) it - | ',' => .success it.next <| .some <| .char it.curr + | ',' => .success (Input.next it) (.some (.char (Input.curr it))) | '\\' => texDiacriticsCommand texContent it | '{' => ((.some <| .braced ·) <$> (pchar '{' *> manyOptions texContent <* pchar '}')) it | '}' => .success it .none diff --git a/lake-manifest.json b/lake-manifest.json index c659485..c190ade 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,10 +5,10 @@ "type": "git", "subDir": null, "scope": "", - "rev": "c120f43ef14fb577096e39ebed0cb25af2b5682d", + "rev": "378b62cdf3bb90d3cd9aa12dc2be43b42bdda339", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "nightly-testing", "inherited": false, "configFile": "lakefile.lean"}], "name": "BibtexQuery", diff --git a/lakefile.toml b/lakefile.toml index 555ac1d..fa530bf 100644 --- a/lakefile.toml +++ b/lakefile.toml @@ -4,7 +4,7 @@ defaultTargets = ["BibtexQuery", "«bibtex-query»"] [[require]] name = "UnicodeBasic" git = "https://github.com/fgdorais/lean4-unicode-basic" -rev = "main" +rev = "nightly-testing" [[lean_lib]] name = "BibtexQuery" diff --git a/lean-toolchain b/lean-toolchain index dd1a8dd..0fac186 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:nightly-2025-11-11 \ No newline at end of file +leanprover/lean4:nightly-2025-11-17 \ No newline at end of file From 9f78601c5c3fac7f4539c172e423657feaf8c891 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Tue, 18 Nov 2025 16:00:58 +1100 Subject: [PATCH 06/11] bump to v4.26.0-rc1 --- lean-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lean-toolchain b/lean-toolchain index 0fac186..fbd2c39 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:nightly-2025-11-17 \ No newline at end of file +leanprover/lean4:v4.26.0-rc1 \ No newline at end of file From 58259e0f1589a2bad613f97d6c74a80716054bea Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Thu, 20 Nov 2025 07:58:01 +0000 Subject: [PATCH 07/11] fix: check if at end before calling `Input.curr` on parsec parser --- BibtexQuery/TexDiacritics.lean | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BibtexQuery/TexDiacritics.lean b/BibtexQuery/TexDiacritics.lean index b4a1a97..fd957e3 100644 --- a/BibtexQuery/TexDiacritics.lean +++ b/BibtexQuery/TexDiacritics.lean @@ -279,7 +279,7 @@ def mathContent : Parser (Option TexContent) := fun it => ((.some <| .math "$" ·) <$> aux "\\(" "\\)") it else if substr = "$$" then ((.some <| .math "$$" ·) <$> aux "$$" "$$") it - else if Input.curr it = '$' then + else if Input.hasNext it && Input.curr it = '$' then ((.some <| .math "$" ·) <$> aux "$" "$") it else .success it .none From 46c47b6438bd5e87b7fa457c05e9e67d67e049a5 Mon Sep 17 00:00:00 2001 From: Markus Himmel Date: Wed, 26 Nov 2025 05:21:11 +0100 Subject: [PATCH 08/11] chore: update `nightly-testing` to `nightly-2025-11-24` (#28) --- BibtexQuery/Format.lean | 12 ++++++------ BibtexQuery/Name.lean | 4 ++-- BibtexQuery/Parser.lean | 2 +- BibtexQuery/Query.lean | 10 +++++----- BibtexQuery/String.lean | 23 +++++++++-------------- BibtexQuery/TexDiacritics.lean | 18 +++++++++--------- lake-manifest.json | 2 +- lean-toolchain | 2 +- 8 files changed, 34 insertions(+), 39 deletions(-) diff --git a/BibtexQuery/Format.lean b/BibtexQuery/Format.lean index a38f933..c590990 100644 --- a/BibtexQuery/Format.lean +++ b/BibtexQuery/Format.lean @@ -61,7 +61,7 @@ def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Conten Char.isDigit |> String.ofList |>.toNat? then let month : Nat := if let .some monthTex := tags["month"]? then - let monthStr := (TexContent.toPlaintextArray monthTex).trim.toLower + let monthStr := (TexContent.toPlaintextArray monthTex).trimAscii.copy.toLower match monthStr with | "jan" => 1 | "feb" => 2 | "mar" => 3 | "apr" => 4 | "may" => 5 | "jun" => 6 @@ -100,7 +100,7 @@ def getTag (authors : Array BibtexName) (date : Nat) : String := authors.map (·.oneLetterAbbr) |>.toList |> String.join else authors.map (·.threeLetterAbbr) |>.toList |> String.join - let dateString := if date > 0 then (toString (date / 100 + 100)).takeRight 2 else "" + let dateString := if date > 0 then (toString (date / 100 + 100)).takeEnd 2 else "" "[" ++ authorString ++ dateString ++ "]" partial def removeDuplicatedSpacesAux (s : String) : String := @@ -111,7 +111,7 @@ partial def removeDuplicatedSpacesAux (s : String) : String := s def removeDuplicatedSpaces (s : String) : String := - s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trim |> removeDuplicatedSpacesAux + s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trimAscii |>.copy |> removeDuplicatedSpacesAux /-- Get a `ProcessedEntry` from an `Entry`, computes all its field except for `html`. If the input is not `BibtexQuery.Entry.normalType`, returns `Option.none`. -/ @@ -243,7 +243,7 @@ def mkWebRef (urlPrefix namePrefix url : String) : Array Content := url.drop namePrefix.length else url - if s.toLower.startsWith "http" then + if s.copy.toLower.startsWith "http" then -- the url does not starts with `urlPrefix` or `namePrefix`, -- but still starts with "http" mkUrl url @@ -690,10 +690,10 @@ partial def deduplicateTagAux if let .some (first, count) := x.2[tag]? then let z : Array String := if count = 0 then - x.1.modify first fun x => x.dropRight 1 ++ "a]" + x.1.modify first fun x => (x.dropEnd 1).copy ++ "a]" else x.1 - let z := z.modify i fun x => x.dropRight 1 ++ toBase26 (count + 1) ++ "]" + let z := z.modify i fun x => (x.dropEnd 1).copy ++ toBase26 (count + 1) ++ "]" (z, x.2.insert tag (first, count + 1)) else (x.1, x.2.insert tag (i, 0)) diff --git a/BibtexQuery/Name.lean b/BibtexQuery/Name.lean index 2846d63..08f5a85 100644 --- a/BibtexQuery/Name.lean +++ b/BibtexQuery/Name.lean @@ -87,7 +87,7 @@ def stripDiacritics (c : Char) : Char := | '\u00DF' => 's' | '\u1E9E' => 'S' | _ => let s := getCanonicalDecomposition c - String.Pos.Raw.get? s (s.find fun c => getCanonicalCombiningClass c == 0) |>.getD c + s.find (fun c => getCanonicalCombiningClass c == 0) |>.get? |>.getD c /-- Strip diacritics from a string. -/ def stripDiacriticsFromString (s : String) : String := @@ -143,7 +143,7 @@ def getLastNameAbbr (arr : Array String) : String × String := (s, s) else let s := String.ofList s.toList - (s.take 1, s.take 3) + (s.take 1 |>.copy, s.take 3 |>.copy) | _ => let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.ofList (s, s) diff --git a/BibtexQuery/Parser.lean b/BibtexQuery/Parser.lean index 0105601..b1e800f 100644 --- a/BibtexQuery/Parser.lean +++ b/BibtexQuery/Parser.lean @@ -43,7 +43,7 @@ partial def bracedContentTail (acc : String) : Parser String := attempt do def bracedContent : Parser String := attempt do skipChar '{' let s ← bracedContentTail "" - return s.dropRight 1 + return s.dropEnd 1 |>.copy def quotedContent : Parser String := attempt do skipChar '"' diff --git a/BibtexQuery/Query.lean b/BibtexQuery/Query.lean index 58479db..b279eb3 100644 --- a/BibtexQuery/Query.lean +++ b/BibtexQuery/Query.lean @@ -24,11 +24,11 @@ inductive Query where deriving Repr, Inhabited def Query.ofString (s : String) : Option Query := - if s.startsWith "k." then some <| .key <| s.drop 2 - else if s.startsWith "a." then some <| .author <| s.drop 2 - else if s.startsWith "t." then some <| .title <| s.drop 2 - else if s.startsWith "w." then some <| .word <| s.drop 2 - else if s.startsWith "c." then some <| .class <| s.drop 2 + if s.startsWith "k." then some <| .key <| s.drop 2 |>.copy + else if s.startsWith "a." then some <| .author <| s.drop 2 |>.copy + else if s.startsWith "t." then some <| .title <| s.drop 2 |>.copy + else if s.startsWith "w." then some <| .word <| s.drop 2 |>.copy + else if s.startsWith "c." then some <| .class <| s.drop 2 |>.copy else none def Entry.matchQuery (e : Entry) (q : Query) : Bool := diff --git a/BibtexQuery/String.lean b/BibtexQuery/String.lean index bf39fab..578d52f 100644 --- a/BibtexQuery/String.lean +++ b/BibtexQuery/String.lean @@ -138,24 +138,19 @@ def String.flattenWords (s : String) : String := s.foldl --#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords def String.splitIntoNames (s : String) : List String := - (s.splitOn (sep := " and ")).map trim + (s.splitOn (sep := " and ")).map (String.Slice.copy ∘ trimAscii) -def String.toLastName (s : String) : String := - let s' := (s.splitToList (fun c => c = ',')).map trim - match s' with - | [s₁] => s₁ - | (s₁ :: _) => s₁ - | _ => "" +def String.toLastName (s : String) : String.Slice := + s.split ',' |>.find? (fun _ => true) |>.getD "".toSlice def String.toLastNames (s : String) : String := - String.intercalate " " $ s.splitIntoNames.map String.toLastName + " ".toSlice.intercalate $ s.splitIntoNames.map String.toLastName /-- Standardize to "Firstname Lastname" -/ def String.toFirstnameLastname (s : String) : String := - let s' := (s.splitToList (fun c => c = ',')).map trim - match s' with - | [s₁] => s₁ - | [s₁, s₂] => s₂ ++ " " ++ s₁ + match s.split ',' |>.map String.Slice.trimAscii |>.toList with + | [s₁] => s₁.copy + | [s₁, s₂] => s₂.copy ++ " " ++ s₁ | _ => "" def String.toFullNames (s : String) : String := @@ -167,7 +162,7 @@ partial def Substring.Raw.containsSubstrStartingAt (s : Substring.Raw) (q : Stri else (Substring.Raw.drop s 1).containsSubstrStartingAt q def String.containsSubstr (s : String) (q : String) : Bool := - s.toSubstring.containsSubstrStartingAt q + s.toRawSubstring.containsSubstrStartingAt q def String.pad (s : String) (c : Char) (n : Nat) : String := - (s ++ String.ofList (List.replicate n c)).take n + (s ++ String.ofList (List.replicate n c)).take n |>.copy diff --git a/BibtexQuery/TexDiacritics.lean b/BibtexQuery/TexDiacritics.lean index fd957e3..5077196 100644 --- a/BibtexQuery/TexDiacritics.lean +++ b/BibtexQuery/TexDiacritics.lean @@ -50,7 +50,7 @@ partial def addDiacritics (x : TexContent) (ch : String) : if s.isEmpty then throw "expected a non-empty normal string, but got ''" else if GeneralCategory.isLetter s.front then - return .normal <| s.take 1 ++ ch ++ s.drop 1 + return .normal <| (s.take 1).copy ++ ch ++ s.drop 1 else throw s!"diacritics character can only be added after a letter, but got '{s.front}'" | .char c => throw s!"expected a non-empty normal string, but got '{c}'" @@ -142,7 +142,7 @@ partial def toHtml (x : TexContent) : Array Content := | _ => .Character c.toString #[ret] | .command cmd => - let ret : Content := match cmd.trim with + let ret : Content := match cmd.trimAscii with | "\\\\" => .Element ⟨ "br", empty, #[] ⟩ | _ => .Element ⟨ "span", empty.insert "style" "color:red;", #[.Character cmd] ⟩ #[ret] @@ -154,7 +154,7 @@ partial def toHtmlArray (arr : Array TexContent) (i : Nat := 0) if h : i < arr.size then if h' : i + 1 < arr.size then if let .command cmd := arr[i] then - match cmd.trim with + match cmd.trimAscii with | "\\url" => let next := arr[i + 1] let x : Content := .Element ⟨ "a", empty.insert "href" @@ -233,8 +233,8 @@ def texCommand : Parser String := pchar '\\' *> attempt do /-- Similar to `texCommand` but it excludes some commands. -/ def texCommand' (exclude : Array String) : Parser String := attempt do let s ← texCommand - match exclude.find? (· == s.trim) with - | .some _ => fail s!"'{s.trim}' is not allowed" + match exclude.find? (·.toSlice == s.trimAscii) with + | .some _ => fail s!"'{s.trimAscii}' is not allowed" | .none => return s /-- Match a sequence starting with `{` and ending with `}`. -/ @@ -304,16 +304,16 @@ Sometimes it needs to read the contents after the command, in this case the `p` def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexContent) := do let cmd ← texCommand -- some special commands - if cmd.trim = "\\url" then + if cmd.trimAscii == "\\url" then let s ← pchar '{' *> rawContentAux <* pchar '}' return .some <| .braced #[.command cmd, .braced <| #[.normal s]] -- some special characters need to put into `` - let c : Char := match cmd.trim with + let c : Char := match cmd.trimAscii.copy with | "\\$" => '$' | "\\textbackslash" => '\\' | _ => ' ' if c ≠ ' ' then return .some <| .char c -- some other characters - let s : String := match cmd.trim with + let s : String := match cmd.trimAscii with | "\\oe" => "œ" | "\\OE" => "Œ" | "\\ae" => "æ" | "\\AE" => "Æ" | "\\aa" => "å" | "\\AA" => "Å" @@ -329,7 +329,7 @@ def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexCo | _ => "" if not s.isEmpty then return .some <| .normal s -- diacritics characters - let s : String := match cmd.trim with + let s : String := match cmd.trimAscii with | "\\`" => "\u0300" | "\\'" => "\u0301" | "\\^" => "\u0302" | "\\\"" => "\u0308" | "\\~" => "\u0303" | "\\=" => "\u0304" diff --git a/lake-manifest.json b/lake-manifest.json index c190ade..03828b3 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "378b62cdf3bb90d3cd9aa12dc2be43b42bdda339", + "rev": "14607afb6759e27a1e9f31cbcdb30b5eb7dfd977", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "nightly-testing", diff --git a/lean-toolchain b/lean-toolchain index fbd2c39..45cddb2 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.26.0-rc1 \ No newline at end of file +leanprover/lean4:nightly-2025-11-24 From 0da8fbaa24d483408d73ec3a6747dc372f19d5a3 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Wed, 10 Dec 2025 10:08:46 +1100 Subject: [PATCH 09/11] fix deprecations --- BibtexQuery/Format.lean | 2 +- BibtexQuery/Main.lean | 16 ++++++++-------- BibtexQuery/ParsecExtra.lean | 6 +++--- BibtexQuery/String.lean | 4 ++-- BibtexQuery/TexDiacritics.lean | 2 +- lake-manifest.json | 2 +- lean-toolchain | 2 +- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/BibtexQuery/Format.lean b/BibtexQuery/Format.lean index c590990..51d76d7 100644 --- a/BibtexQuery/Format.lean +++ b/BibtexQuery/Format.lean @@ -123,7 +123,7 @@ def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) : if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then .ok (x.name, #[.normal s]) else - match texContents ⟨s, s.startValidPos⟩ with + match texContents ⟨s, s.startPos⟩ with | .success _ arr => .ok (x.name, arr) | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {Input.pos it}: {err}" let tags := Std.HashMap.ofList lst diff --git a/BibtexQuery/Main.lean b/BibtexQuery/Main.lean index 5d44a47..993a4bb 100644 --- a/BibtexQuery/Main.lean +++ b/BibtexQuery/Main.lean @@ -98,28 +98,28 @@ def main : List String → IO Unit | ["d", fname] => do IO.println s!"Reading {fname} to find doubled keys" let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => let lst := listDoublons res IO.println lst - | .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfPos pos}: {err}" | ["l", fname] => do let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => printEntries res - | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}" | "q" :: (fname :: queries) => do let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString - | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}" | "c" :: (fname :: queries) => do let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString - | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}" | _ => do IO.eprintln "Invalid command-line arguments"; printHelp diff --git a/BibtexQuery/ParsecExtra.lean b/BibtexQuery/ParsecExtra.lean index af929d4..6af94df 100644 --- a/BibtexQuery/ParsecExtra.lean +++ b/BibtexQuery/ParsecExtra.lean @@ -23,12 +23,12 @@ open Lean Std.Internal.Parsec.String Std.Internal.Parsec namespace BibtexQuery.ParsecExtra def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α := - match p ⟨s, s.startValidPos⟩ with + match p ⟨s, s.startPos⟩ with | .success _ x => some x | .error _ _ => none def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos.Raw) := - match p ⟨s, s.startValidPos⟩ with + match p ⟨s, s.startPos⟩ with | .success pos x => some ⟨x, Input.pos pos⟩ | .error _ _ => none @@ -113,7 +113,7 @@ def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser Strin let ⟨res, pos⟩ := go it.1 it.2 "" .success ⟨_, pos⟩ res where - go {s : String} (str : String) (it : s.ValidPos) (acc : String) : String × s.ValidPos := + go {s : String} (str : String) (it : s.Pos) (acc : String) : String × s.Pos := if h : ¬it.IsAtEnd then let c := it.get h let prev : Option Char := if acc == "" then none else acc.back diff --git a/BibtexQuery/String.lean b/BibtexQuery/String.lean index 578d52f..c8e5b85 100644 --- a/BibtexQuery/String.lean +++ b/BibtexQuery/String.lean @@ -15,8 +15,8 @@ def String.Legacy.Iterator.lineNumber (it : String.Legacy.Iterator) : Nat := let s : Substring.Raw := ⟨it.toString, 0, it.pos⟩ s.foldl (fun n c => if c = '\n' then n+1 else n) 1 -/-- Get the line number of the current position for ValidPos. -/ -def lineNumberOfValidPos (it : Sigma String.ValidPos) : Nat := +/-- Get the line number of the current position for Pos. -/ +def lineNumberOfPos (it : Sigma String.Pos) : Nat := let s : Substring.Raw := ⟨it.1, 0, it.2.offset⟩ s.foldl (fun n c => if c = '\n' then n+1 else n) 1 diff --git a/BibtexQuery/TexDiacritics.lean b/BibtexQuery/TexDiacritics.lean index 5077196..79c745c 100644 --- a/BibtexQuery/TexDiacritics.lean +++ b/BibtexQuery/TexDiacritics.lean @@ -272,7 +272,7 @@ partial def mathContentAux : Parser String := do def mathContent : Parser (Option TexContent) := fun it => let aux (beginning ending : String) : Parser String := pstring beginning *> mathContentAux <* pstring ending - let substr := it.2.extract (it.2.nextn 2) + let substr := it.1.extract it.2 (it.2.nextn 2) if substr = "\\[" then ((.some <| .math "$$" ·) <$> aux "\\[" "\\]") it else if substr = "\\(" then diff --git a/lake-manifest.json b/lake-manifest.json index 03828b3..388c73f 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "14607afb6759e27a1e9f31cbcdb30b5eb7dfd977", + "rev": "85864d2f8b5bb403f884a37da95b57b95d86e169", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "nightly-testing", diff --git a/lean-toolchain b/lean-toolchain index 45cddb2..f0201a3 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:nightly-2025-11-24 +leanprover/lean4:nightly-2025-12-08 \ No newline at end of file From 9e46c658c5dec1346cdcc37863c445e001465a35 Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Mon, 26 Jan 2026 13:37:43 +1100 Subject: [PATCH 10/11] chore: bump toolchain to v4.28.0-rc1 --- lake-manifest.json | 2 +- lean-toolchain | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lake-manifest.json b/lake-manifest.json index e00a7f3..b06c32e 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "b09d573cbacf5a8e767292e0edeec787a81689ce", + "rev": "8668e1ab7c987fb8ed1349f14c3b7b60bd5f27b6", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "main", diff --git a/lean-toolchain b/lean-toolchain index 5249182..4bafde2 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.27.0 +leanprover/lean4:v4.28.0-rc1 From e3f36885f8a43f4afae8752d02c381107a03f40b Mon Sep 17 00:00:00 2001 From: Kim Morrison Date: Mon, 26 Jan 2026 13:46:06 +1100 Subject: [PATCH 11/11] fix: use main branch for UnicodeBasic dependency --- lakefile.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lakefile.toml b/lakefile.toml index fa530bf..555ac1d 100644 --- a/lakefile.toml +++ b/lakefile.toml @@ -4,7 +4,7 @@ defaultTargets = ["BibtexQuery", "«bibtex-query»"] [[require]] name = "UnicodeBasic" git = "https://github.com/fgdorais/lean4-unicode-basic" -rev = "nightly-testing" +rev = "main" [[lean_lib]] name = "BibtexQuery"