diff --git a/BibtexQuery/Format.lean b/BibtexQuery/Format.lean index a38f933..51d76d7 100644 --- a/BibtexQuery/Format.lean +++ b/BibtexQuery/Format.lean @@ -61,7 +61,7 @@ def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Conten Char.isDigit |> String.ofList |>.toNat? then let month : Nat := if let .some monthTex := tags["month"]? then - let monthStr := (TexContent.toPlaintextArray monthTex).trim.toLower + let monthStr := (TexContent.toPlaintextArray monthTex).trimAscii.copy.toLower match monthStr with | "jan" => 1 | "feb" => 2 | "mar" => 3 | "apr" => 4 | "may" => 5 | "jun" => 6 @@ -100,7 +100,7 @@ def getTag (authors : Array BibtexName) (date : Nat) : String := authors.map (·.oneLetterAbbr) |>.toList |> String.join else authors.map (·.threeLetterAbbr) |>.toList |> String.join - let dateString := if date > 0 then (toString (date / 100 + 100)).takeRight 2 else "" + let dateString := if date > 0 then (toString (date / 100 + 100)).takeEnd 2 else "" "[" ++ authorString ++ dateString ++ "]" partial def removeDuplicatedSpacesAux (s : String) : String := @@ -111,7 +111,7 @@ partial def removeDuplicatedSpacesAux (s : String) : String := s def removeDuplicatedSpaces (s : String) : String := - s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trim |> removeDuplicatedSpacesAux + s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trimAscii |>.copy |> removeDuplicatedSpacesAux /-- Get a `ProcessedEntry` from an `Entry`, computes all its field except for `html`. If the input is not `BibtexQuery.Entry.normalType`, returns `Option.none`. -/ @@ -123,7 +123,7 @@ def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) : if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then .ok (x.name, #[.normal s]) else - match texContents ⟨s, s.startValidPos⟩ with + match texContents ⟨s, s.startPos⟩ with | .success _ arr => .ok (x.name, arr) | .error it err => .error s!"failed to run texContents on '{it.1}' at pos {Input.pos it}: {err}" let tags := Std.HashMap.ofList lst @@ -243,7 +243,7 @@ def mkWebRef (urlPrefix namePrefix url : String) : Array Content := url.drop namePrefix.length else url - if s.toLower.startsWith "http" then + if s.copy.toLower.startsWith "http" then -- the url does not starts with `urlPrefix` or `namePrefix`, -- but still starts with "http" mkUrl url @@ -690,10 +690,10 @@ partial def deduplicateTagAux if let .some (first, count) := x.2[tag]? then let z : Array String := if count = 0 then - x.1.modify first fun x => x.dropRight 1 ++ "a]" + x.1.modify first fun x => (x.dropEnd 1).copy ++ "a]" else x.1 - let z := z.modify i fun x => x.dropRight 1 ++ toBase26 (count + 1) ++ "]" + let z := z.modify i fun x => (x.dropEnd 1).copy ++ toBase26 (count + 1) ++ "]" (z, x.2.insert tag (first, count + 1)) else (x.1, x.2.insert tag (i, 0)) diff --git a/BibtexQuery/Main.lean b/BibtexQuery/Main.lean index 5d44a47..993a4bb 100644 --- a/BibtexQuery/Main.lean +++ b/BibtexQuery/Main.lean @@ -98,28 +98,28 @@ def main : List String → IO Unit | ["d", fname] => do IO.println s!"Reading {fname} to find doubled keys" let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => let lst := listDoublons res IO.println lst - | .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfPos pos}: {err}" | ["l", fname] => do let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => printEntries res - | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}" | "q" :: (fname :: queries) => do let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString - | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}" | "c" :: (fname :: queries) => do let s := ← IO.FS.readFile fname - let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos⟩ + let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos⟩ match parsed with | .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString - | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}" + | .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}" | _ => do IO.eprintln "Invalid command-line arguments"; printHelp diff --git a/BibtexQuery/Name.lean b/BibtexQuery/Name.lean index 2846d63..08f5a85 100644 --- a/BibtexQuery/Name.lean +++ b/BibtexQuery/Name.lean @@ -87,7 +87,7 @@ def stripDiacritics (c : Char) : Char := | '\u00DF' => 's' | '\u1E9E' => 'S' | _ => let s := getCanonicalDecomposition c - String.Pos.Raw.get? s (s.find fun c => getCanonicalCombiningClass c == 0) |>.getD c + s.find (fun c => getCanonicalCombiningClass c == 0) |>.get? |>.getD c /-- Strip diacritics from a string. -/ def stripDiacriticsFromString (s : String) : String := @@ -143,7 +143,7 @@ def getLastNameAbbr (arr : Array String) : String × String := (s, s) else let s := String.ofList s.toList - (s.take 1, s.take 3) + (s.take 1 |>.copy, s.take 3 |>.copy) | _ => let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.ofList (s, s) diff --git a/BibtexQuery/ParsecExtra.lean b/BibtexQuery/ParsecExtra.lean index 72b922d..6af94df 100644 --- a/BibtexQuery/ParsecExtra.lean +++ b/BibtexQuery/ParsecExtra.lean @@ -23,12 +23,12 @@ open Lean Std.Internal.Parsec.String Std.Internal.Parsec namespace BibtexQuery.ParsecExtra def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α := - match p ⟨s, s.startValidPos⟩ with + match p ⟨s, s.startPos⟩ with | .success _ x => some x | .error _ _ => none def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos.Raw) := - match p ⟨s, s.startValidPos⟩ with + match p ⟨s, s.startPos⟩ with | .success pos x => some ⟨x, Input.pos pos⟩ | .error _ _ => none @@ -110,17 +110,17 @@ def natNum : Parser Nat := attempt do return n def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it => - let ⟨res, pos⟩ := go it.2 "" + let ⟨res, pos⟩ := go it.1 it.2 "" .success ⟨_, pos⟩ res where - go {s : String} (it : s.ValidPos) (acc : String) : String × s.ValidPos := + go {s : String} (str : String) (it : s.Pos) (acc : String) : String × s.Pos := if h : ¬it.IsAtEnd then let c := it.get h let prev : Option Char := if acc == "" then none else acc.back if test prev c then (acc, it) else - go (it.next h) (acc ++ c.toString) + go str (it.next h) (acc ++ c.toString) else (acc, it) termination_by it diff --git a/BibtexQuery/Parser.lean b/BibtexQuery/Parser.lean index 0105601..b1e800f 100644 --- a/BibtexQuery/Parser.lean +++ b/BibtexQuery/Parser.lean @@ -43,7 +43,7 @@ partial def bracedContentTail (acc : String) : Parser String := attempt do def bracedContent : Parser String := attempt do skipChar '{' let s ← bracedContentTail "" - return s.dropRight 1 + return s.dropEnd 1 |>.copy def quotedContent : Parser String := attempt do skipChar '"' diff --git a/BibtexQuery/Query.lean b/BibtexQuery/Query.lean index 58479db..b279eb3 100644 --- a/BibtexQuery/Query.lean +++ b/BibtexQuery/Query.lean @@ -24,11 +24,11 @@ inductive Query where deriving Repr, Inhabited def Query.ofString (s : String) : Option Query := - if s.startsWith "k." then some <| .key <| s.drop 2 - else if s.startsWith "a." then some <| .author <| s.drop 2 - else if s.startsWith "t." then some <| .title <| s.drop 2 - else if s.startsWith "w." then some <| .word <| s.drop 2 - else if s.startsWith "c." then some <| .class <| s.drop 2 + if s.startsWith "k." then some <| .key <| s.drop 2 |>.copy + else if s.startsWith "a." then some <| .author <| s.drop 2 |>.copy + else if s.startsWith "t." then some <| .title <| s.drop 2 |>.copy + else if s.startsWith "w." then some <| .word <| s.drop 2 |>.copy + else if s.startsWith "c." then some <| .class <| s.drop 2 |>.copy else none def Entry.matchQuery (e : Entry) (q : Query) : Bool := diff --git a/BibtexQuery/String.lean b/BibtexQuery/String.lean index df231dd..c8e5b85 100644 --- a/BibtexQuery/String.lean +++ b/BibtexQuery/String.lean @@ -10,8 +10,13 @@ Author: Frédéric Dupuis This file contains various string processing functions. -/ -/-- Get the line number of the current position for ValidPos. -/ -def lineNumberOfValidPos (it : Sigma String.ValidPos) : Nat := +/-- Get the line number of the current position of the iterator. -/ +def String.Legacy.Iterator.lineNumber (it : String.Legacy.Iterator) : Nat := + let s : Substring.Raw := ⟨it.toString, 0, it.pos⟩ + s.foldl (fun n c => if c = '\n' then n+1 else n) 1 + +/-- Get the line number of the current position for Pos. -/ +def lineNumberOfPos (it : Sigma String.Pos) : Nat := let s : Substring.Raw := ⟨it.1, 0, it.2.offset⟩ s.foldl (fun n c => if c = '\n' then n+1 else n) 1 @@ -133,37 +138,31 @@ def String.flattenWords (s : String) : String := s.foldl --#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords def String.splitIntoNames (s : String) : List String := - (s.splitOn (sep := " and ")).map trim + (s.splitOn (sep := " and ")).map (String.Slice.copy ∘ trimAscii) -def String.toLastName (s : String) : String := - let s' := (s.splitToList (fun c => c = ',')).map trim - match s' with - | [s₁] => s₁ - | (s₁ :: _) => s₁ - | _ => "" +def String.toLastName (s : String) : String.Slice := + s.split ',' |>.find? (fun _ => true) |>.getD "".toSlice def String.toLastNames (s : String) : String := - String.intercalate " " $ s.splitIntoNames.map String.toLastName + " ".toSlice.intercalate $ s.splitIntoNames.map String.toLastName /-- Standardize to "Firstname Lastname" -/ def String.toFirstnameLastname (s : String) : String := - let s' := (s.splitToList (fun c => c = ',')).map trim - match s' with - | [s₁] => s₁ - | [s₁, s₂] => s₂ ++ " " ++ s₁ + match s.split ',' |>.map String.Slice.trimAscii |>.toList with + | [s₁] => s₁.copy + | [s₁, s₂] => s₂.copy ++ " " ++ s₁ | _ => "" def String.toFullNames (s : String) : String := String.join $ (s.splitIntoNames.map String.toFirstnameLastname).map String.flattenWords --- FIXME: use `String.Slice` instead of `Substring`. partial def Substring.Raw.containsSubstrStartingAt (s : Substring.Raw) (q : String) : Bool := if (Substring.Raw.toString s).length = 0 then q.length = 0 else if q.isPrefixOf (Substring.Raw.toString s) then true else (Substring.Raw.drop s 1).containsSubstrStartingAt q def String.containsSubstr (s : String) (q : String) : Bool := - s.toSubstring.containsSubstrStartingAt q + s.toRawSubstring.containsSubstrStartingAt q def String.pad (s : String) (c : Char) (n : Nat) : String := - (s ++ String.ofList (List.replicate n c)).take n + (s ++ String.ofList (List.replicate n c)).take n |>.copy diff --git a/BibtexQuery/TexDiacritics.lean b/BibtexQuery/TexDiacritics.lean index b4a1a97..79c745c 100644 --- a/BibtexQuery/TexDiacritics.lean +++ b/BibtexQuery/TexDiacritics.lean @@ -50,7 +50,7 @@ partial def addDiacritics (x : TexContent) (ch : String) : if s.isEmpty then throw "expected a non-empty normal string, but got ''" else if GeneralCategory.isLetter s.front then - return .normal <| s.take 1 ++ ch ++ s.drop 1 + return .normal <| (s.take 1).copy ++ ch ++ s.drop 1 else throw s!"diacritics character can only be added after a letter, but got '{s.front}'" | .char c => throw s!"expected a non-empty normal string, but got '{c}'" @@ -142,7 +142,7 @@ partial def toHtml (x : TexContent) : Array Content := | _ => .Character c.toString #[ret] | .command cmd => - let ret : Content := match cmd.trim with + let ret : Content := match cmd.trimAscii with | "\\\\" => .Element ⟨ "br", empty, #[] ⟩ | _ => .Element ⟨ "span", empty.insert "style" "color:red;", #[.Character cmd] ⟩ #[ret] @@ -154,7 +154,7 @@ partial def toHtmlArray (arr : Array TexContent) (i : Nat := 0) if h : i < arr.size then if h' : i + 1 < arr.size then if let .command cmd := arr[i] then - match cmd.trim with + match cmd.trimAscii with | "\\url" => let next := arr[i + 1] let x : Content := .Element ⟨ "a", empty.insert "href" @@ -233,8 +233,8 @@ def texCommand : Parser String := pchar '\\' *> attempt do /-- Similar to `texCommand` but it excludes some commands. -/ def texCommand' (exclude : Array String) : Parser String := attempt do let s ← texCommand - match exclude.find? (· == s.trim) with - | .some _ => fail s!"'{s.trim}' is not allowed" + match exclude.find? (·.toSlice == s.trimAscii) with + | .some _ => fail s!"'{s.trimAscii}' is not allowed" | .none => return s /-- Match a sequence starting with `{` and ending with `}`. -/ @@ -272,14 +272,14 @@ partial def mathContentAux : Parser String := do def mathContent : Parser (Option TexContent) := fun it => let aux (beginning ending : String) : Parser String := pstring beginning *> mathContentAux <* pstring ending - let substr := it.2.extract (it.2.nextn 2) + let substr := it.1.extract it.2 (it.2.nextn 2) if substr = "\\[" then ((.some <| .math "$$" ·) <$> aux "\\[" "\\]") it else if substr = "\\(" then ((.some <| .math "$" ·) <$> aux "\\(" "\\)") it else if substr = "$$" then ((.some <| .math "$$" ·) <$> aux "$$" "$$") it - else if Input.curr it = '$' then + else if Input.hasNext it && Input.curr it = '$' then ((.some <| .math "$" ·) <$> aux "$" "$") it else .success it .none @@ -304,16 +304,16 @@ Sometimes it needs to read the contents after the command, in this case the `p` def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexContent) := do let cmd ← texCommand -- some special commands - if cmd.trim = "\\url" then + if cmd.trimAscii == "\\url" then let s ← pchar '{' *> rawContentAux <* pchar '}' return .some <| .braced #[.command cmd, .braced <| #[.normal s]] -- some special characters need to put into `` - let c : Char := match cmd.trim with + let c : Char := match cmd.trimAscii.copy with | "\\$" => '$' | "\\textbackslash" => '\\' | _ => ' ' if c ≠ ' ' then return .some <| .char c -- some other characters - let s : String := match cmd.trim with + let s : String := match cmd.trimAscii with | "\\oe" => "œ" | "\\OE" => "Œ" | "\\ae" => "æ" | "\\AE" => "Æ" | "\\aa" => "å" | "\\AA" => "Å" @@ -329,7 +329,7 @@ def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexCo | _ => "" if not s.isEmpty then return .some <| .normal s -- diacritics characters - let s : String := match cmd.trim with + let s : String := match cmd.trimAscii with | "\\`" => "\u0300" | "\\'" => "\u0301" | "\\^" => "\u0302" | "\\\"" => "\u0308" | "\\~" => "\u0303" | "\\=" => "\u0304" diff --git a/lake-manifest.json b/lake-manifest.json index f873d3a..3546b65 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -5,7 +5,7 @@ "type": "git", "subDir": null, "scope": "", - "rev": "45c6d87607c82ef5c832ab73bbc73631833aa9c9", + "rev": "cff8377dbe50aae42cbd04213d5b3dacf742c3ba", "name": "UnicodeBasic", "manifestFile": "lake-manifest.json", "inputRev": "main", diff --git a/lean-toolchain b/lean-toolchain index 7035713..bd19bde 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.26.0-rc2 \ No newline at end of file +leanprover/lean4:v4.27.0-rc1