Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions BibtexQuery/Format.lean
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def getDate (tags : Std.HashMap String (Array TexContent)) : Nat × Array Conten
Char.isDigit |> String.ofList |>.toNat? then
let month : Nat :=
if let .some monthTex := tags["month"]? then
let monthStr := (TexContent.toPlaintextArray monthTex).trim.toLower
let monthStr := (TexContent.toPlaintextArray monthTex).trimAscii.copy.toLower
match monthStr with
| "jan" => 1 | "feb" => 2 | "mar" => 3
| "apr" => 4 | "may" => 5 | "jun" => 6
Expand Down Expand Up @@ -100,7 +100,7 @@ def getTag (authors : Array BibtexName) (date : Nat) : String :=
authors.map (·.oneLetterAbbr) |>.toList |> String.join
else
authors.map (·.threeLetterAbbr) |>.toList |> String.join
let dateString := if date > 0 then (toString (date / 100 + 100)).takeRight 2 else ""
let dateString := if date > 0 then (toString (date / 100 + 100)).takeEnd 2 else ""
"[" ++ authorString ++ dateString ++ "]"

partial def removeDuplicatedSpacesAux (s : String) : String :=
Expand All @@ -111,7 +111,7 @@ partial def removeDuplicatedSpacesAux (s : String) : String :=
s

def removeDuplicatedSpaces (s : String) : String :=
s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trim |> removeDuplicatedSpacesAux
s.replace "\r" " " |>.replace "\n" " " |>.replace "\t" " " |>.trimAscii |>.copy |> removeDuplicatedSpacesAux

/-- Get a `ProcessedEntry` from an `Entry`, computes all its field except for `html`.
If the input is not `BibtexQuery.Entry.normalType`, returns `Option.none`. -/
Expand All @@ -123,7 +123,7 @@ def ProcessedEntry.ofEntry (e : Entry) : Except String (Option ProcessedEntry) :
if x.name = "pubmed" ∨ x.name = "doi" ∨ x.name = "eprint" ∨ x.name.endsWith "url" then
.ok (x.name, #[.normal s])
else
match texContents ⟨s, s.startValidPos⟩ with
match texContents ⟨s, s.startPos⟩ with
| .success _ arr => .ok (x.name, arr)
| .error it err => .error s!"failed to run texContents on '{it.1}' at pos {Input.pos it}: {err}"
let tags := Std.HashMap.ofList lst
Expand Down Expand Up @@ -243,7 +243,7 @@ def mkWebRef (urlPrefix namePrefix url : String) : Array Content :=
url.drop namePrefix.length
else
url
if s.toLower.startsWith "http" then
if s.copy.toLower.startsWith "http" then
-- the url does not starts with `urlPrefix` or `namePrefix`,
-- but still starts with "http"
mkUrl url
Expand Down Expand Up @@ -690,10 +690,10 @@ partial def deduplicateTagAux
if let .some (first, count) := x.2[tag]? then
let z : Array String :=
if count = 0 then
x.1.modify first fun x => x.dropRight 1 ++ "a]"
x.1.modify first fun x => (x.dropEnd 1).copy ++ "a]"
else
x.1
let z := z.modify i fun x => x.dropRight 1 ++ toBase26 (count + 1) ++ "]"
let z := z.modify i fun x => (x.dropEnd 1).copy ++ toBase26 (count + 1) ++ "]"
(z, x.2.insert tag (first, count + 1))
else
(x.1, x.2.insert tag (i, 0))
Expand Down
16 changes: 8 additions & 8 deletions BibtexQuery/Main.lean
Original file line number Diff line number Diff line change
Expand Up @@ -98,28 +98,28 @@ def main : List String → IO Unit
| ["d", fname] => do
IO.println s!"Reading {fname} to find doubled keys"
let s := ← IO.FS.readFile fname
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos
match parsed with
| .success _pos res =>
let lst := listDoublons res
IO.println lst
| .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfValidPos pos}: {err}"
| .error pos err => IO.eprintln s!"Parse error at line {lineNumberOfPos pos}: {err}"
| ["l", fname] => do
let s := ← IO.FS.readFile fname
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos
match parsed with
| .success _pos res => printEntries res
| .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}"
| .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}"
| "q" :: (fname :: queries) => do
let s := ← IO.FS.readFile fname
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos
match parsed with
| .success _pos res => printMatchingEntries res $ queries.filterMap Query.ofString
| .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}"
| .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}"
| "c" :: (fname :: queries) => do
let s := ← IO.FS.readFile fname
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startValidPos
let parsed := BibtexQuery.Parser.bibtexFile ⟨s, s.startPos
match parsed with
| .success _pos res => printMatchingCitations res $ queries.filterMap Query.ofString
| .error pos err => IO.eprint s!"Parse error at line {lineNumberOfValidPos pos}: {err}"
| .error pos err => IO.eprint s!"Parse error at line {lineNumberOfPos pos}: {err}"
| _ => do IO.eprintln "Invalid command-line arguments"; printHelp
4 changes: 2 additions & 2 deletions BibtexQuery/Name.lean
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def stripDiacritics (c : Char) : Char :=
| '\u00DF' => 's' | '\u1E9E' => 'S'
| _ =>
let s := getCanonicalDecomposition c
String.Pos.Raw.get? s (s.find fun c => getCanonicalCombiningClass c == 0) |>.getD c
s.find (fun c => getCanonicalCombiningClass c == 0) |>.get? |>.getD c

/-- Strip diacritics from a string. -/
def stripDiacriticsFromString (s : String) : String :=
Expand Down Expand Up @@ -143,7 +143,7 @@ def getLastNameAbbr (arr : Array String) : String × String :=
(s, s)
else
let s := String.ofList s.toList
(s.take 1, s.take 3)
(s.take 1 |>.copy, s.take 3 |>.copy)
| _ =>
let s := arr.filterMap (getAlphabets · |> (·[0]?)) |>.toList |> String.ofList
(s, s)
Expand Down
10 changes: 5 additions & 5 deletions BibtexQuery/ParsecExtra.lean
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@ open Lean Std.Internal.Parsec.String Std.Internal.Parsec
namespace BibtexQuery.ParsecExtra

def _root_.String.parse? [Inhabited α] (s : String) (p : Parser α) : Option α :=
match p ⟨s, s.startValidPos⟩ with
match p ⟨s, s.startPos⟩ with
| .success _ x => some x
| .error _ _ => none

def _root_.String.parseDebug [Inhabited α] (s : String) (p : Parser α) : Option (α × String.Pos.Raw) :=
match p ⟨s, s.startValidPos⟩ with
match p ⟨s, s.startPos⟩ with
| .success pos x => some ⟨x, Input.pos pos⟩
| .error _ _ => none

Expand Down Expand Up @@ -110,17 +110,17 @@ def natNum : Parser Nat := attempt do
return n

def manyCharsUntilWithPrev (test : Option Char → Char → Bool) : Parser String := fun it =>
let ⟨res, pos⟩ := go it.2 ""
let ⟨res, pos⟩ := go it.1 it.2 ""
.success ⟨_, pos⟩ res
where
go {s : String} (it : s.ValidPos) (acc : String) : String × s.ValidPos :=
go {s : String} (str : String) (it : s.Pos) (acc : String) : String × s.Pos :=
if h : ¬it.IsAtEnd then
let c := it.get h
let prev : Option Char := if acc == "" then none else acc.back
if test prev c then
(acc, it)
else
go (it.next h) (acc ++ c.toString)
go str (it.next h) (acc ++ c.toString)
else
(acc, it)
termination_by it
Expand Down
2 changes: 1 addition & 1 deletion BibtexQuery/Parser.lean
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ partial def bracedContentTail (acc : String) : Parser String := attempt do
def bracedContent : Parser String := attempt do
skipChar '{'
let s ← bracedContentTail ""
return s.dropRight 1
return s.dropEnd 1 |>.copy

def quotedContent : Parser String := attempt do
skipChar '"'
Expand Down
10 changes: 5 additions & 5 deletions BibtexQuery/Query.lean
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ inductive Query where
deriving Repr, Inhabited

def Query.ofString (s : String) : Option Query :=
if s.startsWith "k." then some <| .key <| s.drop 2
else if s.startsWith "a." then some <| .author <| s.drop 2
else if s.startsWith "t." then some <| .title <| s.drop 2
else if s.startsWith "w." then some <| .word <| s.drop 2
else if s.startsWith "c." then some <| .class <| s.drop 2
if s.startsWith "k." then some <| .key <| s.drop 2 |>.copy
else if s.startsWith "a." then some <| .author <| s.drop 2 |>.copy
else if s.startsWith "t." then some <| .title <| s.drop 2 |>.copy
else if s.startsWith "w." then some <| .word <| s.drop 2 |>.copy
else if s.startsWith "c." then some <| .class <| s.drop 2 |>.copy
else none

def Entry.matchQuery (e : Entry) (q : Query) : Bool :=
Expand Down
33 changes: 16 additions & 17 deletions BibtexQuery/String.lean
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,13 @@ Author: Frédéric Dupuis
This file contains various string processing functions.
-/

/-- Get the line number of the current position for ValidPos. -/
def lineNumberOfValidPos (it : Sigma String.ValidPos) : Nat :=
/-- Get the line number of the current position of the iterator. -/
def String.Legacy.Iterator.lineNumber (it : String.Legacy.Iterator) : Nat :=
let s : Substring.Raw := ⟨it.toString, 0, it.pos⟩
s.foldl (fun n c => if c = '\n' then n+1 else n) 1

/-- Get the line number of the current position for Pos. -/
def lineNumberOfPos (it : Sigma String.Pos) : Nat :=
let s : Substring.Raw := ⟨it.1, 0, it.2.offset⟩
s.foldl (fun n c => if c = '\n' then n+1 else n) 1

Expand Down Expand Up @@ -133,37 +138,31 @@ def String.flattenWords (s : String) : String := s.foldl
--#eval "Frédéric Dupuis, Marco {T}omamichel".flattenWords

def String.splitIntoNames (s : String) : List String :=
(s.splitOn (sep := " and ")).map trim
(s.splitOn (sep := " and ")).map (String.Slice.copy ∘ trimAscii)

def String.toLastName (s : String) : String :=
let s' := (s.splitToList (fun c => c = ',')).map trim
match s' with
| [s₁] => s₁
| (s₁ :: _) => s₁
| _ => ""
def String.toLastName (s : String) : String.Slice :=
s.split ',' |>.find? (fun _ => true) |>.getD "".toSlice

def String.toLastNames (s : String) : String :=
String.intercalate " " $ s.splitIntoNames.map String.toLastName
" ".toSlice.intercalate $ s.splitIntoNames.map String.toLastName

/-- Standardize to "Firstname Lastname" -/
def String.toFirstnameLastname (s : String) : String :=
let s' := (s.splitToList (fun c => c = ',')).map trim
match s' with
| [s₁] => s₁
| [s₁, s₂] => s₂ ++ " " ++ s₁
match s.split ',' |>.map String.Slice.trimAscii |>.toList with
| [s₁] => s₁.copy
| [s₁, s₂] => s₂.copy ++ " " ++ s₁
| _ => ""

def String.toFullNames (s : String) : String :=
String.join $ (s.splitIntoNames.map String.toFirstnameLastname).map String.flattenWords

-- FIXME: use `String.Slice` instead of `Substring`.
partial def Substring.Raw.containsSubstrStartingAt (s : Substring.Raw) (q : String) : Bool :=
if (Substring.Raw.toString s).length = 0 then q.length = 0
else if q.isPrefixOf (Substring.Raw.toString s) then true
else (Substring.Raw.drop s 1).containsSubstrStartingAt q

def String.containsSubstr (s : String) (q : String) : Bool :=
s.toSubstring.containsSubstrStartingAt q
s.toRawSubstring.containsSubstrStartingAt q

def String.pad (s : String) (c : Char) (n : Nat) : String :=
(s ++ String.ofList (List.replicate n c)).take n
(s ++ String.ofList (List.replicate n c)).take n |>.copy
22 changes: 11 additions & 11 deletions BibtexQuery/TexDiacritics.lean
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ partial def addDiacritics (x : TexContent) (ch : String) :
if s.isEmpty then
throw "expected a non-empty normal string, but got ''"
else if GeneralCategory.isLetter s.front then
return .normal <| s.take 1 ++ ch ++ s.drop 1
return .normal <| (s.take 1).copy ++ ch ++ s.drop 1
else
throw s!"diacritics character can only be added after a letter, but got '{s.front}'"
| .char c => throw s!"expected a non-empty normal string, but got '{c}'"
Expand Down Expand Up @@ -142,7 +142,7 @@ partial def toHtml (x : TexContent) : Array Content :=
| _ => .Character c.toString
#[ret]
| .command cmd =>
let ret : Content := match cmd.trim with
let ret : Content := match cmd.trimAscii with
| "\\\\" => .Element ⟨ "br", empty, #[] ⟩
| _ => .Element ⟨ "span", empty.insert "style" "color:red;", #[.Character cmd] ⟩
#[ret]
Expand All @@ -154,7 +154,7 @@ partial def toHtmlArray (arr : Array TexContent) (i : Nat := 0)
if h : i < arr.size then
if h' : i + 1 < arr.size then
if let .command cmd := arr[i] then
match cmd.trim with
match cmd.trimAscii with
| "\\url" =>
let next := arr[i + 1]
let x : Content := .Element ⟨ "a", empty.insert "href"
Expand Down Expand Up @@ -233,8 +233,8 @@ def texCommand : Parser String := pchar '\\' *> attempt do
/-- Similar to `texCommand` but it excludes some commands. -/
def texCommand' (exclude : Array String) : Parser String := attempt do
let s ← texCommand
match exclude.find? (· == s.trim) with
| .some _ => fail s!"'{s.trim}' is not allowed"
match exclude.find? (·.toSlice == s.trimAscii) with
| .some _ => fail s!"'{s.trimAscii}' is not allowed"
| .none => return s

/-- Match a sequence starting with `{` and ending with `}`. -/
Expand Down Expand Up @@ -272,14 +272,14 @@ partial def mathContentAux : Parser String := do
def mathContent : Parser (Option TexContent) := fun it =>
let aux (beginning ending : String) : Parser String :=
pstring beginning *> mathContentAux <* pstring ending
let substr := it.2.extract (it.2.nextn 2)
let substr := it.1.extract it.2 (it.2.nextn 2)
if substr = "\\[" then
((.some <| .math "$$" ·) <$> aux "\\[" "\\]") it
else if substr = "\\(" then
((.some <| .math "$" ·) <$> aux "\\(" "\\)") it
else if substr = "$$" then
((.some <| .math "$$" ·) <$> aux "$$" "$$") it
else if Input.curr it = '$' then
else if Input.hasNext it && Input.curr it = '$' then
((.some <| .math "$" ·) <$> aux "$" "$") it
else
.success it .none
Expand All @@ -304,16 +304,16 @@ Sometimes it needs to read the contents after the command, in this case the `p`
def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexContent) := do
let cmd ← texCommand
-- some special commands
if cmd.trim = "\\url" then
if cmd.trimAscii == "\\url" then
let s ← pchar '{' *> rawContentAux <* pchar '}'
return .some <| .braced #[.command cmd, .braced <| #[.normal s]]
-- some special characters need to put into `<span>`
let c : Char := match cmd.trim with
let c : Char := match cmd.trimAscii.copy with
| "\\$" => '$' | "\\textbackslash" => '\\'
| _ => ' '
if c ≠ ' ' then return .some <| .char c
-- some other characters
let s : String := match cmd.trim with
let s : String := match cmd.trimAscii with
| "\\oe" => "œ" | "\\OE" => "Œ"
| "\\ae" => "æ" | "\\AE" => "Æ"
| "\\aa" => "å" | "\\AA" => "Å"
Expand All @@ -329,7 +329,7 @@ def texDiacriticsCommand (p : Parser (Option TexContent)) : Parser (Option TexCo
| _ => ""
if not s.isEmpty then return .some <| .normal s
-- diacritics characters
let s : String := match cmd.trim with
let s : String := match cmd.trimAscii with
| "\\`" => "\u0300" | "\\'" => "\u0301"
| "\\^" => "\u0302" | "\\\"" => "\u0308"
| "\\~" => "\u0303" | "\\=" => "\u0304"
Expand Down
2 changes: 1 addition & 1 deletion lake-manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"type": "git",
"subDir": null,
"scope": "",
"rev": "45c6d87607c82ef5c832ab73bbc73631833aa9c9",
"rev": "cff8377dbe50aae42cbd04213d5b3dacf742c3ba",
"name": "UnicodeBasic",
"manifestFile": "lake-manifest.json",
"inputRev": "main",
Expand Down
2 changes: 1 addition & 1 deletion lean-toolchain
Original file line number Diff line number Diff line change
@@ -1 +1 @@
leanprover/lean4:v4.26.0-rc2
leanprover/lean4:v4.27.0-rc1