From b911328396b203a4e3111b432c46c446bff0cdd2 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Wed, 9 Nov 2016 00:28:31 -0500 Subject: [PATCH 01/13] haskell version --- README.md | 40 +++++++++++++++++++++ Setup.hs | 2 ++ lib/Paths_wordfilter.hs | 6 ++++ lib/Wordfilter.hs | 78 +++++++++++++++++++++++++++++++++++++++++ stack.yaml | 66 ++++++++++++++++++++++++++++++++++ test/Wordlist_Test.hs | 25 +++++++++++++ wordfilter.cabal | 29 +++++++++++++++ 7 files changed, 246 insertions(+) create mode 100644 Setup.hs create mode 100644 lib/Paths_wordfilter.hs create mode 100644 lib/Wordfilter.hs create mode 100644 stack.yaml create mode 100644 test/Wordlist_Test.hs create mode 100644 wordfilter.cabal diff --git a/README.md b/README.md index 87a2b9b..b15b2a2 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,46 @@ wordfilter.addWords(['zebra','elephant']) wordfilter.blacklisted('this string has zebra in it') # True ``` +Or with Haskell: +Clone this repo and then `cabal install` (or `stack build`) + +```haskell +module MightBeNaughty where + +import System.IO +import Wordlist + +-- functions without trailing ' use Darius' wordlist +checkInput :: IO String -> IO () +checkInput = do + input <- getLine + ok <- blacklisted input + printLn $ if ok then "cool :)" else "not cool >:(" + +lessThanOriginalList :: String -> IO [String] +lessThanOriginalList toRemove1 toRemove2 = removeWord toRemove1 >>= + removeWord toRemove2 + +-- functions with a trailing ' need an IO [String] wordlist +getSomeOtherList :: IO [String] +getSomeOtherList = ... + +otherListAndMore :: [String] -> IO Bool +otherListAndMore otherWords toCheck = getSomeOtherList >>= + addWords' otherWords >>= + blacklisted' toCheck + +-- clearList is just an empty IO [String] for compatibility/convenience(?) 
+ +-- blacklist is original +checkInputParticular :: String -> String -> [String] -> IO Bool +checkInputParticular toTest toRemove toAdd = blacklist >>= + removeWord' toRemove >>= + addWords' toAdd >>= + blacklisted' toTest +``` + + ## Documentation This is a word filter adapted from code that I use in a lot of my twitter bots. It is based on [a list of words that I've hand-picked](https://github.com/dariusk/wordfilter/blob/master/lib/badwords.json) for exclusion from my bots: essentially, it's a list of things that I would not say myself. Generally speaking, they are "words of oppression", aka racist/sexist/ableist things that I would not say. diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/lib/Paths_wordfilter.hs b/lib/Paths_wordfilter.hs new file mode 100644 index 0000000..eb72b1c --- /dev/null +++ b/lib/Paths_wordfilter.hs @@ -0,0 +1,6 @@ +module Paths_wordfilter where + +import System.FilePath ((</>)) + +getDataFileName :: FilePath -> IO FilePath +getDataFileName = (return . ("lib" </>)) diff --git a/lib/Wordfilter.hs b/lib/Wordfilter.hs new file mode 100644 index 0000000..4cc40b5 --- /dev/null +++ b/lib/Wordfilter.hs @@ -0,0 +1,78 @@ +-- | +-- Module: Wordfilter +-- License: MIT +-- Portability: portable +-- +-- Haskell port of Darius Kazemi's Wordfilter + +module Wordfilter + ( + -- Immutability changes some of the functionality: + -- addWords and removeWord return changed copies of + -- the list instead of changing the list itself. To + -- address this, we export "raw" and "convenience" + -- versions of those functions. The "raw" versions + -- (marked with a ') take an IO [String] wordlist, + -- while the "convenience" versions "bake in" the + -- original blacklist. Similarly, clearList is + -- just an empty list, which can be passed to + -- the "raw" functions to build up a fresh list. 
+ -- Examples: + -- + -- blacklisted "foo" // IO False + -- clearList >>= addWords ["foo", "bar"] >>= blacklisted' "foo" // IO True + -- + -- real blacklist + blacklist + -- empty "blacklist" + , clearList + -- "convenience" functions + , blacklisted + , addWords + , removeWord + -- "raw" functions + , blacklisted' + , addWords' + , removeWord' + ) where + + +import Data.Aeson +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import Data.Maybe (maybeToList) +import Data.List (intersperse) +import Text.Regex.PCRE + +import Paths_wordfilter (getDataFileName) + +blacklist :: IO [String] +blacklist = getDataFileName "badwords.json" >>= + B.readFile >>= + (return . concat . maybeToList . decode) + +clearList :: IO [String] +clearList = return [] + +blacklisted' :: String -> [String] -> IO Bool +blacklisted' _ [] = return False +blacklisted' s bl = return $ matchTest re s where + re = makeRegexOpts (defaultCompOpt .|. compCaseless) + defaultExecOpt + (concat $ intersperse "|" bl) + +blacklisted :: String -> IO Bool +blacklisted s = blacklist >>= (blacklisted' s) + +addWords' :: [String] -> [String] -> IO [String] +addWords' ws bl = return $ bl ++ ws + +addWords :: [String] -> IO [String] +addWords ws = blacklist >>= addWords' ws + +removeWord' :: String -> [String] -> IO [String] +removeWord' w bl = return $ filter (not . (== w)) bl + +removeWord :: String -> IO [String] +removeWord w = blacklist >>= (removeWord' w) + diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 0000000..eff642c --- /dev/null +++ b/stack.yaml @@ -0,0 +1,66 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# http://docs.haskellstack.org/en/stable/yaml_configuration/ + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. 
+# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# resolver: ghcjs-0.1.0_ghc-7.10.2 +# resolver: +# name: custom-snapshot +# location: "./custom-snapshot.yaml" +resolver: lts-7.8 + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# - location: +# git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# extra-dep: true +# subdirs: +# - auto-update +# - wai +# +# A package marked 'extra-dep: true' will only be built if demanded by a +# non-dependency (i.e. a user package), and its test suites and benchmarks +# will not be run. This is useful for tweaking upstream packages. +packages: +- '.' 
+# Dependency packages to be pulled from upstream that are not in the resolver +# (e.g., acme-missiles-0.3) +extra-deps: [] + +# Override default flag values for local packages and extra-deps +flags: {} + +# Extra package databases containing global packages +extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=1.2" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor \ No newline at end of file diff --git a/test/Wordlist_Test.hs b/test/Wordlist_Test.hs new file mode 100644 index 0000000..3b679ad --- /dev/null +++ b/test/Wordlist_Test.hs @@ -0,0 +1,25 @@ +module Wordlist_Test where + +import Lib.Wordlist (blacklisted', addWords', removeWord') +import Test.HUnit + +blacklistedTests = TestList [testEmptyFalse, testContained, testNotContained] +testEmptyFalse = TestCase (do r <- blacklisted' "foo" [] + assertEqual "always false on empty list" False r) +testContained = TestCase (do r <- blacklisted' "i am foo" ["bar", "foo"] + assertEqual "should match" True r) +testNotContained = TestCase (do r <- blacklisted' "quux i am" ["bar", "foo"] + assertEqual "should not match" False r) + + +addWordsTest = TestList [testAdd] +testAdd = TestCase (do r <- addWords' ["foo"], ["bar", "baz"] + assertEqual "add words to list" ["bar", "baz", "foo"] r) + +removeWordTests = TestList [testPresent, testAbsent] +testPresent = TestCase (do r <- removeWord' "foo" ["foo", "bar"] + assertEqual "remove word from list" ["bar"] r) +testAbsent = TestCase (do r <- removeWord' "foo" ["bar", "baz"] + assertEqual "don't remove absent word" ["bar", "baz"] 
r) + +main = runTestTT $ TestList [blackListedTests, addWordsTest, removeWordsTest] diff --git a/wordfilter.cabal b/wordfilter.cabal new file mode 100644 index 0000000..5775495 --- /dev/null +++ b/wordfilter.cabal @@ -0,0 +1,29 @@ +-- Initial wordfilter.cabal generated by cabal init. For further +-- documentation, see http://haskell.org/cabal/users-guide/ + +name: wordfilter +version: 0.1.0.0 +synopsis: Word filter +-- description: +homepage: https://github.com/dariusk/wordfilter +license: MIT +license-file: LICENSE-MIT +author: Sam Raker +maintainer: sam.raker@gmail.com +-- copyright: +category: Language +build-type: Simple +extra-source-files: ChangeLog.md, README.md +cabal-version: >=1.10 +Data-Files: lib/badwords.json + +library + exposed-modules: Wordfilter + other-modules: Paths_wordfilter + -- other-extensions: + build-depends: base >=4.9 && <4.10, + aeson, bytestring, + filepath, HUnit, + regex-pcre-builtin + hs-source-dirs: lib + default-language: Haskell2010 From dccffa23ba9658f1340eb2972716f11bc4fdf330 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Wed, 9 Nov 2016 00:32:57 -0500 Subject: [PATCH 02/13] fix example --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b15b2a2..7b97ae5 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,8 @@ checkInput = do lessThanOriginalList :: String -> IO [String] lessThanOriginalList toRemove1 toRemove2 = removeWord toRemove1 >>= - removeWord toRemove2 + removeWord' toRemove2 +-- ~~~important ^ ~~~ -- functions with a trailing ' need an IO [String] wordlist getSomeOtherList :: IO [String] From cec3e836e1e7760189ef6c1542e8052ec02ad6a6 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 12 Nov 2016 12:00:52 -0500 Subject: [PATCH 03/13] debug cabal testrunning --- test/{Wordlist_Test.hs => Wordfilter_Test.hs} | 8 ++++---- wordfilter.cabal | 8 +++++++- 2 files changed, 11 insertions(+), 5 deletions(-) rename test/{Wordlist_Test.hs => Wordfilter_Test.hs} (81%) diff 
--git a/test/Wordlist_Test.hs b/test/Wordfilter_Test.hs similarity index 81% rename from test/Wordlist_Test.hs rename to test/Wordfilter_Test.hs index 3b679ad..c291731 100644 --- a/test/Wordlist_Test.hs +++ b/test/Wordfilter_Test.hs @@ -1,6 +1,6 @@ -module Wordlist_Test where +module Main where -import Lib.Wordlist (blacklisted', addWords', removeWord') +import Wordfilter (blacklisted', addWords', removeWord') import Test.HUnit blacklistedTests = TestList [testEmptyFalse, testContained, testNotContained] @@ -13,7 +13,7 @@ testNotContained = TestCase (do r <- blacklisted' "quux i am" ["bar", "foo"] addWordsTest = TestList [testAdd] -testAdd = TestCase (do r <- addWords' ["foo"], ["bar", "baz"] +testAdd = TestCase (do r <- addWords' ["foo"] ["bar", "baz"] assertEqual "add words to list" ["bar", "baz", "foo"] r) removeWordTests = TestList [testPresent, testAbsent] @@ -22,4 +22,4 @@ testPresent = TestCase (do r <- removeWord' "foo" ["foo", "bar"] testAbsent = TestCase (do r <- removeWord' "foo" ["bar", "baz"] assertEqual "don't remove absent word" ["bar", "baz"] r) -main = runTestTT $ TestList [blackListedTests, addWordsTest, removeWordsTest] +main = runTestTT $ TestList [blacklistedTests, addWordsTest, removeWordTests] diff --git a/wordfilter.cabal b/wordfilter.cabal index 5775495..64a5758 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -17,13 +17,19 @@ extra-source-files: ChangeLog.md, README.md cabal-version: >=1.10 Data-Files: lib/badwords.json +Test-Suite test-wordfilter + type: exitcode-stdio-1.0 + main-is: test/Wordfilter_Test.hs + build-depends: base, HUnit, wordfilter + default-language: Haskell2010 + library exposed-modules: Wordfilter other-modules: Paths_wordfilter -- other-extensions: build-depends: base >=4.9 && <4.10, aeson, bytestring, - filepath, HUnit, + filepath, regex-pcre-builtin hs-source-dirs: lib default-language: Haskell2010 From a8946c9583089a260f41e1365053006402fc266b Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 12 Nov 2016 
12:03:21 -0500 Subject: [PATCH 04/13] add haskell tests to travis --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 4f4b24d..01b1cd6 100644 --- a/.travis.yml +++ b/.travis.yml @@ -25,6 +25,8 @@ script: - grunt # Test Ruby: - rake test +# Test Haskell +- cabal configure --enable-tests && cabal build && cabal test after_success: - coverage report From 1572c12ccf4641d345e451dd935941ef5efd6894 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 12 Nov 2016 12:05:23 -0500 Subject: [PATCH 05/13] readme fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b97ae5..cbe3bfa 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Clone this repo and then `cabal install` (or `stack build`) module MightBeNaughty where import System.IO -import Wordlist +import Wordfilter -- functions without trailing ' use Darius' wordlist checkInput :: IO String -> IO () From cd1a429299275d6234108c2a36f3d2481cc4996b Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 12 Nov 2016 16:51:04 -0500 Subject: [PATCH 06/13] install cabal-install --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 01b1cd6..f10f645 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,11 @@ python: sudo: false +addons: + apt: + packages: + - cabal-install + install: # Coveralls 4.0 doesn't support Python 3.2 - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi From b574ab15a4c4f9d7a06ce2ac946685799265453c Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 12 Nov 2016 16:52:52 -0500 Subject: [PATCH 07/13] install ghc --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index f10f645..d7bd8d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,6 +16,7 @@ addons: apt: packages: - cabal-install + - ghc install: # Coveralls 4.0 doesn't support Python 3.2 From 
67014e8226f8c3007ca92cd7e70374825af38f18 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Tue, 22 Nov 2016 23:19:54 -0500 Subject: [PATCH 08/13] fix test imports, remove reference to ChangeLog --- test/Wordlist_Test.hs | 2 +- wordfilter.cabal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/Wordlist_Test.hs b/test/Wordlist_Test.hs index 3b679ad..a148645 100644 --- a/test/Wordlist_Test.hs +++ b/test/Wordlist_Test.hs @@ -1,6 +1,6 @@ module Wordlist_Test where -import Lib.Wordlist (blacklisted', addWords', removeWord') +import Wordlist (blacklisted', addWords', removeWord') import Test.HUnit blacklistedTests = TestList [testEmptyFalse, testContained, testNotContained] diff --git a/wordfilter.cabal b/wordfilter.cabal index 5775495..0cea7e2 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -13,7 +13,7 @@ maintainer: sam.raker@gmail.com -- copyright: category: Language build-type: Simple -extra-source-files: ChangeLog.md, README.md +extra-source-files: README.md cabal-version: >=1.10 Data-Files: lib/badwords.json From b2ff2db2cbaff06eda6d5a34dfdf7655be5583c8 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Tue, 29 Nov 2016 23:33:30 -0500 Subject: [PATCH 09/13] debug data-files --- wordfilter.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wordfilter.cabal b/wordfilter.cabal index 65bcd2c..1c401ac 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -2,7 +2,7 @@ -- documentation, see http://haskell.org/cabal/users-guide/ name: wordfilter -version: 0.1.0.0 +version: 0.1.0.1 synopsis: Word filter -- description: homepage: https://github.com/dariusk/wordfilter @@ -15,7 +15,7 @@ category: Language build-type: Simple extra-source-files: README.md cabal-version: >=1.10 -Data-Files: lib/badwords.json +data-Files: lib/badwords.json Test-Suite test-wordfilter type: exitcode-stdio-1.0 From 1bf8aa2c5e07a5d244d6422c5bb3cc7f897f2497 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Tue, 29 Nov 2016 23:36:51 -0500 Subject: [PATCH 
10/13] still trying to debug data-files --- lib/Paths_wordfilter.hs | 6 ------ lib/Wordfilter.hs | 4 ++-- wordfilter.cabal | 5 ++--- 3 files changed, 4 insertions(+), 11 deletions(-) delete mode 100644 lib/Paths_wordfilter.hs diff --git a/lib/Paths_wordfilter.hs b/lib/Paths_wordfilter.hs deleted file mode 100644 index eb72b1c..0000000 --- a/lib/Paths_wordfilter.hs +++ /dev/null @@ -1,6 +0,0 @@ -module Paths_wordfilter where - -import System.FilePath ((</>)) - -getDataFileName :: FilePath -> IO FilePath -getDataFileName = (return . ("lib" </>)) diff --git a/lib/Wordfilter.hs b/lib/Wordfilter.hs index 4cc40b5..1e24ed6 100644 --- a/lib/Wordfilter.hs +++ b/lib/Wordfilter.hs @@ -21,7 +21,7 @@ module Wordfilter -- -- blacklisted "foo" // IO False -- clearList >>= addWords ["foo", "bar"] >>= blacklisted' "foo" // IO True - -- + -- -- real blacklist blacklist -- empty "blacklist" @@ -47,7 +47,7 @@ import Text.Regex.PCRE import Paths_wordfilter (getDataFileName) blacklist :: IO [String] -blacklist = getDataFileName "badwords.json" >>= +blacklist = getDataFileName "lib/badwords.json" >>= B.readFile >>= (return . concat . maybeToList . 
decode) diff --git a/wordfilter.cabal b/wordfilter.cabal index 1c401ac..68d7d82 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -2,7 +2,7 @@ -- documentation, see http://haskell.org/cabal/users-guide/ name: wordfilter -version: 0.1.0.1 +version: 0.1.0.2 synopsis: Word filter -- description: homepage: https://github.com/dariusk/wordfilter @@ -15,7 +15,7 @@ category: Language build-type: Simple extra-source-files: README.md cabal-version: >=1.10 -data-Files: lib/badwords.json +data-files: lib/badwords.json Test-Suite test-wordfilter type: exitcode-stdio-1.0 @@ -25,7 +25,6 @@ Test-Suite test-wordfilter library exposed-modules: Wordfilter - other-modules: Paths_wordfilter -- other-extensions: build-depends: base >=4.9 && <4.10, aeson, bytestring, From 907de9034a2aa6dfe0d1ac794c3ab6e1a0b3ea08 Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 3 Dec 2016 18:04:11 -0500 Subject: [PATCH 11/13] version bump --- wordfilter.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wordfilter.cabal b/wordfilter.cabal index 68d7d82..5cb1a55 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -2,7 +2,7 @@ -- documentation, see http://haskell.org/cabal/users-guide/ name: wordfilter -version: 0.1.0.2 +version: 0.1.0.3 synopsis: Word filter -- description: homepage: https://github.com/dariusk/wordfilter From 89f9700fbcaf8ebf817f7be47321c4595ba33abc Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 3 Dec 2016 19:36:24 -0500 Subject: [PATCH 12/13] version bump, allow ghc-4.8 --- wordfilter.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wordfilter.cabal b/wordfilter.cabal index 5cb1a55..39ee576 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -2,7 +2,7 @@ -- documentation, see http://haskell.org/cabal/users-guide/ name: wordfilter -version: 0.1.0.3 +version: 0.1.0.4 synopsis: Word filter -- description: homepage: https://github.com/dariusk/wordfilter @@ -26,7 +26,7 @@ Test-Suite test-wordfilter library exposed-modules: 
Wordfilter -- other-extensions: - build-depends: base >=4.9 && <4.10, + build-depends: base >=4.8 && <4.10, aeson, bytestring, filepath, regex-pcre-builtin From 1131ff6dae1e0d368d84ae216624493cc42d98eb Mon Sep 17 00:00:00 2001 From: Sam Raker Date: Sat, 3 Dec 2016 21:19:58 -0500 Subject: [PATCH 13/13] expose Paths_wordfilter --- wordfilter.cabal | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wordfilter.cabal b/wordfilter.cabal index 39ee576..a985d8f 100644 --- a/wordfilter.cabal +++ b/wordfilter.cabal @@ -2,7 +2,7 @@ -- documentation, see http://haskell.org/cabal/users-guide/ name: wordfilter -version: 0.1.0.4 +version: 0.1.0.5 synopsis: Word filter -- description: homepage: https://github.com/dariusk/wordfilter @@ -24,7 +24,7 @@ Test-Suite test-wordfilter default-language: Haskell2010 library - exposed-modules: Wordfilter + exposed-modules: Wordfilter, Paths_wordfilter -- other-extensions: build-depends: base >=4.8 && <4.10, aeson, bytestring,