diff --git a/.travis.yml b/.travis.yml index 4f4b24d..d7bd8d7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,6 +12,12 @@ python: sudo: false +addons: + apt: + packages: + - cabal-install + - ghc + install: # Coveralls 4.0 doesn't support Python 3.2 - if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi @@ -25,6 +31,8 @@ script: - grunt # Test Ruby: - rake test +# Test Haskell +- cabal configure --enable-tests && cabal build && cabal test after_success: - coverage report diff --git a/README.md b/README.md index 87a2b9b..cbe3bfa 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,47 @@ wordfilter.addWords(['zebra','elephant']) wordfilter.blacklisted('this string has zebra in it') # True ``` +Or with Haskell: +Clone this repo and then `cabal install` (or `stack build`) + +```haskell +module MightBeNaughty where + +import System.IO +import Wordfilter + +-- functions without trailing ' use Darius' wordlist +checkInput :: IO String -> IO () +checkInput = do + input <- getLine + ok <- blacklisted input + printLn $ if ok then "cool :)" else "not cool >:(" + +lessThanOriginalList :: String -> IO [String] +lessThanOriginalList toRemove1 toRemove2 = removeWord toRemove1 >>= + removeWord' toRemove2 +-- ~~~important ^ ~~~ + +-- functions with a trailing ' need an IO [String] wordlist +getSomeOtherList :: IO [String] +getSomeOtherList = ... + +otherListAndMore :: [String] -> IO Bool +otherListAndMore otherWords toCheck = getSomeOtherList >>= + addWords' otherWords >>= + blacklisted' toCheck + +-- clearList is just an empty IO [String] for compatability/convenience(?) + +-- blacklist is original +checkInputParticular :: String -> String -> [String] -> IO Bool +checkInputParticular toTest toRemove toAdd = blacklist >>= + removeWord' toRemove >>= + addWords' toAdd >>= + blacklisted' toTest +``` + + ## Documentation This is a word filter adapted from code that I use in a lot of my twitter bots. It is based on [a list of words that I've hand-picked](https://github.com/dariusk/wordfilter/blob/master/lib/badwords.json) for exclusion from my bots: essentially, it's a list of things that I would not say myself. Generally speaking, they are "words of oppression", aka racist/sexist/ableist things that I would not say. diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/lib/Wordfilter.hs b/lib/Wordfilter.hs new file mode 100644 index 0000000..1e24ed6 --- /dev/null +++ b/lib/Wordfilter.hs @@ -0,0 +1,78 @@ +-- | +-- Module: Wordfilter +-- License: MIT +-- Portability: portable +-- +-- Haskell port of Darius Kazemi's Wordfilter + +module Wordfilter + ( + -- Immutability changes some of the functionality: + -- addWords and removeWord return changed copies of + -- the list instead of changing the list itself. To + -- address this, we export "raw" and "convenience" + -- versions of those functions. The "raw" versions + -- (marked with a ') take an IO [String] wordlist, + -- while the "convenience" versions "bake in" the + -- original blacklist. Similarly, clearList is + -- just an empty list, which can be passed to + -- the "raw" functions to build up a fresh list. + -- Examples: + -- + -- blacklisted "foo" // IO False + -- clearList >>= addWords ["foo", "bar"] >>= blacklisted' "foo" // IO True + -- + -- real blacklist + blacklist + -- empty "blacklist" + , clearList + -- "convenience" functions + , blacklisted + , addWords + , removeWord + -- "raw" functions + , blacklisted' + , addWords' + , removeWord' + ) where + + +import Data.Aeson +import Data.Bits ((.|.)) +import qualified Data.ByteString.Lazy as B +import Data.Maybe (maybeToList) +import Data.List (intersperse) +import Text.Regex.PCRE + +import Paths_wordfilter (getDataFileName) + +blacklist :: IO [String] +blacklist = getDataFileName "lib/badwords.json" >>= + B.readFile >>= + (return . concat . maybeToList . decode) + +clearList :: IO [String] +clearList = return [] + +blacklisted' :: String -> [String] -> IO Bool +blacklisted' _ [] = return False +blacklisted' s bl = return $ matchTest re s where + re = makeRegexOpts (defaultCompOpt .|. compCaseless) + defaultExecOpt + (concat $ intersperse "|" bl) + +blacklisted :: String -> IO Bool +blacklisted s = blacklist >>= (blacklisted' s) + +addWords' :: [String] -> [String] -> IO [String] +addWords' ws bl = return $ bl ++ ws + +addWords :: [String] -> IO [String] +addWords ws = blacklist >>= addWords' ws + +removeWord' :: String -> [String] -> IO [String] +removeWord' w bl = return $ filter (not . (== w)) bl + +removeWord :: String -> IO [String] +removeWord w = blacklist >>= (removeWord' w) + diff --git a/stack.yaml b/stack.yaml new file mode 100644 index 0000000..eff642c --- /dev/null +++ b/stack.yaml @@ -0,0 +1,66 @@ +# This file was automatically generated by 'stack init' +# +# Some commonly used options have been documented as comments in this file. +# For advanced use and comprehensive documentation of the format, please see: +# http://docs.haskellstack.org/en/stable/yaml_configuration/ + +# Resolver to choose a 'specific' stackage snapshot or a compiler version. +# A snapshot resolver dictates the compiler version and the set of packages +# to be used for project dependencies. For example: +# +# resolver: lts-3.5 +# resolver: nightly-2015-09-21 +# resolver: ghc-7.10.2 +# resolver: ghcjs-0.1.0_ghc-7.10.2 +# resolver: +# name: custom-snapshot +# location: "./custom-snapshot.yaml" +resolver: lts-7.8 + +# User packages to be built. +# Various formats can be used as shown in the example below. +# +# packages: +# - some-directory +# - https://example.com/foo/bar/baz-0.0.2.tar.gz +# - location: +# git: https://github.com/commercialhaskell/stack.git +# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a +# extra-dep: true +# subdirs: +# - auto-update +# - wai +# +# A package marked 'extra-dep: true' will only be built if demanded by a +# non-dependency (i.e. a user package), and its test suites and benchmarks +# will not be run. This is useful for tweaking upstream packages. +packages: +- '.' +# Dependency packages to be pulled from upstream that are not in the resolver +# (e.g., acme-missiles-0.3) +extra-deps: [] + +# Override default flag values for local packages and extra-deps +flags: {} + +# Extra package databases containing global packages +extra-package-dbs: [] + +# Control whether we use the GHC we find on the path +# system-ghc: true +# +# Require a specific version of stack, using version ranges +# require-stack-version: -any # Default +# require-stack-version: ">=1.2" +# +# Override the architecture used by stack, especially useful on Windows +# arch: i386 +# arch: x86_64 +# +# Extra directories used by stack for building +# extra-include-dirs: [/path/to/dir] +# extra-lib-dirs: [/path/to/dir] +# +# Allow a newer minor version of GHC than the snapshot specifies +# compiler-check: newer-minor \ No newline at end of file diff --git a/test/Wordfilter_Test.hs b/test/Wordfilter_Test.hs new file mode 100644 index 0000000..c291731 --- /dev/null +++ b/test/Wordfilter_Test.hs @@ -0,0 +1,25 @@ +module Main where + +import Wordfilter (blacklisted', addWords', removeWord') +import Test.HUnit + +blacklistedTests = TestList [testEmptyFalse, testContained, testNotContained] +testEmptyFalse = TestCase (do r <- blacklisted' "foo" [] + assertEqual "always false on empty list" False r) +testContained = TestCase (do r <- blacklisted' "i am foo" ["bar", "foo"] + assertEqual "should match" True r) +testNotContained = TestCase (do r <- blacklisted' "quux i am" ["bar", "foo"] + assertEqual "should not match" False r) + + +addWordsTest = TestList [testAdd] +testAdd = TestCase (do r <- addWords' ["foo"] ["bar", "baz"] + assertEqual "add words to list" ["bar", "baz", "foo"] r) + +removeWordTests = TestList [testPresent, testAbsent] +testPresent = TestCase (do r <- removeWord' "foo" ["foo", "bar"] + assertEqual "remove word from list" ["bar"] r) +testAbsent = TestCase (do r <- removeWord' "foo" ["bar", "baz"] + assertEqual "don't remove absent word" ["bar", "baz"] r) + +main = runTestTT $ TestList [blacklistedTests, addWordsTest, removeWordTests] diff --git a/wordfilter.cabal b/wordfilter.cabal new file mode 100644 index 0000000..a985d8f --- /dev/null +++ b/wordfilter.cabal @@ -0,0 +1,34 @@ +-- Initial wordfilter.cabal generated by cabal init. For further +-- documentation, see http://haskell.org/cabal/users-guide/ + +name: wordfilter +version: 0.1.0.5 +synopsis: Word filter +-- description: +homepage: https://github.com/dariusk/wordfilter +license: MIT +license-file: LICENSE-MIT +author: Sam Raker +maintainer: sam.raker@gmail.com +-- copyright: +category: Language +build-type: Simple +extra-source-files: README.md +cabal-version: >=1.10 +data-files: lib/badwords.json + +Test-Suite test-wordfilter + type: exitcode-stdio-1.0 + main-is: test/Wordfilter_Test.hs + build-depends: base, HUnit, wordfilter + default-language: Haskell2010 + +library + exposed-modules: Wordfilter, Paths_wordfilter + -- other-extensions: + build-depends: base >=4.8 && <4.10, + aeson, bytestring, + filepath, + regex-pcre-builtin + hs-source-dirs: lib + default-language: Haskell2010