Skip to content
8 changes: 8 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ python:

sudo: false

addons:
apt:
packages:
- cabal-install
- ghc

install:
# Coveralls 4.0 doesn't support Python 3.2
- if [ "$TRAVIS_PYTHON_VERSION" == "3.2" ]; then travis_retry pip install coverage==3.7.1; fi
Expand All @@ -25,6 +31,8 @@ script:
- grunt
# Test Ruby:
- rake test
# Test Haskell
- cabal configure --enable-tests && cabal build && cabal test

after_success:
- coverage report
Expand Down
41 changes: 41 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,47 @@ wordfilter.addWords(['zebra','elephant'])
wordfilter.blacklisted('this string has zebra in it') # True
```

Or with Haskell:
Clone this repo and then `cabal install` (or `stack build`)

```haskell
module MightBeNaughty where

import System.IO
import Wordfilter

-- functions without trailing ' use Darius' wordlist
checkInput :: IO String -> IO ()
checkInput = do
input <- getLine
ok <- blacklisted input
printLn $ if ok then "cool :)" else "not cool >:("

lessThanOriginalList :: String -> IO [String]
lessThanOriginalList toRemove1 toRemove2 = removeWord toRemove1 >>=
removeWord' toRemove2
-- ~~~important ^ ~~~

-- functions with a trailing ' need an IO [String] wordlist
getSomeOtherList :: IO [String]
getSomeOtherList = ...

otherListAndMore :: [String] -> IO Bool
otherListAndMore otherWords toCheck = getSomeOtherList >>=
addWords' otherWords >>=
blacklisted' toCheck

-- clearList is just an empty IO [String] for compatability/convenience(?)

-- blacklist is original
checkInputParticular :: String -> String -> [String] -> IO Bool
checkInputParticular toTest toRemove toAdd = blacklist >>=
removeWord' toRemove >>=
addWords' toAdd >>=
blacklisted' toTest
```


## Documentation
This is a word filter adapted from code that I use in a lot of my twitter bots. It is based on [a list of words that I've hand-picked](https://github.com/dariusk/wordfilter/blob/master/lib/badwords.json) for exclusion from my bots: essentially, it's a list of things that I would not say myself. Generally speaking, they are "words of oppression", aka racist/sexist/ableist things that I would not say.

Expand Down
2 changes: 2 additions & 0 deletions Setup.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain
78 changes: 78 additions & 0 deletions lib/Wordfilter.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
-- |
-- Module: Wordfilter
-- License: MIT
-- Portability: portable
--
-- Haskell port of Darius Kazemi's Wordfilter

module Wordfilter
(
-- Immutability changes some of the functionality:
-- addWords and removeWord return changed copies of
-- the list instead of changing the list itself. To
-- address this, we export "raw" and "convenience"
-- versions of those functions. The "raw" versions
-- (marked with a ') take an IO [String] wordlist,
-- while the "convenience" versions "bake in" the
-- original blacklist. Similarly, clearList is
-- just an empty list, which can be passed to
-- the "raw" functions to build up a fresh list.
-- Examples:
--
-- blacklisted "foo" // IO False
-- clearList >>= addWords ["foo", "bar"] >>= blacklisted' "foo" // IO True
--
-- real blacklist
blacklist
-- empty "blacklist"
, clearList
-- "convenience" functions
, blacklisted
, addWords
, removeWord
-- "raw" functions
, blacklisted'
, addWords'
, removeWord'
) where


import Data.Aeson
import Data.Bits ((.|.))
import qualified Data.ByteString.Lazy as B
import Data.Maybe (maybeToList)
import Data.List (intersperse)
import Text.Regex.PCRE

import Paths_wordfilter (getDataFileName)

blacklist :: IO [String]
blacklist = getDataFileName "lib/badwords.json" >>=
B.readFile >>=
(return . concat . maybeToList . decode)

clearList :: IO [String]
clearList = return []

blacklisted' :: String -> [String] -> IO Bool
blacklisted' _ [] = return False
blacklisted' s bl = return $ matchTest re s where
re = makeRegexOpts (defaultCompOpt .|. compCaseless)
defaultExecOpt
(concat $ intersperse "|" bl)

blacklisted :: String -> IO Bool
blacklisted s = blacklist >>= (blacklisted' s)

addWords' :: [String] -> [String] -> IO [String]
addWords' ws bl = return $ bl ++ ws

addWords :: [String] -> IO [String]
addWords ws = blacklist >>= addWords' ws

removeWord' :: String -> [String] -> IO [String]
removeWord' w bl = return $ filter (not . (== w)) bl

removeWord :: String -> IO [String]
removeWord w = blacklist >>= (removeWord' w)

66 changes: 66 additions & 0 deletions stack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/

# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-7.8

# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []

# Override default flag values for local packages and extra-deps
flags: {}

# Extra package databases containing global packages
extra-package-dbs: []

# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.2"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor
25 changes: 25 additions & 0 deletions test/Wordfilter_Test.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
module Main where

import Wordfilter (blacklisted', addWords', removeWord')
import Test.HUnit

blacklistedTests = TestList [testEmptyFalse, testContained, testNotContained]
testEmptyFalse = TestCase (do r <- blacklisted' "foo" []
assertEqual "always false on empty list" False r)
testContained = TestCase (do r <- blacklisted' "i am foo" ["bar", "foo"]
assertEqual "should match" True r)
testNotContained = TestCase (do r <- blacklisted' "quux i am" ["bar", "foo"]
assertEqual "should not match" False r)


addWordsTest = TestList [testAdd]
testAdd = TestCase (do r <- addWords' ["foo"] ["bar", "baz"]
assertEqual "add words to list" ["bar", "baz", "foo"] r)

removeWordTests = TestList [testPresent, testAbsent]
testPresent = TestCase (do r <- removeWord' "foo" ["foo", "bar"]
assertEqual "remove word from list" ["bar"] r)
testAbsent = TestCase (do r <- removeWord' "foo" ["bar", "baz"]
assertEqual "don't remove absent word" ["bar", "baz"] r)

main = runTestTT $ TestList [blacklistedTests, addWordsTest, removeWordTests]
34 changes: 34 additions & 0 deletions wordfilter.cabal
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Initial wordfilter.cabal generated by cabal init. For further
-- documentation, see http://haskell.org/cabal/users-guide/

name: wordfilter
version: 0.1.0.5
synopsis: Word filter
-- description:
homepage: https://github.com/dariusk/wordfilter
license: MIT
license-file: LICENSE-MIT
author: Sam Raker
maintainer: sam.raker@gmail.com
-- copyright:
category: Language
build-type: Simple
extra-source-files: README.md
cabal-version: >=1.10
data-files: lib/badwords.json

Test-Suite test-wordfilter
type: exitcode-stdio-1.0
main-is: test/Wordfilter_Test.hs
build-depends: base, HUnit, wordfilter
default-language: Haskell2010

library
exposed-modules: Wordfilter, Paths_wordfilter
-- other-extensions:
build-depends: base >=4.8 && <4.10,
aeson, bytestring,
filepath,
regex-pcre-builtin
hs-source-dirs: lib
default-language: Haskell2010