Add word count (#84)

exercism · Aug 15, 2023 · 9bb596f · 9bb596f
1 parent 261c158
commit 9bb596f
Show file tree

Hide file tree

Showing 8 changed files with 329 additions and 0 deletions.
diff --git a/config.json b/config.json
@@ -326,6 +326,14 @@
         "prerequisites": [],
         "difficulty": 3
       },
+      {
+        "slug": "word-count",
+        "name": "Word Count",
+        "uuid": "df93be4e-a862-4636-bb1c-e51e0e294797",
+        "practices": [],
+        "prerequisites": [],
+        "difficulty": 2
+      },
       {
         "slug": "two-fer",
         "name": "Two Fer",

diff --git a/exercises/practice/word-count/.docs/instructions.md b/exercises/practice/word-count/.docs/instructions.md
@@ -0,0 +1,47 @@
+# Instructions
+
+Your task is to count how many times each word occurs in a subtitle of a drama.
+
+The subtitles from these dramas use only ASCII characters.
+
+The characters often speak in casual English, using contractions like _they're_ or _it's_.
+Though these contractions come from two words (e.g. _we are_), the contraction (_we're_) is considered a single word.
+
+Words can be separated by any form of punctuation (e.g. ":", "!", or "?") or whitespace (e.g. "\t", "\n", or " ").
+The only punctuation that does not separate words is the apostrophe in contractions.
+
+Numbers are considered words.
+If the subtitles say _It costs 100 dollars._ then _100_ will be its own word.
+
+Words are case insensitive.
+For example, the word _you_ occurs three times in the following sentence:
+
+> You come back, you hear me? DO YOU HEAR ME?
+
+The ordering of the word counts in the results doesn't matter.
+
+Here's an example that incorporates several of the elements discussed above:
+
+- simple words
+- contractions
+- numbers
+- case insensitive words
+- punctuation (including apostrophes) to separate words
+- different forms of whitespace to separate words
+
+`"That's the password: 'PASSWORD 123'!", cried the Special Agent.\nSo I fled.`
+
+The mapping for this subtitle would be:
+
+```text
+123: 1
+agent: 1
+cried: 1
+fled: 1
+i: 1
+password: 2
+so: 1
+special: 1
+that's: 1
+the: 2
+```
diff --git a/exercises/practice/word-count/.docs/introduction.md b/exercises/practice/word-count/.docs/introduction.md
@@ -0,0 +1,8 @@
+# Introduction
+
+You teach English as a foreign language to high school students.
+
+You've decided to base your entire curriculum on TV shows.
+You need to analyze which words are used, and how often they're repeated.
+
+This will let you choose the simplest shows to start with and gradually increase the difficulty as time passes.
diff --git a/exercises/practice/word-count/.meta/config.json b/exercises/practice/word-count/.meta/config.json
@@ -0,0 +1,18 @@
+{
+  "authors": [
+    "BNAndras"
+  ],
+  "files": {
+    "solution": [
+      "word-count.arr"
+    ],
+    "test": [
+      "word-count-test.arr"
+    ],
+    "example": [
+      ".meta/example.arr"
+    ]
+  },
+  "blurb": "Given a phrase, count the occurrences of each word in that phrase.",
+  "source": "This is a classic toy problem, but we were reminded of it by seeing it in the Go Tour."
+}
diff --git a/exercises/practice/word-count/.meta/example.arr b/exercises/practice/word-count/.meta/example.arr
@@ -0,0 +1,58 @@
+provide: word-count end
+
+include string-dict
+
+fun word-count(phrase):
+  doc: "Return a string-dict mapping each lowercase word in phrase to its number of occurrences."
+  # phrase: the subtitle text (String); only ASCII input is expected.
+  # Returns a string-dict; a phrase containing no words gives an empty one.
+  #
+  # A word is a run of ASCII letters, digits and apostrophes. Every other
+  # character (whitespace such as "\t" or "\n", and all other punctuation)
+  # is a separator, so "stop.go" or "one\ttwo" count as two words each.
+  # Apostrophes only belong to a word when they are inside it (contractions
+  # like "don't"); apostrophes at either edge are quotation marks and are
+  # stripped, so "'can't'" is counted as "can't".
+
+  # Code point of " ", the one separator the phrase is normalised to.
+  space-cp = 32
+
+  is-word-char = lam(cp):
+    # 0-9, a-z or apostrophe; the phrase is already lowercased when this runs
+    ((cp >= 48) and (cp <= 57)) or ((cp >= 97) and (cp <= 122)) or (cp == 39)
+  end
+
+  fun strip-quotes(token):
+    # Peel apostrophes off both ends until none remain. The length check comes
+    # first because string-char-at raises on an empty string, which is what a
+    # token made only of apostrophes shrinks to.
+    token-length = string-length(token)
+    if token-length == 0:
+      token
+    else if string-char-at(token, 0) == "'":
+      strip-quotes(string-substring(token, 1, token-length))
+    else if string-char-at(token, token-length - 1) == "'":
+      strip-quotes(string-substring(token, 0, token-length - 1))
+    else:
+      token
+    end
+  end
+
+  phrase
+    ^ string-to-lower
+    ^ string-to-code-points
+    # Turn every separator into a space so a single split finds all boundaries.
+    ^ _.map(lam(cp): if is-word-char(cp): cp else: space-cp end end)
+    ^ string-from-code-points
+    ^ string-split-all(_, " ")
+    ^ _.map(strip-quotes)
+    # Consecutive separators and quote-only tokens leave empty strings behind.
+    ^ _.filter(lam(token): token <> "" end)
+    # Tally each remaining word.
+    ^ _.foldl(
+      lam(word, counts):
+        seen = counts.get(word).or-else(0)
+        counts.set(word, seen + 1)
+      end,
+      [string-dict: ])
+end
diff --git a/exercises/practice/word-count/.meta/tests.toml b/exercises/practice/word-count/.meta/tests.toml
@@ -0,0 +1,57 @@
+# This is an auto-generated file.
+#
+# Regenerating this file via `configlet sync` will:
+# - Recreate every `description` key/value pair
+# - Recreate every `reimplements` key/value pair, where they exist in problem-specifications
+# - Remove any `include = true` key/value pair (an omitted `include` key implies inclusion)
+# - Preserve any other key/value pair
+#
+# As user-added comments (using the # character) will be removed when this file
+# is regenerated, comments can be added via a `comment` key.
+
+[61559d5f-2cad-48fb-af53-d3973a9ee9ef]
+description = "count one word"
+
+[5abd53a3-1aed-43a4-a15a-29f88c09cbbd]
+description = "count one of each word"
+
+[2a3091e5-952e-4099-9fac-8f85d9655c0e]
+description = "multiple occurrences of a word"
+
+[e81877ae-d4da-4af4-931c-d923cd621ca6]
+description = "handles cramped lists"
+
+[7349f682-9707-47c0-a9af-be56e1e7ff30]
+description = "handles expanded lists"
+
+[a514a0f2-8589-4279-8892-887f76a14c82]
+description = "ignore punctuation"
+
+[d2e5cee6-d2ec-497b-bdc9-3ebe092ce55e]
+description = "include numbers"
+
+[dac6bc6a-21ae-4954-945d-d7f716392dbf]
+description = "normalize case"
+
+[4185a902-bdb0-4074-864c-f416e42a0f19]
+description = "with apostrophes"
+include = false
+
+[4ff6c7d7-fcfc-43ef-b8e7-34ff1837a2d3]
+description = "with apostrophes"
+reimplements = "4185a902-bdb0-4074-864c-f416e42a0f19"
+
+[be72af2b-8afe-4337-b151-b297202e4a7b]
+description = "with quotations"
+
+[8d6815fe-8a51-4a65-96f9-2fb3f6dc6ed6]
+description = "substrings from the beginning"
+
+[c5f4ef26-f3f7-4725-b314-855c04fb4c13]
+description = "multiple spaces not detected as a word"
+
+[50176e8a-fe8e-4f4c-b6b6-aa9cf8f20360]
+description = "alternating word separators not detected as a word"
+
+[6d00f1db-901c-4bec-9829-d20eb3044557]
+description = "quotation for word with apostrophe"
diff --git a/exercises/practice/word-count/word-count-test.arr b/exercises/practice/word-count/word-count-test.arr
@@ -0,0 +1,128 @@
+include file("word-count.arr")
+
+include string-dict
+
+check "count one word":
+  # A phrase holding a single word yields one entry.
+  phrase = "word"
+  expected-counts = [string-dict: "word", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "count one of each word":
+  # Distinct words separated by spaces each get a count of 1.
+  phrase = "one of each"
+  expected-counts = [string-dict: "one", 1, "of", 1, "each", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "multiple occurrences of a word":
+  # Repeated words are accumulated under one key.
+  phrase = "one fish two fish red fish blue fish"
+  expected-counts = [string-dict: "one", 1, "fish", 4, "two", 1, "red", 1, "blue", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "handles cramped lists":
+  # Commas with no surrounding spaces still separate words.
+  phrase = "one,two,three"
+  expected-counts = [string-dict: "one", 1, "two", 1, "three", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "handles expanded lists":
+  # A comma followed by a newline acts as a single separator.
+  phrase = "one,\ntwo,\nthree"
+  expected-counts = [string-dict: "one", 1, "two", 1, "three", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "ignore punctuation":
+  # Colons and the trailing run of symbols are not part of any word.
+  phrase = "car: carpet as java: javascript!!&@$%^&"
+  expected-counts = [string-dict:
+    "car", 1,
+    "carpet", 1,
+    "as", 1,
+    "java", 1,
+    "javascript", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "include numbers":
+  # Digit-only tokens are counted like any other word.
+  phrase = "testing, 1, 2 testing"
+  expected-counts = [string-dict: "testing", 2, "1", 1, "2", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "normalize case":
+  # Counting ignores case; keys are lowercase.
+  phrase = "go Go GO Stop stop"
+  expected-counts = [string-dict: "go", 3, "stop", 2]
+  word-count(phrase) is expected-counts
+end
+
+check "with apostrophes":
+  # Contractions keep their apostrophe; the quotes around the sentence do not.
+  phrase = "'First: don't laugh. Then: don't cry. You're getting it.'"
+  expected-counts = [string-dict:
+    "first", 1,
+    "don't", 2,
+    "laugh", 1,
+    "then", 1,
+    "cry", 1,
+    "you're", 1,
+    "getting", 1,
+    "it", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "with quotations":
+  # A word in single quotes counts the same as the bare word.
+  phrase = "Joe can't tell between 'large' and large."
+  expected-counts = [string-dict:
+    "joe", 1,
+    "can't", 1,
+    "tell", 1,
+    "between", 1,
+    "large", 2,
+    "and", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "substrings from the beginning":
+  # A word that is a prefix of another (app/apple, a/and) is counted separately.
+  phrase = "Joe can't tell between app, apple and a."
+  expected-counts = [string-dict:
+    "joe", 1,
+    "can't", 1,
+    "tell", 1,
+    "between", 1,
+    "app", 1,
+    "apple", 1,
+    "and", 1,
+    "a", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "multiple spaces not detected as a word":
+  # Leading and repeated spaces never produce an empty word.
+  phrase = " multiple   whitespaces"
+  expected-counts = [string-dict: "multiple", 1, "whitespaces", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "alternating word separators not detected as a word":
+  # Mixed commas, newlines and spaces never produce an empty word.
+  phrase = ",\n,one,\n ,two \n 'three'"
+  expected-counts = [string-dict: "one", 1, "two", 1, "three", 1]
+  word-count(phrase) is expected-counts
+end
+
+check "quotation for word with apostrophe":
+  # Quotes around a contraction are removed; the inner apostrophe stays.
+  phrase = "can, can't, 'can't'"
+  expected-counts = [string-dict: "can", 1, "can't", 2]
+  word-count(phrase) is expected-counts
+end
diff --git a/exercises/practice/word-count/word-count.arr b/exercises/practice/word-count/word-count.arr
@@ -0,0 +1,5 @@
+provide: word-count end
+
+fun word-count(phrase):
+  raise("please implement the word-count function") # placeholder body for the learner to replace
+end