1.6. character length compression

python and java solutions
2019-03-30 23:34:31 +01:00 · 2019-03-30 23:34:31 +01:00 · d667782f5c
parent 3c638942ec
commit d667782f5c
3 changed files with 126 additions and 0 deletions
--- a/string_compression.java
+++ b/string_compression.java
@ -0,0 +1,41 @@
+/**
+ * Run-length string compression: every run of identical consecutive
+ * characters is written as the character followed by the run length,
+ * e.g. "aabcccccaaa" becomes "a2b1c5a3".
+ */
+public class QuestionC {
+	/**
+	 * Compresses {@code str} by replacing each run of equal characters with
+	 * the character and its repeat count.
+	 *
+	 * @param str the string to compress; must not be null
+	 * @return the compressed form, or {@code str} itself when the compressed
+	 *         form would not be strictly shorter
+	 */
+	public static String compress(String str) {
+		// Computing the final size first tells us whether compressing pays off
+		// and lets the builder be allocated at its exact capacity.
+		int finalLength = countCompression(str);
+		if (finalLength >= str.length()) return str;
+
+		// The buffer never leaves this method, so the unsynchronized
+		// StringBuilder is used instead of StringBuffer.
+		StringBuilder compressed = new StringBuilder(finalLength);
+		int countConsecutive = 0;
+		for (int i = 0; i < str.length(); i++) {
+			countConsecutive++;
+
+			/* If next character is different than current, append this char and its count to result. */
+			if (i + 1 >= str.length() || str.charAt(i) != str.charAt(i + 1)) {
+				compressed.append(str.charAt(i));
+				compressed.append(countConsecutive);
+				countConsecutive = 0;
+			}
+		}
+		return compressed.toString();
+	}
+
+	/**
+	 * Computes the length the compressed form of {@code str} would have,
+	 * without building it.
+	 *
+	 * @param str the string to measure; must not be null
+	 * @return one character per run plus the number of decimal digits of each
+	 *         run length; 0 for the empty string
+	 */
+	public static int countCompression(String str) {
+		int compressedLength = 0;
+		int countConsecutive = 0;
+		for (int i = 0; i < str.length(); i++) {
+			countConsecutive++;
+
+			/* If next character is different than current, the run ends here: add its encoded size. */
+			if (i + 1 >= str.length() || str.charAt(i) != str.charAt(i + 1)) {
+				compressedLength += 1 + String.valueOf(countConsecutive).length();
+				countConsecutive = 0;
+			}
+		}
+		return compressedLength;
+	}
+
+	/** Prints a sample string followed by its compressed form. */
+	public static void main(String[] args) {
+		String str = "aa";
+		System.out.println(str);
+		System.out.println(compress(str));
+	}
+}
--- a/string_compression.md
+++ b/string_compression.md
@ -0,0 +1,54 @@
+# 1.6. String compression
+
+## Compress a string using the counts of repeated characters. If the compressed string isn't smaller than the original string, return the original string. The string only has lowercase and uppercase letters.
+
+> example:
+
+"aabcccccaaa" -> "a2b1c5a3"
+
+```python
+return compr if len(compr) < slen else s
+```
+
+## First idea
+
+O(n), create a new empty string and add to that. Iterate through the string, keep count and add to the string. Start with `compr=s[0]` and `count=1`, and start at index=1.
+
+```python
+def compress(s):
+    """Return the run-length compression of s, or s itself if that is not shorter."""
+    slen = len(s)
+    # Inputs shorter than 3 characters are returned unchanged.
+    if slen < 3:
+        return s
+    # Inside the loop, compr ends with the character of the current run
+    # and count is the length of that run so far.
+    compr = s[0]
+    count = 1
+    for i in range(1, slen):
+        # Early exit: compr is already at least as long as the input.
+        if len(compr) >= slen:
+            return s
+        if s[i] == compr[-1]:
+            count += 1
+            # Last character of the input: write the count of the final run.
+            if i == slen - 1:
+                compr += str(count)
+        else:
+            # The run ended: write its count, then start a new run with s[i].
+            compr += str(count)
+            compr += s[i]
+            count = 1
+            # A run that starts on the last character still needs its count.
+            if i == slen - 1:
+                compr += str(count)
+    return compr if len(compr) < slen else s
+```
+
+If character changes, update count in `compr` and reset to 1. If the string ends, add the count at the end.
+
+> Tests:
+
+* "" -> "", correct
+* "a" -> "a", correct
+* "aa" -> "aa", correct (the compressed form "a2" is not shorter)
+* "aabbb" -> "a2b3", correct
+* "aabbbc" -> "aabbbc", correct
+
+Remember to return `s` unless `len(compr) < len(s)`. This has O(n) time, O(n) space.
+
+## Solution
+
+The optimal solution uses a `StringBuilder`, because in my original version, which concatenates strings, the runtime is O(n + k<sup>2</sup>), where `k` is the number of character sequences. A further optimization: while creating the compressed string, stop and return `s` as soon as it becomes at least as long as the original one.
--- a/string_compression.py
+++ b/string_compression.py
@ -0,0 +1,31 @@
+import unittest
+
+def string_compression(s):
+    compr = []
+    count = 0
+
+    for i in range(len(s)):
+        if i != 0 and s[i] != s[i - 1]:
+            compr.append(s[i - 1] + str(count))
+            count = 0
+        count += 1
+
+    compr.append(s[-1] + str(count))
+    return min(s, ''.join(compr), key=len)
+
+class Test(unittest.TestCase):
+
+    data = [("" , ""),
+            ("a", "a"),
+            ("aa", "aa"),
+            ("aabbb", "a2b3"),
+            ("aabbbc", "aabbbc")]
+
+    def test_unique(self):
+        for test in self.data:
+            res = compress(test[0])
+            self.assertEqual(res, test[1])
+        return
+
+# Run the test suite when this file is executed as a script.
+if __name__ == "__main__":
+    unittest.main()