diff --git a/Chapter 1 Arrays and strings/1.2. check_permutation.md b/Chapter 1 Arrays and strings/1.2. check_permutation.md new file mode 100644 index 0000000..24830dc --- /dev/null +++ b/Chapter 1 Arrays and strings/1.2. check_permutation.md @@ -0,0 +1,121 @@ +# 1.2. Check permutation + +## given two strings, decide if one is a permutation of the other + +## First idea + +if one is a permutation of the other, they need to have the same length: + +```python +if len(s1) != len(s2): + return False +``` + +first sort, then check: O(nlogn + n) = O(nlogn). + +Can it be done in O(1)? BCR? don't think so, even doing it manually takes O(n). + +> example: s1 = "ane", s2= "ena" + +## array to keep count of characters + +array of ints with count of each character? + +```python +alphabet = 26 +def check_permutation(s1, s2): + if len(s1) != len(s2): + return False + checker = [0 for _ in range(alphabet)] + for char in s1: + checker[ord(char)] += 1 + for char in s2: + checker[ord(char)] -= 1 + if checker[ord(char)] < 0: + return False + return True +``` + +## optimization from this + +how many characters do we admit? 128 for starters in ASCII. + +```python +alphabet = 128 +... +if ord(char) >= 128: + sys.exit(1) +``` + +> Tests: + +* empty strings: correct +* one character strings: correct +* different length strings: correct +* same length strings, True: correct +* same length strings, False: correct + +This takes O(128 + n + n) = O(n) time and O(128) space. We can't beat BCR but maybe less space? + +## Use hash tables + +substitute array of 128 by hash table of 128 in worst case, but generally less. O(1). no need to limit string size to 128. + +```python +def check_permutation(s1, s2): + if len(s1) != len(s2): + return False + checker = dict() + + for char in s1: + ascii_val = ord(char) + if ascii_val not in checker: + checker[ascii_val] = 1 + else: + checker[ascii_val] += 1 + + for char in s2: + ascii_val = ord(char) + if ascii_val not in checker: + return False + checker[ascii_val] -= 1 + if checker[ascii_val] < 0: + return False + return True +``` + +O(n) time, O(1) space. + +## Forgot about detail (remembered reading the solution) + +I don't check that the dictionary is empty at the end. I should delete the key when the count reaches 0, and check that it's an empty hash table at the end. Tests: + +> 'aane' vs 'ane' + +There'll be count of 1 in checker[0] at the end. But it'll be discarded because of different length. + +> 'aane' vs 'anee' + +It passes the test, but why? Ah! If both strings are the same length, the first string might have a bigger count of one character, but the second will have a bigger count of same/another character, so this character (`e` in this case), will make the count negative, thus returning False. + +## Solution + +Check if permutation is case sensitive: + +> is 'God' a permutation of 'dog'? + +Also check if whitespace if significant: + +> 'god ' is a permutation of 'dog'? + +The solution assumes that the comparison is case sensitive and whitespace is significant (like my solution). It also checks that the lengths are different. + +### First solution + +If two strings are permutations, they have the same characters in different orders. Sort them, and compare. O(nlogn) + +### Check count of characters + +The definition of a permutation is: two words with the same character counts. Create an array that operates as a hash table, mapping each character to its frequency. Increment through the first string, decrement through the second, and at the end that the array is all zeroes. Terminate early if a value turns negative. + +Solved it right! \ No newline at end of file diff --git a/Chapter 1 Arrays and strings/1.2. check_permutation.py b/Chapter 1 Arrays and strings/1.2. check_permutation.py new file mode 100644 index 0000000..bb672cb --- /dev/null +++ b/Chapter 1 Arrays and strings/1.2. check_permutation.py @@ -0,0 +1,46 @@ +import unittest + +def check_permutation(s1, s2): + if len(s1) != len(s2): + return False + checker = dict() + + for char in s1: + ascii_val = ord(char) + if ascii_val not in checker: + checker[ascii_val] = 1 + else: + checker[ascii_val] += 1 + + for char in s2: + ascii_val = ord(char) + if ascii_val not in checker: + return False + checker[ascii_val] -= 1 + if checker[ascii_val] == 0: + del checker[ascii_val] + if checker[ascii_val] < 0: + return False + if checker == {}: + return True + else: + return False + +class Test(unittest.TestCase): + dataT = [('abcd', 'dbca'), ('dfgh', 'dfgh'), ('', '')] + dataF = [('aane', 'anee'), ('ab', 'ca'), ('ane', 'aane')] + + def test(self): + # true check + for test_string in self.dataT: + s1, s2 = test_string + res = check_permutation(s1, s2) + self.assertTrue(res) + # false check + for test_string in self.dataF: + s1, s2 = test_string + res = check_permutation(s1, s2) + self.assertFalse(res) + +if __name__ == "__main__": + unittest.main()