parent
e306c3ac9e
commit
78c1fd4eef
|
|
@ -0,0 +1,121 @@
|
||||||
|
# 1.2. Check permutation
|
||||||
|
|
||||||
|
## given two strings, decide if one is a permutation of the other
|
||||||
|
|
||||||
|
## First idea
|
||||||
|
|
||||||
|
if one is a permutation of the other, they need to have the same length:
|
||||||
|
|
||||||
|
```python
|
||||||
|
if len(s1) != len(s2):
|
||||||
|
return False
|
||||||
|
```
|
||||||
|
|
||||||
|
first sort, then check: O(nlogn + n) = O(nlogn).
|
||||||
|
|
||||||
|
Can it be done in O(1)? BCR? don't think so, even doing it manually takes O(n).
|
||||||
|
|
||||||
|
> example: s1 = "ane", s2= "ena"
|
||||||
|
|
||||||
|
## array to keep count of characters
|
||||||
|
|
||||||
|
array of ints with count of each character?
|
||||||
|
|
||||||
|
```python
|
||||||
|
alphabet = 26
|
||||||
|
def check_permutation(s1, s2):
|
||||||
|
if len(s1) != len(s2):
|
||||||
|
return False
|
||||||
|
checker = [0 for _ in range(alphabet)]
|
||||||
|
for char in s1:
|
||||||
|
checker[ord(char)] += 1
|
||||||
|
for char in s2:
|
||||||
|
checker[ord(char)] -= 1
|
||||||
|
if checker[ord(char)] < 0:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
```
|
||||||
|
|
||||||
|
## optimization from this
|
||||||
|
|
||||||
|
how many characters do we admit? 128 for starters in ASCII.
|
||||||
|
|
||||||
|
```python
|
||||||
|
alphabet = 128
|
||||||
|
...
|
||||||
|
if ord(char) >= 128:
|
||||||
|
sys.exit(1)
|
||||||
|
```
|
||||||
|
|
||||||
|
> Tests:
|
||||||
|
|
||||||
|
* empty strings: correct
|
||||||
|
* one character strings: correct
|
||||||
|
* different length strings: correct
|
||||||
|
* same length strings, True: correct
|
||||||
|
* same length strings, False: correct
|
||||||
|
|
||||||
|
This takes O(128 + n + n) = O(n) time and O(128) space. We can't beat BCR but maybe less space?
|
||||||
|
|
||||||
|
## Use hash tables
|
||||||
|
|
||||||
|
substitute array of 128 by hash table of 128 in worst case, but generally less. O(1). no need to limit string size to 128.
|
||||||
|
|
||||||
|
```python
|
||||||
|
def check_permutation(s1, s2):
|
||||||
|
if len(s1) != len(s2):
|
||||||
|
return False
|
||||||
|
checker = dict()
|
||||||
|
|
||||||
|
for char in s1:
|
||||||
|
ascii_val = ord(char)
|
||||||
|
if ascii_val not in checker:
|
||||||
|
checker[ascii_val] = 1
|
||||||
|
else:
|
||||||
|
checker[ascii_val] += 1
|
||||||
|
|
||||||
|
for char in s2:
|
||||||
|
ascii_val = ord(char)
|
||||||
|
if ascii_val not in checker:
|
||||||
|
return False
|
||||||
|
checker[ascii_val] -= 1
|
||||||
|
if checker[ascii_val] < 0:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
```
|
||||||
|
|
||||||
|
O(n) time, O(1) space.
|
||||||
|
|
||||||
|
## Forgot about detail (remembered reading the solution)
|
||||||
|
|
||||||
|
I don't check that the dictionary is empty at the end. I should delete the key when the count reaches 0, and check that it's an empty hash table at the end. Tests:
|
||||||
|
|
||||||
|
> 'aane' vs 'ane'
|
||||||
|
|
||||||
|
There'll be count of 1 in checker[0] at the end. But it'll be discarded because of different length.
|
||||||
|
|
||||||
|
> 'aane' vs 'anee'
|
||||||
|
|
||||||
|
It passes the test, but why? Ah! If both strings are the same length, the first string might have a bigger count of one character, but the second will have a bigger count of same/another character, so this character (`e` in this case), will make the count negative, thus returning False.
|
||||||
|
|
||||||
|
## Solution
|
||||||
|
|
||||||
|
Check if permutation is case sensitive:
|
||||||
|
|
||||||
|
> is 'God' a permutation of 'dog'?
|
||||||
|
|
||||||
|
Also check if whitespace if significant:
|
||||||
|
|
||||||
|
> 'god ' is a permutation of 'dog'?
|
||||||
|
|
||||||
|
The solution assumes that the comparison is case sensitive and whitespace is significant (like my solution). It also checks that the lengths are different.
|
||||||
|
|
||||||
|
### First solution
|
||||||
|
|
||||||
|
If two strings are permutations, they have the same characters in different orders. Sort them, and compare. O(nlogn)
|
||||||
|
|
||||||
|
### Check count of characters
|
||||||
|
|
||||||
|
The definition of a permutation is: two words with the same character counts. Create an array that operates as a hash table, mapping each character to its frequency. Increment through the first string, decrement through the second, and at the end that the array is all zeroes. Terminate early if a value turns negative.
|
||||||
|
|
||||||
|
Solved it right!
|
||||||
|
|
@ -0,0 +1,46 @@
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
def check_permutation(s1, s2):
|
||||||
|
if len(s1) != len(s2):
|
||||||
|
return False
|
||||||
|
checker = dict()
|
||||||
|
|
||||||
|
for char in s1:
|
||||||
|
ascii_val = ord(char)
|
||||||
|
if ascii_val not in checker:
|
||||||
|
checker[ascii_val] = 1
|
||||||
|
else:
|
||||||
|
checker[ascii_val] += 1
|
||||||
|
|
||||||
|
for char in s2:
|
||||||
|
ascii_val = ord(char)
|
||||||
|
if ascii_val not in checker:
|
||||||
|
return False
|
||||||
|
checker[ascii_val] -= 1
|
||||||
|
if checker[ascii_val] == 0:
|
||||||
|
del checker[ascii_val]
|
||||||
|
if checker[ascii_val] < 0:
|
||||||
|
return False
|
||||||
|
if checker == {}:
|
||||||
|
return True
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
|
||||||
|
class Test(unittest.TestCase):
|
||||||
|
dataT = [('abcd', 'dbca'), ('dfgh', 'dfgh'), ('', '')]
|
||||||
|
dataF = [('aane', 'anee'), ('ab', 'ca'), ('ane', 'aane')]
|
||||||
|
|
||||||
|
def test(self):
|
||||||
|
# true check
|
||||||
|
for test_string in self.dataT:
|
||||||
|
s1, s2 = test_string
|
||||||
|
res = check_permutation(s1, s2)
|
||||||
|
self.assertTrue(res)
|
||||||
|
# false check
|
||||||
|
for test_string in self.dataF:
|
||||||
|
s1, s2 = test_string
|
||||||
|
res = check_permutation(s1, s2)
|
||||||
|
self.assertFalse(res)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Loading…
Reference in New Issue