Let's write a function that compares two lists to see if they are the same
def lists_equal(L1, L2):
    '''Return True iff L1 and L2 have the same contents in the same order

    Arguments:
    L1, L2 -- lists of integers
    '''
    # Compare lengths first: zip() stops at the end of the shorter list, so
    # without this check a list would compare equal to any longer list that
    # it is a prefix of.
    if len(L1) != len(L2):
        return False
    # The lengths agree, so the lists are equal unless an aligned pair differs.
    return not any(a != b for a, b in zip(L1, L2))
We store patient data in three lists of the same length. For example:
sex_data = ["m", "f", "f", "m", "m", "f"]
ward_data = [ 1, 3, 2, 2, 1, 2]
length_of_stay_data = [ 10, 5, 7, 2, 3, 4]
Write a function that computes the average length of stay in the hospital for a given sex, in a given ward. The function signature is
def avg_stay(sex, ward, sex_data, ward_data, length_of_stay_data):
    '''Return the average length of stay for patients of sex sex in ward ward

    Arguments:
    sex -- the sex of the patients to average over ("m" or "f")
    ward -- the ward of the patients to average over
    sex_data -- a list containing N strings that correspond to patients' sex ("m" or "f")
    ward_data -- a list of N ints that correspond to patients' ward
    length_of_stay_data -- a list of N ints that correspond to patients' lengths of
                           stay
    '''
def avg_stay(sex, ward, sex_data, ward_data, length_of_stay_data):
    '''Return the average length of stay for patients of sex sex in ward ward

    Arguments:
    sex -- the sex of the patients to average over ("m" or "f")
    ward -- the ward of the patients to average over, an int
    sex_data -- a list containing N strings that correspond to patients' sex ("m" or "f")
    ward_data -- a list of N ints that correspond to patients' ward
    length_of_stay_data -- a list of N ints that correspond to patients' lengths of
                           stay

    The three lists are parallel: entry i of each list describes patient i.

    Raises ZeroDivisionError if no patient matches both sex and ward.
    '''
    # Walk the three parallel lists together and keep only the stays of the
    # patients that match both criteria.
    matching_stays = [
        stay
        for patient_sex, patient_ward, stay
        in zip(sex_data, ward_data, length_of_stay_data)
        if patient_sex == sex and patient_ward == ward
    ]
    # With no matching patients the average is undefined; the division then
    # raises ZeroDivisionError instead of returning a made-up value.
    return sum(matching_stays) / len(matching_stays)
Write a function that returns True iff string s1 starts with string s2. For example,
starts_with("abc", "ab") should return True, and
starts_with("ad", "ab") should return False.
def starts_with(s1, s2):
    '''Return True iff the string s1 starts with the string s2

    Arguments:
    s1, s2 -- strings

    The empty string is a prefix of every string, so starts_with(s1, "")
    is True for any s1. If s2 is longer than s1 the result is False.
    '''
    # str.startswith performs the length check and the character-by-character
    # comparison that would otherwise be written by hand.
    return s1.startswith(s2)
Recall that you can estimate the extent to which the data is spread by computing the standard deviation of the data.
The standard deviation of $x_1, x_2, \ldots, x_N$ can be estimated as $\sqrt{\frac{1}{N}\sum_{i=1}^{N}(x_i - \bar{x})^2}$, where $\bar{x}$ is the average of the $x_i$. Write a function to compute the standard deviation of the length of stay in a given ward.
def avg_stay_ward(ward, ward_data, length_of_stay_data):
    '''Return the average length of stay in ward ward

    Arguments:
    ward -- the ward to average over, an int
    ward_data -- a list of N ints that correspond to patients' ward
    length_of_stay_data -- a list of N ints that correspond to patients' lengths of
                           stay

    The two lists are parallel: entry i of each list describes patient i.

    Raises ZeroDivisionError if no patient is in ward ward.
    '''
    # Select on ward_data (the per-patient list), not on the scalar ward, and
    # accumulate each matching patient's own length of stay.
    stays_in_ward = [
        stay
        for patient_ward, stay in zip(ward_data, length_of_stay_data)
        if patient_ward == ward
    ]
    # An empty ward has no defined average; the division raises
    # ZeroDivisionError in that case.
    return sum(stays_in_ward) / len(stays_in_ward)
def sd_stay(ward, ward_data, length_of_stay_data):
    '''Return the estimate of the sd of the length of stay for patients in ward ward

    Arguments:
    ward -- the ward to compute the standard deviation for, an int
    ward_data -- a list of N ints that correspond to patients' ward
    length_of_stay_data -- a list of N ints that correspond to patients' lengths of
                           stay

    The two lists are parallel: entry i of each list describes patient i.
    The estimate divides the sum of squared deviations by the number of
    patients in the ward (not by that number minus one).

    Raises ZeroDivisionError if no patient is in ward ward.
    '''
    stays_in_ward = [
        stay
        for patient_ward, stay in zip(ward_data, length_of_stay_data)
        if patient_ward == ward
    ]
    count = len(stays_in_ward)
    # Mean length of stay in this ward; raises ZeroDivisionError when the
    # ward has no patients.
    mean_stay = sum(stays_in_ward) / count
    # The deviations must be taken on the lengths of stay themselves, not on
    # the ward numbers used to select the patients.
    sum_sq_diff = sum((stay - mean_stay) ** 2 for stay in stays_in_ward)
    return (sum_sq_diff / count) ** 0.5
Write a function that returns True iff a DNA subsequence matches a DNA sequence somewhere. Both the subsequence and the sequence are represented as strings. For example, if
seq = "CGGGGAATAGCCCCC"
subseq = "AATA"
then match_subseq(seq, subseq) should return True, since you can match subseq to seq, but if
seq = "CGGGTCGGGCGC"
subseq = "AAA"
then match_subseq(seq, subseq) should return False.
Hint: think of what a useful helper function would be that's similar to what we already wrote.
def match_subseq_to_subseq(seq, subseq, start_i):
    '''Return True iff the subsequence of seq that starts at start_i
    and is of the same length as subseq is equal to subseq

    Arguments:
    start_i -- the starting index in seq, an integer
    seq, subseq -- two strings. seq is expected to be no shorter than
                   start_i+len(subseq); if fewer than len(subseq) characters
                   remain in seq from start_i, the result is False
    '''
    # str.startswith with a start position compares subseq against seq
    # beginning at start_i, and returns False (rather than raising) when the
    # window would run past the end of seq.
    return seq.startswith(subseq, start_i)
def match_subseq(seq, subseq):
    '''Return True iff the subsequence subseq matches the sequence seq
    at some index

    Arguments:
    seq, subseq -- sequences of DNA bases, represented as a string consisting
                   of the characters "A", "T", "G", "C"
    '''
    # Valid starting indices run from 0 through len(seq) - len(subseq)
    # inclusive; the "+ 1" keeps the final alignment, where subseq sits flush
    # with the end of seq. If subseq is longer than seq the range is empty
    # and the result is False.
    last_start = len(seq) - len(subseq)
    return any(
        match_subseq_to_subseq(seq, subseq, start_i)
        for start_i in range(last_start + 1)
    )