from WebSearch import *
import random, unittest

# see http://docs.python.org/3.3/library/unittest.html

class TestWebSearch(unittest.TestCase):
	"""Compare WebSearch results against a plain dict-of-sets reference index.

	setUp() generates random documents, loads each one into a WebSearch
	instance and records the same words in self.index.  The tests then
	check that WebSearch.search / union / intersect give the same answers
	as the reference helpers search / union / intersect defined here.
	"""

	def setUp(self): # run before each test
		"""Create a WebSearch instance, fill it with random documents and build self.index."""
		self.ws = WebSearch()

		self.num_documents = 1000
		self.num_words = 100 # each document gets random.randrange(num_words) words
		self.dictionary_size = 100 # number of words in our dictionary

		# reference index: word -> set of document numbers that contain it
		self.index = {}
		for doc in range(self.num_documents):
			document_name = "document_" + str(doc)

			# fetch the number before loading, so we know which document
			# number the words generated below are associated with
			document_number = self.ws.get_next_document_number()

			words = []
			for _ in range(random.randrange(self.num_words)):
				# only even-numbered words (word2, word4, ...) are generated,
				# so some dictionary words do not appear in any document.
				# '//' keeps the argument an int: randrange() rejects floats
				# on current Python versions.
				word = self.get_word(random.randrange(self.dictionary_size // 2) * 2 + 2)
				self.index.setdefault(word, set()).add(document_number)
				words.append(word)

			# every word, including the last one, is followed by one space
			document = "".join(word + " " for word in words)
			self.ws.load(document, document_name)

	def get_word(self, i):
		"""Return the dictionary word with number i, e.g. 'word7'."""
		return "word" + str(i)

	def search(self, keyword):
		"""Return the sorted list of document numbers recorded for keyword in self.index.

		An unknown keyword gives an empty list.
		"""
		return sorted(self.index.get(keyword, ()))

	def intersect(self, L):
		"""Return the sorted intersection of the lists of documents in L.

		An empty L gives an empty list.
		"""
		if not L:
			return []
		# each entry of L is read exactly once
		common = set.intersection(*(set(r) for r in L))
		return sorted(common)

	def union(self, L):
		"""Return the sorted union of the lists of documents in L."""
		merged = set()
		for r in L:
			merged.update(r)
		return sorted(merged)

	def test_search(self):
		"""WebSearch.search must agree with the reference index for every word."""
		# the range runs past dictionary_size and includes odd numbers, so
		# words that were never put into a document are queried as well
		for i in range(self.dictionary_size + 2):
			word = self.get_word(i)

			search_results_1 = list(self.ws.search(word))
			search_results_2 = self.search(word)

			self.assertEqual(search_results_1, search_results_2, "search failed on " + word)

	def test_union(self):
		"""WebSearch.union of three searches must equal the reference union."""
		query = [self.search("word2"), self.search("word4"), self.search("word6")]
		search_results_1 = list(self.union(query))

		query = [self.ws.search("word2"), self.ws.search("word4"), self.ws.search("word6")]
		search_results_2 = list(self.ws.union(query))

		self.assertEqual(search_results_1, search_results_2, "union failed")

	def test_intersect(self):
		"""WebSearch.intersect of three searches must equal the reference intersection."""
		query = [self.search("word2"), self.search("word4"), self.search("word6")]
		search_results_1 = list(self.intersect(query))

		query = [self.ws.search("word2"), self.ws.search("word4"), self.ws.search("word6")]
		search_results_2 = list(self.ws.intersect(query))

		self.assertEqual(search_results_1, search_results_2, "intersect failed")

def WebSearch_suite():
	"""Return a test suite built from the test methods of TestWebSearch."""
	loader = unittest.TestLoader()
	suite = loader.loadTestsFromTestCase(TestWebSearch)
	return suite

if __name__ == '__main__':
	# executed as a script: run the WebSearch suite with the text runner
	unittest.TextTestRunner().run(WebSearch_suite())

