#!/usr/bin/env python

# PLEASE NOTE: THIS HAS NOT BEEN TESTED (EXTENSIVELY). It seems to work on 
# Ubuntu 8.04 with Mediawiki v. 1.15, but please use at your own risk.
# I have no idea if this works on Windows.


# Goes through the files located in the specified pages_path
# and imports each one into the Mediawiki MySQL database.
# The files that are located in pages_path are expected to be text files 
# which contain the wiki syntax for each page to be imported. 

# If you have pages which are in html syntax, you may find it helpful to use
# the following script which generates wiki syntax from html: 
# http://www.dwheeler.com/html2wikipedia/
# To apply the html2wikipedia script to a large number of pages, you can 
# use the following bash script (note that you must create the directory "pages"
# before you run this script):

# #!/bin/bash
#
# for i in $( find /path/to/htmlpages -name '*.htm' -type f ); do
#	 FILENAME=$(basename "$i")
#	 echo "Filename: $FILENAME"
#	 html2wikipedia < "$i" > "./pages/$FILENAME"
# done

# Of course, the script linked to above is in no way related to this 
# import script.

# DEPENDENCIES: a working Mediawiki installation >= 1.15 AND 
#               python >= 2.5 (apt-get install python2.5) AND
#               python-mysqldb (apt-get install python-mysqldb)



import os
import random
import sys
import time
from glob import glob

import MySQLdb as mysql_db

# DATABASE PARAMETERS
database = "mysql"            # Must stay "mysql"; postgres is not supported.
db_host = "host_name"         # e.g.: "localhost"
db_name = "wiki_name"         # e.g.: "wikidb"
db_user = "your_username"     # e.g.: "root"
db_pass = "your_password"


# PAGES PARAMETERS

# pages_path is a glob pattern: the folder that holds the text files to be
# imported (one file per page), followed by "/*".
# e.g.: "/usr/local/src/importing/pages/*"
pages_path = "/absolute/path/to/your/pages/*"

# The defaults below rarely need to be changed:
page_namespace = 0            # The default Main namespace.
page_restrictions = ""        # No restrictions by default.
page_counter = 0              # Visit counter of the page starts at 0.
page_is_redirect = 0          # A regular page, not a redirect.
page_is_new = 1               # Page is marked as new when inserted.
page_flags = "utf-8"          # Stored in text.old_flags; change as desired.


# REVISION PARAMETERS
rev_comment = "/* Testing */" # Comment attached to the revision, change as desired.
rev_user = 1                  # ID of the wiki user responsible for the import.
rev_user_text = "WikiSysop"   # The username of your wiki sysop.
rev_minor_edit = 0            # Not a minor edit.
rev_deleted = 0               # Stored as-is in revision.rev_deleted.



# DO NOT EDIT BELOW THIS LINE *************************************************

# Page-specific values; the import loop below reassigns them for each page.
page_title = ""
page_text = ""
page_len = 0
page_random = 0
page_latest = 0

# Establish the database connection and the cursor used for every statement.
# NOTE: no "text_factory" is set on the connection; that attribute belongs to
# sqlite3 connections and means nothing to MySQLdb.
db = mysql_db
conn = db.connect(host=db_host, user=db_user, passwd=db_pass, db=db_name)

cur = conn.cursor()

# Import every file matched by pages_path as a new wiki page.
#
# For each file, a single transaction creates three linked rows:
#   text     - holds the wiki markup; its generated old_id becomes
#              revision.rev_text_id
#   page     - inserted with page_latest = 0 so that its real page_id is
#              known before the revision is written
#   revision - references both rows; its rev_id is then written back to
#              page.page_latest
# Every value is passed as a query parameter so that the driver does the
# escaping (string-built SQL mangled quotes and non-ASCII text).
# A database error rolls the current page back and the import carries on with
# the next file. The connection is closed even if the import aborts.
try:
    for page in glob(pages_path):

        # The pattern may also match sub-directories; only files are pages.
        if not os.path.isfile(page):
            continue

        # Title = file name up to its first ".". MediaWiki stores titles with
        # underscores in place of spaces.
        page_title = os.path.basename(page).split(".")[0].replace(" ", "_")

        # Get the page content; the file is closed even if reading fails.
        readfile = open(page)
        try:
            page_text = readfile.read()
        finally:
            readfile.close()

        page_len = len(page_text)
        page_random = random.random()

        # Revision time as YYYYMMDDHHMMSS. MediaWiki keeps timestamps in UTC.
        rev_timestamp = time.strftime("%Y%m%d%H%M%S", time.gmtime())

        # "failure" always names the step in progress so that the except
        # branch can report which table could not be written.
        failure = "Failed to create a new record in Table: text."
        try:
            cur.execute(
                'INSERT INTO text (old_text, old_flags) VALUES (%s, %s)',
                (page_text, page_flags))
            # Id generated by this very insert (not MAX(old_id), which could
            # belong to another row).
            rev_text_id = cur.lastrowid

            failure = "Failed to register page in Table: pages."
            cur.execute(
                'INSERT INTO page (page_namespace, page_title, '
                'page_restrictions, page_counter, page_is_redirect, '
                'page_is_new, page_random, page_latest, page_len) '
                'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)',
                (page_namespace, page_title, page_restrictions, page_counter,
                 page_is_redirect, page_is_new, page_random, 0, page_len))
            rev_page = cur.lastrowid

            failure = "Failed to create a new record in Table: revision."
            cur.execute(
                'INSERT INTO revision (rev_page, rev_text_id, rev_comment, '
                'rev_user, rev_user_text, rev_timestamp, rev_minor_edit, '
                'rev_deleted) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)',
                (rev_page, rev_text_id, rev_comment, rev_user, rev_user_text,
                 rev_timestamp, rev_minor_edit, rev_deleted))
            page_latest = cur.lastrowid

            # Point the page at the revision that was just created.
            failure = "Failed to register page in Table: pages."
            cur.execute(
                'UPDATE page SET page_latest = %s WHERE page_id = %s',
                (page_latest, rev_page))

            conn.commit()
        except mysql_db.Error:
            # Undo the partial import of this page, report, go to next file.
            # sys.exc_info() keeps the syntax valid on Python 2.5.
            conn.rollback()
            print(failure + " " + str(sys.exc_info()[1]))
            continue

        print("Inserted: " + repr(page_title) + " into Table: text")
        print("Inserted: " + repr(page_title) + " into Table: revision")
        print("Inserted: " + repr(page_title) + " into Table: page")
finally:
    conn.close()
