#/***************************************************************************
# *   Copyright (C) 2010 by matt@blackcodeseo.com                           *
# *   http://blackcodeseo.com/                                              *
# *                                                                         *
# *   Permission is hereby granted, free of charge, to any person obtaining *
# *   a copy of this software and associated documentation files (the       *
# *   "Software"), to deal in the Software without restriction, including   *
# *   without limitation the rights to use, copy, modify, merge, publish,   *
# *   distribute, sublicense, and/or sell copies of the Software, and to    *
# *   permit persons to whom the Software is furnished to do so, subject to *
# *   the following conditions:                                             *
# *                                                                         *
# *   The above copyright notice and this permission notice shall be        *
# *   included in all copies or substantial portions of the Software.       *
# *                                                                         *
# *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,       *
# *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF    *
# *   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*
# *   IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR     *
# *   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, *
# *   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR *
# *   OTHER DEALINGS IN THE SOFTWARE.                                       *
# ***************************************************************************/

from BeautifulSoup import BeautifulSoup
import urllib2
import random

def removeLinks(data):
    """Return *data* with the markup of every ``<a>`` element removed.

    The input is parsed with BeautifulSoup, and the serialised form of
    each anchor found is deleted from the text wherever it occurs
    verbatim.
    """
    cleaned = data
    # NOTE(review): str() on a tag may yield an encoded byte string under
    # Python 2 -- confirm this is safe when *data* is unicode.
    for anchor in BeautifulSoup(data).findAll('a'):
        cleaned = cleaned.replace(str(anchor), '')
    return cleaned

def rawTitle(soup):
    """Return the title text of a post, or None when it has no caption.

    The title is taken from the ``<h1 class="post-caption">`` element.
    When the heading wraps its text in a link, the link's string is
    returned; when there is no link, the heading's own string is used
    instead of failing with an AttributeError.

    soup -- a parsed post fragment exposing ``find``.
    """
    heading = soup.find('h1', {'class': 'post-caption'})
    if not heading:
        # No caption heading in this fragment; hand back the falsy
        # lookup result unchanged (None for a missing element).
        return heading
    anchor = heading.a
    if anchor is None:
        # Heading without a link: fall back to its direct text.
        return heading.string
    return anchor.string

def rawBody(soup):
    """Return the concatenated text of every ``<p>`` in *soup*.

    Each paragraph contributes its ``.string`` value; paragraphs whose
    ``.string`` is None or empty contribute nothing.  The pieces are
    joined without any separator and the result is passed through
    removeLinks before being returned.
    """
    fragments = [unicode(para.string or '') for para in soup.findAll('p')]
    return removeLinks(u''.join(fragments))

def getPosts(url):
    """Download *url* and return a list of Post objects found on the page.

    Every ``<div class="post">`` element becomes one Post whose title and
    body are extracted with rawTitle and rawBody.

    url -- address of the blog page to fetch.

    Errors raised by ``urllib2.urlopen`` or by reading the response
    propagate to the caller.
    """
    f = urllib2.urlopen(url)
    try:
        data = f.read()
    finally:
        # Release the connection even when read() fails part-way.
        f.close()
    soup = BeautifulSoup(data)
    return [Post(title=rawTitle(entry), body=rawBody(entry))
            for entry in soup.findAll('div', {'class': 'post'})]

class Post(object):
    """A single blog post: a title and a body, both optional.

    Keyword arguments:
    title -- initial title (default None)
    body  -- initial body text (default None)
    """

    def __init__(self, **kwargs):
        self.setTitle(kwargs.get('title', None))
        self.setBody(kwargs.get('body', None))

    def title(self):
        """Return the post title (None when unset)."""
        return self._title

    def setTitle(self, v):
        """Replace the post title with *v*."""
        self._title = v

    def body(self):
        """Return the post body (None when unset)."""
        return self._body

    def setBody(self, v):
        """Replace the post body with *v*."""
        self._body = v

    def __str__(self):
        # __str__ must return a string; a post without a title would
        # otherwise make str(post) raise TypeError.
        if self._title is None:
            return ''
        return self._title

class Blog(object):
    """A blog identified by its URL together with a list of its posts.

    Keyword arguments:
    url   -- address of the blog (default None)
    posts -- list of posts (default: a new empty list)
    """

    def __init__(self, **kwargs):
        self.setUrl(kwargs.get('url', None))
        self.setPosts(kwargs.get('posts', []))

    def url(self):
        """Return the blog URL (None when unset)."""
        return self._url

    def setUrl(self, v):
        """Replace the blog URL with *v*."""
        self._url = v

    def posts(self):
        """Return the list of posts held by this blog."""
        return self._posts

    def setPosts(self, v):
        """Replace the post list with *v* (stored by reference, not copied)."""
        self._posts = v

    def addPost(self, post):
        """Append *post* to the post list."""
        self._posts.append(post)

    def removePost(self, post):
        """Remove the first occurrence of *post*; ValueError if absent."""
        self._posts.remove(post)

    def __str__(self):
        # __str__ must return a string; a blog without a URL would
        # otherwise make str(blog) raise TypeError.
        if self._url is None:
            return ''
        return self._url

class Mashup(object):
    """Builds a text made from the leading sentences of many blog posts.

    Keyword arguments:
    sentencesPerBlogPostBody -- how many sentences to take from each
                                post body (default 3)
    randomize                -- shuffle the collected sentences
                                (default False)
    blogs                    -- list of blog objects exposing posts()
                                (default: a new empty list)
    """

    def __init__(self, **kwargs):
        self.setSentencesPerBlogPostBody(kwargs.get('sentencesPerBlogPostBody', 3))
        self.setRandomize(kwargs.get('randomize', False))
        self.setBlogs(kwargs.get('blogs', []))

    def sentencesPerBlogPostBody(self):
        """Return the number of sentences taken from each post."""
        return self._sentencesPerBlogPostBody

    def setSentencesPerBlogPostBody(self, v):
        """Set the number of sentences taken from each post."""
        self._sentencesPerBlogPostBody = v

    def addBlog(self, blog):
        """Append *blog* to the list of source blogs."""
        self._blogs.append(blog)

    def removeBlog(self, blog):
        """Remove the first occurrence of *blog*; ValueError if absent."""
        self._blogs.remove(blog)

    def randomize(self):
        """Return whether the sentences are shuffled."""
        return self._randomize

    def setRandomize(self, v):
        """Enable or disable shuffling of the sentences."""
        self._randomize = v

    def blogs(self):
        """Return the list of source blogs."""
        return self._blogs

    def setBlogs(self, v):
        """Replace the list of source blogs (stored by reference)."""
        self._blogs = v

    def _leadingSentences(self, body):
        """Return the first configured number of sentences of *body*.

        '.', '!' and '?' all end a sentence.  Fragments that are empty
        after stripping whitespace (produced by runs such as '...' or
        '?!') are dropped so they do not use up the per-post quota.
        A missing or empty body yields an empty list.
        """
        if not body:
            return []
        normalized = body.replace('!', '.').replace('?', '.')
        stripped = [piece.strip() for piece in normalized.split('.')]
        sentences = [piece for piece in stripped if piece]
        return sentences[:self.sentencesPerBlogPostBody()]

    def createMashup(self):
        """Return the mashup text encoded as ISO-8859-15 bytes.

        The leading sentences of every post of every blog are collected
        in order (shuffled when randomize() is true), joined with '. '
        and terminated with a final '.'.  Keeping sentences as separate
        items until the final join prevents the last sentence of one
        post from being fused with the first sentence of the next.
        Characters that cannot be represented in ISO-8859-15 are
        replaced.  With no sentences at all the result is empty.
        """
        sentences = []
        for blog in self.blogs():
            for post in blog.posts():
                sentences.extend(self._leadingSentences(post.body()))
        if self.randomize():
            random.shuffle(sentences)
        mashupData = '. '.join(sentences)
        if mashupData:
            mashupData += '.'
        return mashupData.encode("iso-8859-15", "replace")

def getMashup(urls):
    """Return a Mashup over the blogs found at *urls*.

    One Blog is built per address, populated with the posts that
    getPosts downloads for it; the resulting list, in the same order as
    *urls*, becomes the blogs of a Mashup with default settings.
    """
    fetched = [Blog(url=address, posts=getPosts(address)) for address in urls]
    return Mashup(blogs=fetched)

if __name__ == '__main__':
    # Manual smoke test; it fetches the blogs listed below over the
    # network, so it stays switched off unless this flag is set to True.
    test = False
    if test:
        urls = [
            'http://blackcodeseo.com',
            'http://blog.5ubliminal.com',
            'http://blackhatseo-blog.com',
        ]

        mashup = getMashup(urls)
        mashup.setRandomize(True)
        print(mashup.createMashup())
