<?php


// Serve the filtered feed for the current request.
//
// Two Cache_Lite caches are involved: one for the filtered output, keyed by
// the request URI, and one for the raw upstream feed, keyed by the feed URL.
//
// Reads $_GET['feed'] (feed URL), $_GET['filter'] (filter expression) and
// $_GET['block'] (optional wildcard pattern handed to cutout()).
//
// NOTE: on a filtered-cache hit the document is echoed and nothing is
// returned; on a miss the document is returned and NOT echoed. That asymmetry
// is kept as-is because the caller is not visible from this file.
function get_rss() {
    global $refilter_filtered_cache_dir, $refilter_filtered_cache_lifetime, $refilter_cache_dir, $refilter_cache_lifetime, $refilter_mime, $refilter_error, $refilter_inc;

    // Ideology aside, there's no practical reason to serve the feed as
    // anything other than text/plain, and there are reasons (i.e., Firefox)
    // for doing it this way.
    header('X-Fuckoff: WTF?');
    header('Cache-Control: no-cache');
    header('Expires: -1');
    header('Content-Type: text/plain; charset=UTF-8');

    // The request URI identifies the filtered result ...
    $fid = $_SERVER['REQUEST_URI'];

    // ... and the feed URL identifies the unfiltered feed. A missing or
    // array-valued parameter becomes '', which fetch() turns into an error page.
    $uid = (isset($_GET['feed']) && is_string($_GET['feed'])) ? $_GET['feed'] : '';

    $filtered_cache = new Cache_Lite(array(
        'cacheDir' => $refilter_filtered_cache_dir,
        'lifeTime' => $refilter_filtered_cache_lifetime
        ));

    // Is there a valid cache entry for this request?
    $cached = $filtered_cache->get($fid);
    if ($cached) {
        if (strpos($cached, '<!--ReFilter Error-->') !== false) {
            // A cached error page must still go out as a server error, so
            // that feed readers (probably) don't add the message as an item.
            header('HTTP/1.0 500 Internal Server Error');
            $refilter_error = true;
        } else {
            // Accept conditional GET to save bandwidth (may answer 304 and exit).
            doConditionalGet($filtered_cache->lastModified());
        }
        echo $cached;
        return;
    }

    $unfiltered_cache = new Cache_Lite(array(
        'cacheDir' => $refilter_cache_dir,
        'lifeTime' => $refilter_cache_lifetime
        ));
    $data = $unfiltered_cache->get($uid);

    if (!$data) {
        $data = fetch($uid);
        // No id is passed; presumably Cache_Lite stores it under the id given
        // to the preceding get() -- confirm against the Cache_Lite docs.
        $unfiltered_cache->save($data);
    }

    // Error pages are still cached, but we want to return a server error
    // anyway so that the error messages (probably) aren't added to feed
    // readers. Error pages are served as RSS2 just in case some aggregator
    // ignores the status code.
    if (strpos($data, '<!--ReFilter Error-->') !== false) {
        header('HTTP/1.0 500 Internal Server Error');
        $refilter_error = true;
    }

    if (!$refilter_error) {
        // Only load the filter engine when there is something to filter.
        require_once( $refilter_inc . 'ReFilter.php');
        $f = new ReFilter();

        $filterstring = (isset($_GET['filter']) && is_string($_GET['filter'])) ? $_GET['filter'] : '';
        // Limit the length just in case.
        $filterstring = substr($filterstring, 0, 300);
        $f->set_filter($filterstring);
        $data = $f->filter_rss($data);

        // Undocumented ad blocker: strip whatever matches the wildcard pattern.
        // NOTE(review): $_GET values are already URL-decoded by PHP, so this
        // urldecode() decodes a second time; kept because existing URLs may
        // rely on it.
        $pattern = (isset($_GET['block']) && is_string($_GET['block'])) ? $_GET['block'] : '';
        if ($pattern) {
            $data = cutout($data, urldecode($pattern));
        }
    }

    $filtered_cache->save($data);
    return $data;
}


// Retrieve a feed over HTTP(S) with cURL.
//
// $url is the address to fetch. Up to three redirects are followed and the
// whole transfer is limited to five seconds.
//
// Returns the response body when the final status is 200. On a cURL failure
// or any other status, returns the RSS error document produced by
// refilter_error() (which also sends the 500 header and sets the global
// $refilter_error flag).
function fetch($url) {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_USERAGENT, 'ReFilter 0.9/20080405 (+http://re.rephrase.net/filter/)');
    // Headers are included in the response and cut off again below.
    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($ch, CURLOPT_MAXREDIRS, 3);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    // get_rss() passes the URL straight from the query string, so never let
    // cURL speak anything but HTTP(S), neither initially nor after a redirect.
    // Other schemes could not yield the required 200 status anyway.
    curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
    curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);

    $response = curl_exec($ch);
    $error = curl_error($ch);
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);

    // Everything needed has been read; release the handle on every path.
    curl_close($ch);

    if ($response === false || trim($error) != '') {
        return refilter_error('snoopy_error', $error, $url);
    }

    if ($http_code != 200) {
        return refilter_error('status', $http_code, $url);
    }

    // A Content-Type check ("is this served as XML?") used to live here and
    // is currently disabled; refilter_error() still has a 'content-type'
    // message should it be reinstated.

    return substr($response, $header_size);
}


// Remove every match of a simple wildcard pattern from $str.
//
// $pattern is plain text in which '*' stands for "any run of characters, as
// short as possible" (it becomes '.*?') and '.' is left alone, i.e. matches
// any single character. Everything else is matched literally, and the 's'
// flag lets both wildcards span newlines. Full regular expression input is
// not allowed since one can be crafted to crash PHP.
//
// Returns the string with all matches removed. If PCRE fails (preg_replace()
// returns null, e.g. when the backtrack limit is hit), $str is returned
// unchanged instead of null, so a bad pattern cannot wipe out the feed.
function cutout($str, $pattern) {
    $regex = '/' . preg_quote($pattern, '/') . '/s';
    // Undo the quoting for the two characters that act as wildcards.
    $regex = str_replace(array('\.', '\*'), array('.', '.*?'), $regex);

    $result = preg_replace($regex, '', $str);
    return ($result === null) ? $str : $result;
}

// Build the error document returned when a feed can't be retrieved.
//
// The user-agent is most likely to be a feed reader, and most will respond
// intelligently to the HTTP 500 Internal Server Error sent here; for those
// that don't, an RSS2 feed carrying the error information is returned. The
// document references ./error.xsl so that actual users with the misfortune
// to see it get a transformed view.
//
// $error  error kind: 'content-type', 'status' or 'snoopy_error'
// $more   detail for the message (content type, status code or client error)
// $url    the feed URL that failed; becomes the item link
//
// Side effects: sends the 500 status header and sets the global
// $refilter_error flag to true.
//
// Returns the RSS2 document as a string. All interpolated values are escaped
// with htmlspecialchars(): htmlentities() would produce HTML-only named
// entities (e.g. &eacute;) that are undefined in XML and break parsing.
function refilter_error($error, $more, $url) {
    global $refilter_error;
    header('HTTP/1.1 500 Internal Server Error');

    $refilter_error = true;
    $url = htmlspecialchars((string) $url, ENT_QUOTES, 'UTF-8');
    switch ($error) {
        case 'content-type':
        $error = 'Wrong Content Type';
        $description = "ReFilter expects an XML feed, but found a document of type ". htmlspecialchars(trim((string) $more), ENT_QUOTES, 'UTF-8') .". Please make sure the feed URL is correct.";
        break;
        case 'status':
        $error = 'Feed Retrieval Failed';
        $description = "ReFilter was unable to retrieve the feed. The remote server responded with error code ". htmlspecialchars((string) $more, ENT_QUOTES, 'UTF-8').". Please make sure the feed URL is correct.";
        break;
        case 'snoopy_error':
        $error = 'Feed Retrieval Failed';
        $description = "ReFilter was unable to retrieve the feed. The HTTP client said: ". htmlspecialchars((string) $more, ENT_QUOTES, 'UTF-8');
        break;
        default:
        // Unknown kind: still emit a well-formed document with escaped text
        // instead of an undefined description and a raw title.
        $error = htmlspecialchars((string) $error, ENT_QUOTES, 'UTF-8');
        $description = htmlspecialchars((string) $more, ENT_QUOTES, 'UTF-8');
        break;
    }

    // The "<!--ReFilter Error-->" marker is what get_rss() looks for to
    // recognise an error page.
    $xml = <<<XML
<?xml version="1.0"?>
<!--ReFilter Error-->
<?xml-stylesheet href="./error.xsl" type="text/xsl" media="screen"?>

<rss version="2.0">
    <channel>
        <title>ReFilter Error</title>
        <link>http://re.rephrase.net/filter/</link>
        <description>ReFilter error message.</description>
        <item>
            <link>$url</link>
            <title>$error</title>
            <description>$description</description>
        </item>
    </channel>
</rss>

XML;

    return $xml;
}

//
// Conditional GET support.
// see: http://simon.incutio.com/archive/2003/04/23/conditionalGet
//      http://fishbowl.pastiche.org/archives/001132.html
//
// $timestamp is the Unix time at which the content last changed.
//
// Always sends Last-Modified and ETag headers derived from $timestamp. If the
// client supplied If-Modified-Since and/or If-None-Match and every supplied
// header matches, a 304 Not Modified is sent and the script exits; otherwise
// the function simply returns and the caller sends the body.
//
function doConditionalGet($timestamp) {
    // HTTP dates are expressed in GMT, so format with gmdate(). Formatting
    // local time and relabelling it "GMT" is off by the UTC offset on any
    // server that isn't running in UTC.
    $last_modified = gmdate('D, d M Y H:i:s', $timestamp) . ' GMT';
    $etag = '"'.md5($last_modified).'"';
    // Send the headers
    header("Last-Modified: $last_modified");
    header("ETag: $etag");
    // See if the client has provided the required headers
    $if_modified_since = isset($_SERVER['HTTP_IF_MODIFIED_SINCE']) ?
        stripslashes($_SERVER['HTTP_IF_MODIFIED_SINCE']) :
        false;
    $if_none_match = isset($_SERVER['HTTP_IF_NONE_MATCH']) ?
        stripslashes($_SERVER['HTTP_IF_NONE_MATCH']) :
        false;
    if (!$if_modified_since && !$if_none_match) {
        return;
    }
    // At least one of the headers is there - check them
    if ($if_none_match && $if_none_match != $etag) {
        return; // etag is there but doesn't match
    }
    if ($if_modified_since && $if_modified_since != $last_modified) {
        return; // if-modified-since is there but doesn't match
    }
    // Nothing has changed since their last request - serve a 304 and exit
    header('HTTP/1.0 304 Not Modified');
    exit;
}

?>