/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.portals.applications.webcontent.proxy.impl;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;
import org.apache.portals.applications.webcontent.proxy.HttpReverseProxyPathMapper;
import org.apache.portals.applications.webcontent.util.CharArraySegment;

/**
 * A simple reverse proxy link rewriting parser adaptor implementation.
 * <P>
 * You can refer to this example to implement a more sophisticated rewriting parser adaptor.
 * </P>
 * 
 * @version $Id: DefaultReverseProxyLinkRewritingParserAdaptor.java 1372685 2012-08-14 00:26:42Z woonsan $
 */
public class DefaultReverseProxyLinkRewritingParserAdaptor extends AbstractReverseProxyTextLinesParserAdaptor
{
    
    /**
     * Matches <code>href</code>, <code>src</code> or <code>action</code> attributes
     * whose quoted value starts with a slash. Group 3 is the opening quote and group 4 the path.
     */
    protected static final Pattern LINK_ABS_PATH_PATTERN = 
        Pattern.compile("(\\s|^)(href|src|action)\\s*=\\s*(['\"])((\\/)[^'\"]*)['\"]", Pattern.CASE_INSENSITIVE);
    
    /**
     * Matches <code>href</code>, <code>src</code> or <code>action</code> attributes
     * whose quoted value is an absolute http(s) URL. Group 3 is the URL.
     */
    protected static final Pattern LINK_HTTP_ABS_URL_PATTERN = 
        Pattern.compile("(\\s|^)(href|src|action)\\s*=\\s*['\"](https?:\\/\\/[^'\"]+)['\"]", Pattern.CASE_INSENSITIVE);
    
    /**
     * Matches a URL made of scheme and authority only (no slash after the host),
     * such as "http://projects.apache.org".
     */
    protected static final Pattern HTTP_DOMAIN_ADDRESS_ONLY_PATTERN = 
        Pattern.compile("^https?:\\/\\/[^\\/]+$", Pattern.CASE_INSENSITIVE);
    
    /**
     * Cheap pre-check used to decide whether a line contains a <code>url(</code> token at all.
     */
    protected static final Pattern IMPORT_URL_PATTERN =
        Pattern.compile("(\\s|^)(url)\\s*\\(", Pattern.CASE_INSENSITIVE);
    
    /**
     * Matches <code>url('...')</code> whose quoted value starts with a slash.
     * Group 3 is the opening quote and group 4 the path.
     */
    protected static final Pattern IMPORT_URL_PATH_PATTERN = 
        Pattern.compile("(\\s|^)(url)\\s*\\(\\s*(['\"])((\\/)[^'\"]*)['\"]\\)", Pattern.CASE_INSENSITIVE);
    
    /**
     * Matches <code>url('...')</code> whose quoted value is an absolute http(s) URL.
     * Group 3 is the opening quote and group 4 the URL.
     */
    protected static final Pattern IMPORT_URL_ABS_URL_PATTERN = 
        Pattern.compile("(\\s|^)(url)\\s*\\(\\s*(['\"])(https?:\\/\\/[^'\"]+)['\"]\\)", Pattern.CASE_INSENSITIVE);
    
    /**
     * When true, every absolute URL found in a line is looked up against the path mapper
     * provider; when false only the default path mapper's remote base URL is rewritten.
     */
    protected boolean lookUpAllMappings;
    
    /** Not read or written anywhere in this class. */
    protected String localPathMatchingReplaces;
    
    /** Literal strings of a remote base URL which are escaped before it is embedded in a regular expression. */
    protected String [] linkRemoteBaseURLSearches = { "/", "." };
    
    /** Escaped counterparts of {@link #linkRemoteBaseURLSearches}, index by index. */
    protected String [] linkRemoteBaseURLReplaces = { "\\/", "\\." };
    
    // Lazily created from the default path mapper on first use; see initDefaultMappingIfNeeded().
    private Pattern defaultRemoteURLPattern;
    private Pattern defaultRemoteImportURLPattern;
    private String defaultRemoteURLReplaces;
    
    // Remote base URL keys for which the provider returned no mapper, so the lookup is not repeated.
    private Set<String> blacklist = new HashSet<String>();
    
    // Per mapper caches of the compiled patterns and of the local replacement prefix.
    private Map<HttpReverseProxyPathMapper, Pattern> remoteURLMatchingPatternMap = new HashMap<HttpReverseProxyPathMapper, Pattern>();
    private Map<HttpReverseProxyPathMapper, Pattern> remoteImportURLMatchingPatternMap = new HashMap<HttpReverseProxyPathMapper, Pattern>();
    private Map<HttpReverseProxyPathMapper, String> localPathMatchingReplacesMap = new HashMap<HttpReverseProxyPathMapper, String>();
    
    private Pattern [] customPatterns;
    private String [] customReplaces;
    
    /**
     * Creates an adaptor which looks up all mappings and starts with an empty blacklist.
     */
    public DefaultReverseProxyLinkRewritingParserAdaptor()
    {
        this(true, null);
    }
    
    /**
     * Creates an adaptor.
     * 
     * @param lookUpAllMappings see {@link #setLookUpAllMappings(boolean)}
     * @param blacklist see {@link #setBlacklist(Set)}; may be null
     */
    public DefaultReverseProxyLinkRewritingParserAdaptor(boolean lookUpAllMappings, Set<String> blacklist)
    {
        setLookUpAllMappings(lookUpAllMappings);
        setBlacklist(blacklist);
    }
    
    /**
     * Sets whether every absolute URL should be resolved through the path mapper provider
     * or only the default path mapper should be applied.
     * 
     * @param lookUpAllMappings true to look up a mapper for each absolute URL
     */
    public void setLookUpAllMappings(boolean lookUpAllMappings)
    {
        this.lookUpAllMappings = lookUpAllMappings;
    }
    
    /**
     * @return the flag set by {@link #setLookUpAllMappings(boolean)}
     */
    public boolean getLookUpAllMappings()
    {
        return lookUpAllMappings;
    }
    
    /**
     * Sets the set of remote base URL keys which must not be looked up.
     * The given set is kept by reference and this adaptor adds every key for which
     * no mapper is found to it.
     * 
     * @param blacklist the set to use; null is replaced by a new empty set so that
     *                  failed lookups can always be recorded
     */
    public void setBlacklist(Set<String> blacklist)
    {
        this.blacklist = (blacklist != null ? blacklist : new HashSet<String>());
    }
    
    /**
     * Compiles the given regular expressions (case insensitively) as custom patterns.
     * 
     * @param patterns regular expressions; null removes all custom patterns
     */
    public void setCustomPatterns(String [] patterns)
    {
        if (patterns == null)
        {
            customPatterns = null;
            return;
        }
        
        Pattern [] compiled = new Pattern[patterns.length];
        
        for (int i = 0; i < patterns.length; i++)
        {
            compiled[i] = Pattern.compile(patterns[i], Pattern.CASE_INSENSITIVE);
        }
        
        // assign only after every expression compiled, so a bad expression leaves the old patterns intact
        customPatterns = compiled;
    }
    
    /**
     * Sets the replacement strings for the custom patterns, index by index.
     * 
     * @param customReplaces replacement strings in {@link Matcher#replaceAll(String)} syntax
     */
    public void setCustomReplaces(String [] customReplaces)
    {
        this.customReplaces = customReplaces;
    }
    
    /**
     * Rewrites one line: <code>url(...)</code> references first (only when the line contains
     * a <code>url(</code> token), then <code>href</code>/<code>src</code>/<code>action</code> attributes.
     * <P>
     * NOTE(review): both steps apply the custom patterns, so they are applied twice to a line
     * containing <code>url(</code>. This is the existing behavior; confirm that it is intended.
     * </P>
     * 
     * @param line the line to rewrite
     * @return the rewritten line
     */
    @Override
    protected String rewriteLine(String line) throws Exception
    {
        initDefaultMappingIfNeeded();
        
        if (IMPORT_URL_PATTERN.matcher(line).find())
        {
            line = rewriteImportURLs(line);
        }
        
        return rewriteLinks(line);
    }
    
    /**
     * Rewrites quoted <code>url(...)</code> references in the line and then applies the custom patterns.
     * 
     * @param line the line to rewrite
     * @return the rewritten line
     */
    protected String rewriteImportURLs(String line) throws Exception
    {
        initDefaultMappingIfNeeded();
        
        // first, replace slash leading relative paths by slash leading reverse proxying relative paths.
        line = IMPORT_URL_PATH_PATTERN.matcher(line).replaceAll("$1$2($3" + defaultRemoteURLReplaces + "$4$3)");
        
        if (lookUpAllMappings)
        {
            // if there's any https? absolute url, try to find the proxy path mapper again...
            line = rewriteMappedAbsoluteURLs(line, true);
        }
        else
        {
            // must be the url(...) pattern here; the attribute pattern would turn href="..." into href("...")
            line = defaultRemoteImportURLPattern.matcher(line).replaceAll("$1$2($3" + defaultRemoteURLReplaces + "$6$3)");
        }
        
        return applyCustomPatterns(line);
    }
    
    /**
     * Rewrites quoted <code>href</code>/<code>src</code>/<code>action</code> attribute values
     * in the line and then applies the custom patterns.
     * 
     * @param line the line to rewrite
     * @return the rewritten line
     */
    protected String rewriteLinks(String line) throws Exception
    {
        initDefaultMappingIfNeeded();
        
        // first, replace slash leading relative paths by slash leading reverse proxying relative paths.
        line = LINK_ABS_PATH_PATTERN.matcher(line).replaceAll("$1$2=$3" + defaultRemoteURLReplaces + "$4$3");
        
        if (lookUpAllMappings)
        {
            // if there's any https? absolute url link, try to find the proxy path mapper again...
            line = rewriteMappedAbsoluteURLs(line, false);
        }
        else
        {
            line = defaultRemoteURLPattern.matcher(line).replaceAll("$1$2=$3" + defaultRemoteURLReplaces + "$6$3");
        }
        
        return applyCustomPatterns(line);
    }
    
    /**
     * Builds the pattern matching attributes which point below the remote base URL of the mapper.
     * Group 4 is the whole URL, group 5 the remote base URL and group 6 the remainder.
     * Only the strings in {@link #linkRemoteBaseURLSearches} are escaped in the base URL.
     * 
     * @param proxyMapper the mapper providing the remote base URL
     * @return the compiled, case insensitive pattern
     */
    protected Pattern createRemoteURLMatchingPattern(HttpReverseProxyPathMapper proxyMapper)
    {
        String remoteBaseURLPattern = StringUtils.replaceEach(StringUtils.removeEnd(proxyMapper.getRemoteBaseURL(), "/"), linkRemoteBaseURLSearches, linkRemoteBaseURLReplaces);
        return Pattern.compile("(\\s|^)(href|src|action)\\s*=\\s*(['\"])((" + remoteBaseURLPattern + ")([^'\"]*))['\"]", Pattern.CASE_INSENSITIVE);
    }
    
    /**
     * Builds the pattern matching <code>url('...')</code> references which point below the
     * remote base URL of the mapper. The groups are numbered as in
     * {@link #createRemoteURLMatchingPattern(HttpReverseProxyPathMapper)}.
     * 
     * @param proxyMapper the mapper providing the remote base URL
     * @return the compiled, case insensitive pattern
     */
    protected Pattern createRemoteImportURLMatchingPattern(HttpReverseProxyPathMapper proxyMapper)
    {
        String remoteBaseURLPattern = StringUtils.replaceEach(StringUtils.removeEnd(proxyMapper.getRemoteBaseURL(), "/"), linkRemoteBaseURLSearches, linkRemoteBaseURLReplaces);
        return Pattern.compile("(\\s|^)(url)\\s*\\(\\s*(['\"])((" + remoteBaseURLPattern + ")([^'\"]*))['\"]\\)", Pattern.CASE_INSENSITIVE);
    }
    
    /**
     * Builds the local prefix replacing the remote base URL: the rewriting context path
     * followed by the local base path of the mapper without its trailing slash.
     * 
     * @param proxyMapper the mapper providing the local base path
     * @return the prefix, used verbatim inside a regular expression replacement string
     */
    protected String createLocalPathMatchingReplaces(HttpReverseProxyPathMapper proxyMapper)
    {
        return getRewritingContextPath() + StringUtils.removeEnd(proxyMapper.getLocalBasePath(), "/");
    }
    
    /**
     * Replaces every match of the matcher in its input, appending a slash to the rewritten
     * value when the matched URL consists of scheme and domain name only.
     * 
     * @param matcher a matcher whose pattern has the URL in group 4; it is reset first
     * @param replacement the replacement string; the slash is inserted before its last "$3"
     * @param text returned unchanged when the matcher finds nothing
     * @return the rewritten input of the matcher, or <code>text</code> when nothing matched
     */
    protected String replaceRemoteLinkValues(Matcher matcher, String replacement, String text)
    {
        return replaceAppendingSlashToDomainOnlyURLs(matcher, replacement, text);
    }
    
    /**
     * Same contract as {@link #replaceRemoteLinkValues(Matcher, String, String)},
     * used for <code>url(...)</code> references.
     * 
     * @param matcher a matcher whose pattern has the URL in group 4; it is reset first
     * @param replacement the replacement string; the slash is inserted before its last "$3"
     * @param text returned unchanged when the matcher finds nothing
     * @return the rewritten input of the matcher, or <code>text</code> when nothing matched
     */
    protected String replaceRemoteImportLinkValues(Matcher matcher, String replacement, String text)
    {
        return replaceAppendingSlashToDomainOnlyURLs(matcher, replacement, text);
    }
    
    /**
     * Creates the default pattern and replacement values from the default path mapper
     * the first time any rewriting method runs.
     */
    private void initDefaultMappingIfNeeded() throws Exception
    {
        if (defaultRemoteURLPattern != null)
        {
            return;
        }
        
        HttpReverseProxyPathMapper defaultMapper = getHttpReverseProxyPathMapper();
        defaultRemoteURLReplaces = createLocalPathMatchingReplaces(defaultMapper);
        defaultRemoteImportURLPattern = createRemoteImportURLMatchingPattern(defaultMapper);
        // assigned last because it doubles as the "initialized" marker
        defaultRemoteURLPattern = createRemoteURLMatchingPattern(defaultMapper);
    }
    
    /**
     * Scans the line for absolute http(s) URLs and, for each one a mapper is found for,
     * rewrites all references of that mapper's remote base URL in the line.
     * 
     * @param line the line to rewrite
     * @param importURLs true to handle <code>url(...)</code> references, false to handle attributes
     * @return the rewritten line
     */
    private String rewriteMappedAbsoluteURLs(String line, boolean importURLs) throws Exception
    {
        Pattern absURLPattern = (importURLs ? IMPORT_URL_ABS_URL_PATTERN : LINK_HTTP_ABS_URL_PATTERN);
        int absURLGroup = (importURLs ? 4 : 3);
        
        // The scan runs over the text as it was on entry; replacements go into 'line' only.
        CharSequence segment = new CharArraySegment(line);
        
        for (Matcher absURLMatcher = absURLPattern.matcher(segment); absURLMatcher.find(); )
        {
            HttpReverseProxyPathMapper proxyMapper = findProxyMapperByAbsoluteURL(absURLMatcher.group(absURLGroup));
            
            if (proxyMapper != null)
            {
                String replaces = getCachedLocalPathMatchingReplaces(proxyMapper);
                
                if (importURLs)
                {
                    Matcher matcher = getCachedRemoteImportURLMatchingPattern(proxyMapper).matcher(line);
                    line = replaceRemoteImportLinkValues(matcher, "$1$2($3" + replaces + "$6$3)", line);
                }
                else
                {
                    Matcher matcher = getCachedRemoteURLMatchingPattern(proxyMapper).matcher(line);
                    line = replaceRemoteLinkValues(matcher, "$1$2=$3" + replaces + "$6$3", line);
                }
            }
            
            // continue the scan right behind the URL which has just been handled
            segment = segment.subSequence(absURLMatcher.end(), segment.length());
            absURLMatcher.reset(segment);
        }
        
        return line;
    }
    
    /**
     * Finds the mapper for an absolute URL by trying its base URLs from the longest
     * candidate (limited by the max matching path part count) down to scheme and host only.
     * Candidates for which the provider has no mapper are added to the blacklist.
     * 
     * @param absURL an absolute http(s) URL
     * @return the mapper, or null when no candidate base URL is mapped
     */
    private HttpReverseProxyPathMapper findProxyMapperByAbsoluteURL(String absURL) throws Exception
    {
        int maxMatchingPathPartCount = getMaxMatchingPathPartCount();
        String [] pathParts = StringUtils.split(absURL, "/", maxMatchingPathPartCount + 2);
        int pathPartCount = (pathParts != null ? pathParts.length : 0);
        
        // at least the scheme and the host are needed
        if (pathPartCount < 2)
        {
            return null;
        }
        
        String scheme = pathParts[0];
        
        for (int i = Math.min(pathPartCount, maxMatchingPathPartCount + 1); i > 1; i--)
        {
            String remoteBaseURLKey = scheme + "//" + StringUtils.join(pathParts, "/", 1, i);
            
            if (blacklist != null && blacklist.contains(remoteBaseURLKey))
            {
                continue;
            }
            
            HttpReverseProxyPathMapper proxyMapper = getHttpReverseProxyPathMapperProvider().findMapperByRemoteURL(remoteBaseURLKey + "/");
            
            if (proxyMapper != null)
            {
                return proxyMapper;
            }
            
            if (blacklist != null)
            {
                blacklist.add(remoteBaseURLKey);
            }
        }
        
        return null;
    }
    
    /**
     * Returns the attribute pattern of the mapper, creating and caching it on first use.
     */
    private Pattern getCachedRemoteURLMatchingPattern(HttpReverseProxyPathMapper proxyMapper)
    {
        Pattern pattern = remoteURLMatchingPatternMap.get(proxyMapper);
        
        if (pattern == null)
        {
            pattern = createRemoteURLMatchingPattern(proxyMapper);
            remoteURLMatchingPatternMap.put(proxyMapper, pattern);
        }
        
        return pattern;
    }
    
    /**
     * Returns the <code>url(...)</code> pattern of the mapper, creating and caching it on first use.
     */
    private Pattern getCachedRemoteImportURLMatchingPattern(HttpReverseProxyPathMapper proxyMapper)
    {
        Pattern pattern = remoteImportURLMatchingPatternMap.get(proxyMapper);
        
        if (pattern == null)
        {
            pattern = createRemoteImportURLMatchingPattern(proxyMapper);
            remoteImportURLMatchingPatternMap.put(proxyMapper, pattern);
        }
        
        return pattern;
    }
    
    /**
     * Returns the local replacement prefix of the mapper, creating and caching it on first use.
     */
    private String getCachedLocalPathMatchingReplaces(HttpReverseProxyPathMapper proxyMapper)
    {
        String replaces = localPathMatchingReplacesMap.get(proxyMapper);
        
        if (replaces == null)
        {
            replaces = createLocalPathMatchingReplaces(proxyMapper);
            localPathMatchingReplacesMap.put(proxyMapper, replaces);
        }
        
        return replaces;
    }
    
    /**
     * Applies each custom pattern with the replacement at the same index.
     * Patterns without a corresponding replacement are skipped.
     */
    private String applyCustomPatterns(String line)
    {
        if (customPatterns == null || customReplaces == null)
        {
            return line;
        }
        
        int pairCount = Math.min(customPatterns.length, customReplaces.length);
        
        for (int i = 0; i < pairCount; i++)
        {
            line = customPatterns[i].matcher(line).replaceAll(customReplaces[i]);
        }
        
        return line;
    }
    
    /**
     * Shared implementation of the two protected replace methods.
     */
    private static String replaceAppendingSlashToDomainOnlyURLs(Matcher matcher, String replacement, String text)
    {
        matcher.reset();
        
        if (!matcher.find())
        {
            return text;
        }
        
        // if the url is composed of scheme and domain name such as "http://projects.apache.org",
        // then the local path info should be appended by '/' like "/webcontent/rproxy/project_apache/".
        // The slash goes in front of the closing quote reference; without one the replacement is used as is.
        int offset = replacement.lastIndexOf("$3");
        String domainOnlyReplacement = replacement;
        
        if (offset >= 0)
        {
            domainOnlyReplacement = replacement.substring(0, offset) + "/" + replacement.substring(offset);
        }
        
        StringBuffer sb = new StringBuffer();
        
        do
        {
            boolean domainOnly = HTTP_DOMAIN_ADDRESS_ONLY_PATTERN.matcher(matcher.group(4)).matches();
            matcher.appendReplacement(sb, domainOnly ? domainOnlyReplacement : replacement);
        }
        while (matcher.find());
        
        matcher.appendTail(sb);
        
        return sb.toString();
    }
}
