/*
 * Model Lucene search program implementation
 * Created on 2011-12-21, modified on 2012-03-04 
 * Jouni Tuominen <jouni.tuominen@aalto.fi>
 * Matias Frosterus <matias.frosterus@aalto.fi>
 */
package ir_course;

import java.io.IOException;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;

public class LuceneSearchApp {
	
	private RAMDirectory dir;
	
	public LuceneSearchApp() {
		this.dir = new RAMDirectory();
	}
	
	public void index(List<RssFeedDocument> docs) {
		
		try {
			IndexWriter writer = new IndexWriter(this.dir, new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));
			for (RssFeedDocument rssFeedDoc : docs) {
			    Document doc = new Document();
			    doc.add(new Field("title", rssFeedDoc.getTitle(), TextField.TYPE_STORED));
			    doc.add(new Field("description", rssFeedDoc.getDescription(), TextField.TYPE_STORED));
			    // date is handled as a String, other alternative would be a numeric value
			    doc.add(new Field("pubDate", DateTools.dateToString(rssFeedDoc.getPubDate(), DateTools.Resolution.DAY), TextField.TYPE_UNSTORED));
			    
				writer.addDocument(doc);
			}
			writer.close();
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	public List<String> search(List<String> inTitle, List<String> notInTitle, List<String> inDescription, List<String> notInDescription, String startDate, String endDate) {
		
		printQuery(inTitle, notInTitle, inDescription, notInDescription, startDate, endDate);
		
		try {
			IndexReader reader = IndexReader.open(this.dir);
			IndexSearcher searcher = new IndexSearcher(reader);
			BooleanQuery bq = new BooleanQuery();
			
			if (inTitle != null)
				for (String title : inTitle)
					bq.add(new TermQuery(new Term("title", title)), BooleanClause.Occur.MUST);
			if (notInTitle != null)
				for (String notTitle : notInTitle)
					bq.add(new TermQuery(new Term("title", notTitle)), BooleanClause.Occur.MUST_NOT);
			if (inDescription != null)
				for (String description : inDescription)
					bq.add(new TermQuery(new Term("description", description)), BooleanClause.Occur.MUST);
			if (notInDescription != null)
				for (String notDescription : notInDescription)
					bq.add(new TermQuery(new Term("description", notDescription)), BooleanClause.Occur.MUST_NOT);
			if (startDate != null || endDate != null) {
				// Lucene 4.0 uses BytesRef class to represent the boundaries in non-numeric range queries
				BytesRef startDateBytesRef = null;
				BytesRef endDateBytesRef = null;
				
				if (startDate != null) {
					startDate = startDate.replaceAll("-", "");
					startDateBytesRef = new BytesRef(startDate);
				}
				
				if (endDate != null) {
					endDate = endDate.replaceAll("-", "");
					endDateBytesRef = new BytesRef(endDate);
				}
				
				// for an open boundary the BytesRef should be null   
				bq.add(new TermRangeQuery("pubDate", startDateBytesRef, endDateBytesRef, true, true), BooleanClause.Occur.MUST);
			}
			
			LinkedList<String> titles = new LinkedList<String>();
			
			for (ScoreDoc scoreDoc : searcher.search(bq, 100).scoreDocs) {
				titles.add(searcher.doc(scoreDoc.doc).get("title"));
			}
			
			return titles;
				
		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		}
		
		return null;
	}
	
	public void printQuery(List<String> inTitle, List<String> notInTitle, List<String> inDescription, List<String> notInDescription, String startDate, String endDate) {
		System.out.print("Search (");
		if (inTitle != null) {
			System.out.print("in title: "+inTitle);
			if (notInTitle != null || inDescription != null || notInDescription != null || startDate != null || endDate != null)
				System.out.print("; ");
		}
		if (notInTitle != null) {
			System.out.print("not in title: "+notInTitle);
			if (inDescription != null || notInDescription != null || startDate != null || endDate != null)
				System.out.print("; ");
		}
		if (inDescription != null) {
			System.out.print("in description: "+inDescription);
			if (notInDescription != null || startDate != null || endDate != null)
				System.out.print("; ");
		}
		if (notInDescription != null) {
			System.out.print("not in description: "+notInDescription);
			if (startDate != null || endDate != null)
				System.out.print("; ");
		}
		if (startDate != null) {
			System.out.print("startDate: "+startDate);
			if (endDate != null)
				System.out.print("; ");
		}
		if (endDate != null)
			System.out.print("endDate: "+endDate);
		System.out.println("):");
	}
	
	public void printResults(List<String> results) {
		if (results.size() > 0) {
			Collections.sort(results);
			for (int i=0; i<results.size(); i++)
				System.out.println(" " + (i+1) + ". " + results.get(i));
		}
		else
			System.out.println(" no results");
	}
	
	public static void main(String[] args) {
		if (args.length > 0) {
			LuceneSearchApp engine = new LuceneSearchApp();
			
			RssFeedParser parser = new RssFeedParser();
			parser.parse(args[0]);
			List<RssFeedDocument> docs = parser.getDocuments();
			
			engine.index(docs);

			List<String> inTitle;
			List<String> notInTitle;
			List<String> inDescription;
			List<String> notInDescription;
			List<String> results;
			
			// 1) search documents with words "kim" and "korea" in the title
			inTitle = new LinkedList<String>();
			inTitle.add("kim");
			inTitle.add("korea");
			results = engine.search(inTitle, null, null, null, null, null);
			engine.printResults(results);
			
			// 2) search documents with word "kim" in the title and no word "korea" in the description
			inTitle = new LinkedList<String>();
			notInDescription = new LinkedList<String>();
			inTitle.add("kim");
			notInDescription.add("korea");
			results = engine.search(inTitle, null, null, notInDescription, null, null);
			engine.printResults(results);

			// 3) search documents with word "us" in the title, no word "dawn" in the title and word "" and "" in the description
			inTitle = new LinkedList<String>();
			inTitle.add("us");
			notInTitle = new LinkedList<String>();
			notInTitle.add("dawn");
			inDescription = new LinkedList<String>();
			inDescription.add("american");
			inDescription.add("confession");
			results = engine.search(inTitle, notInTitle, inDescription, null, null, null);
			engine.printResults(results);
			
			// 4) search documents whose publication date is 2011-12-18
			results = engine.search(null, null, null, null, "2011-12-18", "2011-12-18");
			engine.printResults(results);
			
			// 5) search documents with word "video" in the title whose publication date is 2000-01-01 or later
			inTitle = new LinkedList<String>();
			inTitle.add("video");
			results = engine.search(inTitle, null, null, null, "2000-01-01", null);
			engine.printResults(results);
			
			// 6) search documents with no word "canada" or "iraq" or "israel" in the description whose publication date is 2011-12-18 or earlier
			notInDescription = new LinkedList<String>();
			notInDescription.add("canada");
			notInDescription.add("iraq");
			notInDescription.add("israel");
			results = engine.search(null, null, null, notInDescription, null, "2011-12-18");
			engine.printResults(results);
		}
		else
			System.out.println("ERROR: the path of a RSS Feed file has to be passed as a command line argument.");
	}
}