Unable to query local version of Linked Movie Database_问答_开发者

I am trying to query a local version of the Linked Movie Database using SPARQL. The file is in N-Triples format and its size is approximately 450 MB. I am using servlets for the implementation. When I pass the query, the servlet takes more than five minutes to process it, and at the end I get the following exception:

type Exception report

message 

description The server encountered an internal error () that prevented it from fulfilling this request.

exception 

javax.servlet.ServletException: Servlet execution threw an exception


root cause 

java.lang.OutOfMemoryError: Java heap space
    java.util.Arrays.copyOfRange(Arrays.java:3209)
    java.lang.String.<init>(String.java:215)
    java.lang.StringBuilder.toString(StringBuilder.java:430)
    org.openjena.riot.tokens.TokenizerText.allBetween(TokenizerText.java:732)
    org.openjena.riot.tokens.TokenizerText.parseToken(TokenizerText.java:152)
    org.openjena.riot.tokens.TokenizerText.hasNext(TokenizerText.java:69)
    org.openjena.atlas.iterator.PeekIterator.fill(PeekIterator.java:37)
    org.openjena.atlas.iterator.PeekIterator.next(PeekIterator.java:77)
    org.openjena.riot.lang.LangBase.nextToken(LangBase.java:145)
    org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:59)
    org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:21)
    org.openjena.riot.lang.LangNTuple.runParser(LangNTuple.java:58)
    org.openjena.riot.lang.LangBase.parse(LangBase.java:75)
    org.openjena.riot.system.JenaReaderNTriples2.readWorker(JenaReaderNTriples2.java:28)
    org.openjena.riot.system.JenaReaderRIOT.readImpl(JenaReaderRIOT.java:124)
    org.openjena.riot.system.JenaReaderRIOT.read(JenaReaderRIOT.java:79)
    com.hp.hpl.jena.rdf.model.impl.ModelCom.read(ModelCom.java:226)
    com.hp.hpl.jena.util.FileManager.readModelWorker(FileManager.java:395)
    com.hp.hpl.jena.util.FileManager.loadModelWorker(FileManager.java:299)
    com.hp.hpl.jena.util.FileManager.loadModel(FileManager.java:250)
    ServletExample.runQuery(ServletExample.java:92)
    ServletExample.doGet(ServletExample.java:62)
    javax.servlet.http.HttpServlet.service(HttpServlet.java:627)
    javax.servlet.http.HttpServlet.service(HttpServlet.java:729)


note The full stack trace of the root cause is available in the Apache Tomcat/5.5.31 logs.

My code is:

import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.ServletException;
import javax.servlet.http.*;

import com.hp.hpl.jena.query.*;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.util.FileManager;

/**
 * Servlet that lists the actors who appear both in a film by one director and
 * in a film by another, by running a SPARQL query against a local N-Triples
 * dump of the Linked Movie Database.
 *
 * <p>The two director names are taken from the request parameters
 * <code>dir1</code> and <code>dir2</code>.</p>
 *
 * <p>The dump is parsed once, on first use, and the resulting in-memory model
 * is reused by later requests instead of being re-read on every GET. The whole
 * dump is still held on the Java heap, so the JVM must be started with a heap
 * large enough for it; a persistent store avoids that requirement.</p>
 */
public class ServletExample
    extends HttpServlet
{
    /***********************************/
    /* Constants                       */
    /***********************************/

    private static final long serialVersionUID = 1L;

    /** Public LinkedMDB SPARQL endpoint. Not used when querying the local dump. */
    public static final String SPARQL_ENDPOINT = "http://data.linkedmdb.org/sparql";

    /**
     * Fixed example query (Sofia Coppola / Francis Ford Coppola). Kept for
     * callers that reference it; requests are answered with the query built
     * by {@link #buildQuery(String, String)}.
     */
    public static final String QUERY =
            "PREFIX m: <http://data.linkedmdb.org/resource/movie/>" +
            "SELECT DISTINCT ?actorName WHERE {" +
            "?dir1 m:director_name \"Sofia Coppola\"." +
            "?dir2 m:director_name \"Francis Ford Coppola\"." +
            "?dir1film m:director ?dir1;" +
            "m:actor ?actor." +
            "?dir2film m:director ?dir2;" +
            "m:actor ?actor." +
            "?actor m:actor_name ?actorName." +
            "}";

    /** Location of the local LinkedMDB N-Triples dump. */
    private static final String DUMP_PATH =
            "e:\\applications\\linkedmdb-18-05-2009-dump\\dump.nt";

    private static final String HEADER = "<html>\n" +
            "      <head>\n" +
            "        <title>results</title>\n" +
            "          <link href=\"simple.css\" type=\"text/css\" rel=\"stylesheet\" />\n" +
            "      </head>\n" +
            "      <body>\n" +
            "";

    private static final String FOOTER = "</body></html>";

    /***********************************/
    /* State                           */
    /***********************************/

    /**
     * Lazily loaded model of the dump, shared by all requests. Transient
     * because the servlet class is serializable and the model is runtime
     * state only.
     */
    private transient Model model;

    /***********************************/
    /* Request handling                */
    /***********************************/

    /**
     * Respond to HTTP GET request. Will need to be mounted against some URL
     * pattern in web.xml.
     *
     * @param req  the request; expected to carry non-empty <code>dir1</code>
     *             and <code>dir2</code> parameters
     * @param resp the response the HTML page is written to
     * @throws ServletException declared by the servlet contract
     * @throws IOException if the response cannot be written
     */
    @Override
    protected void doGet( HttpServletRequest req, HttpServletResponse resp )
        throws ServletException, IOException
    {
        // Must be set before the first getWriter() call so the writer uses
        // this encoding for names outside Latin-1.
        resp.setContentType( "text/html;charset=UTF-8" );

        String dir1 = req.getParameter( "dir1" );
        String dir2 = req.getParameter( "dir2" );

        if (dir1 == null || dir2 == null || dir1.isEmpty() || dir2.isEmpty()) {
            noInput( resp );
        }
        else {
            runQuery( resp, dir1, dir2 );
        }
    }

    /**
     * Write a page asking the user to supply both director names.
     *
     * @param resp the response to write to
     * @throws IOException if the response cannot be written
     */
    protected void noInput( HttpServletResponse resp )
        throws IOException
    {
        header( resp );
        resp.getWriter().println( "<p>Please select director names as query params <code>dir1</code> and <code>dir2</code></p>" );
        footer( resp );
    }

    /**
     * Write the closing HTML of the page.
     *
     * @param resp the response to write to
     * @throws IOException if the response cannot be written
     */
    protected void footer( HttpServletResponse resp ) throws IOException {
        resp.getWriter().println( FOOTER );
    }

    /**
     * Write the opening HTML of the page.
     *
     * @param resp the response to write to
     * @throws IOException if the response cannot be written
     */
    protected void header( HttpServletResponse resp ) throws IOException {
        resp.getWriter().println( HEADER );
    }

    /**
     * Run the shared-actors query for the two given directors against the
     * local dump and write the result page.
     *
     * @param resp the response to write to
     * @param dir1 name of the first director, matched against
     *             <code>m:director_name</code>
     * @param dir2 name of the second director
     * @throws IOException if the response cannot be written
     */
    protected void runQuery( HttpServletResponse resp, String dir1, String dir2 )
        throws IOException
    {
        PrintWriter out = resp.getWriter();

        // Parse the query first so a malformed query fails before the
        // (expensive) dump load is attempted.
        Query query = QueryFactory.create( buildQuery( dir1, dir2 ) );
        QueryExecution qexec = QueryExecutionFactory.create( query, getModel() );

        try {
            ResultSet results = qexec.execSelect();

            header( resp );
            if (!results.hasNext()) {
                out.println( "<p>No results, sorry.</p>" );
            }
            else {
                out.println( "<h1>Results</h1>" );
                while (results.hasNext()) {
                    QuerySolution qs = results.next();
                    String actorName = qs.getLiteral( "actorName" ).getLexicalForm();
                    // Names come from the data set, so escape them before
                    // embedding them in markup.
                    out.println( "<div>Actor named: " + escapeHtml( actorName ) + "</div>" );
                }
            }
            footer( resp );
        }
        finally {
            // Release the query execution even if writing the page fails.
            qexec.close();
        }
    }

    /***********************************/
    /* Internal helpers                */
    /***********************************/

    /**
     * Return the model of the dump, loading it from {@link #DUMP_PATH} on the
     * first call. Synchronized so that concurrent first requests do not each
     * parse the file. If loading fails the field stays unset and the next
     * request tries again.
     *
     * NOTE(review): after loading, requests query the shared model
     * concurrently and read-only; confirm this against the Jena concurrency
     * guidance for the Jena version in use.
     */
    private synchronized Model getModel() {
        if (model == null) {
            model = FileManager.get().loadModel( DUMP_PATH );
        }
        return model;
    }

    /**
     * Build the SPARQL query that selects the names of actors appearing in
     * films by both directors. The names are inserted as escaped string
     * literals, never as raw text.
     */
    private static String buildQuery( String dir1, String dir2 ) {
        return "PREFIX m: <http://data.linkedmdb.org/resource/movie/>\n" +
               "SELECT DISTINCT ?actorName WHERE {\n" +
               "  ?dir1     m:director_name " + sparqlLiteral( dir1 ) + ".\n" +
               "  ?dir2     m:director_name " + sparqlLiteral( dir2 ) + ".\n" +
               "  ?dir1film m:director ?dir1;\n" +
               "            m:actor ?actor.\n" +
               "  ?dir2film m:director ?dir2;\n" +
               "            m:actor ?actor.\n" +
               "  ?actor    m:actor_name ?actorName.\n" +
               "}\n";
    }

    /**
     * Quote a value as a double-quoted SPARQL string literal, escaping the
     * characters that may not appear raw inside one (backslash, double quote,
     * line feed, carriage return).
     */
    private static String sparqlLiteral( String value ) {
        StringBuilder sb = new StringBuilder( value.length() + 2 );
        sb.append( '"' );
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt( i );
            switch (c) {
                case '\\': sb.append( "\\\\" ); break;
                case '"':  sb.append( "\\\"" ); break;
                case '\n': sb.append( "\\n" );  break;
                case '\r': sb.append( "\\r" );  break;
                default:   sb.append( c );      break;
            }
        }
        sb.append( '"' );
        return sb.toString();
    }

    /** Escape the characters that are significant in HTML text and attributes. */
    private static String escapeHtml( String text ) {
        StringBuilder sb = new StringBuilder( text.length() );
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt( i );
            switch (c) {
                case '&':  sb.append( "&amp;" );  break;
                case '<':  sb.append( "&lt;" );   break;
                case '>':  sb.append( "&gt;" );   break;
                case '"':  sb.append( "&quot;" ); break;
                case '\'': sb.append( "&#39;" );  break;
                default:   sb.append( c );        break;
            }
        }
        return sb.toString();
    }
}

Is there any way to resolve this exception?

It seems you're loading all your data into memory using Jena/RIOT. As far as I know, LinkedMDB is large enough to cause problems with this approach: you are effectively loading your entire database into memory.

Increasing the heap of your JVM could be one possible solution but it won't scale if your data keeps growing.

The right solution is to go for other configurations of Jena that are designed for this size of datasets. These are:

1. Jena SDB, which uses a relational database as its backend.
2. Jena TDB, which uses native Java storage based on B-tree indexes to speed up queries. It scales better than (1).

Optionally, you could go for a scalable RDF database such as 4store and query your data via Jena ARQ. This solution is by far the one that will scale and perform best.

You are running out of heap memory in the Java Virtual Machine (JVM). Either increase the amount of heap memory available to the JVM, or redesign your software to use less memory, for example by processing the data in smaller chunks.

To increase heap memory, add these parameters to your servlet container's or application server's startup script, somewhere your java binary is executed. This tells JVM that it may use up to 512 megabytes of memory, if that is not enough, try with larger values: