Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions http-tests/document-hierarchy/POST-html-jsonld.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash
set -euo pipefail

# Reset both datasets and drop all cached responses so the test starts clean.
initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

# The agent needs write access: put it into the writers group (done as the owner).
add-agent-to-group.sh \
  -f "$OWNER_CERT_FILE" \
  -p "$OWNER_CERT_PWD" \
  --agent "$AGENT_URI" \
  "${ADMIN_BASE_URL}acl/groups/writers/"

# POST an HTML page that carries its RDF in a <script type="application/ld+json"> block.
# HtmlJsonLDReader is expected to pull the JSON-LD out of the markup and store its triple.
# Only the HTTP status code is printed by curl; the pipeline succeeds when it matches
# $STATUS_NO_CONTENT. The heredoc is unquoted on purpose so ${END_USER_BASE_URL} expands.
curl --insecure --silent --fail \
  --output /dev/null \
  --write-out "%{http_code}\n" \
  --cert "${AGENT_CERT_FILE}:${AGENT_CERT_PWD}" \
  --header "Accept: application/n-triples" \
  --header "Content-Type: text/html" \
  --data-binary @- \
  "$END_USER_BASE_URL" <<EOF | grep -q "$STATUS_NO_CONTENT"
<!DOCTYPE html><html><head><title>HTML/JSON-LD POST test</title><script type="application/ld+json">{"@context":{"ex":"http://example.com/","label":{"@id":"ex:default-predicate"}},"@id":"${END_USER_BASE_URL}named-subject-html-jsonld","label":"named object HTML/JSON-LD POST"}</script></head><body></body></html>
EOF

# Read the document back as N-Triples and make sure the literal from the embedded
# JSON-LD is present. Output is redirected (rather than using grep -q) so grep reads
# the whole stream and upstream commands never see a closed pipe under pipefail.
curl --insecure --fail --silent --get \
  --cert "${AGENT_CERT_FILE}:${AGENT_CERT_PWD}" \
  --header "Accept: application/n-triples" \
  "$END_USER_BASE_URL" \
  | tr -d '\n' \
  | grep '"named object HTML/JSON-LD POST"' >/dev/null
49 changes: 49 additions & 0 deletions http-tests/proxy/GET-proxied-html-jsonld.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
set -euo pipefail

# Reset both datasets and drop all cached responses so the test starts clean.
initialize_dataset "$END_USER_BASE_URL" "$TMP_END_USER_DATASET" "$END_USER_ENDPOINT_URL"
initialize_dataset "$ADMIN_BASE_URL" "$TMP_ADMIN_DATASET" "$ADMIN_ENDPOINT_URL"
purge_cache "$END_USER_VARNISH_SERVICE"
purge_cache "$ADMIN_VARNISH_SERVICE"
purge_cache "$FRONTEND_VARNISH_SERVICE"

# add agent to the readers group to be able to read documents

add-agent-to-group.sh \
  -f "$OWNER_CERT_FILE" \
  -p "$OWNER_CERT_PWD" \
  --agent "$AGENT_URI" \
  "${ADMIN_BASE_URL}acl/groups/readers/"

# Regression: when an upstream proxied URI returns text/html (e.g. a schema.org term page)
# but embeds JSON-LD via <script type="application/ld+json">, ProxyRequestFilter must
# extract triples through HtmlJsonLDReader and serve them in the format the client
# requested via Accept. It must NOT relay the upstream's text/html bytes verbatim —
# that breaks downstream RDF consumers (SaxonJS, curl, anything expecting RDF).

target_uri='https://schema.org/WebSite'

# Response body goes to a private scratch file. mktemp gives a unique name, so
# concurrent runs cannot clobber each other, and the EXIT trap removes the file on
# every exit path — including the early exits that `set -e` or `exit 1` trigger below.
body_file=$(mktemp)
trap 'rm -f "$body_file"' EXIT

# 1. Content-Type negotiation: response must satisfy the client's Accept (text/turtle),
#    not be passed through as upstream's text/html

content_type=$(curl -k -f -s -G -w "%{content_type}" -o "$body_file" \
  -E "$AGENT_CERT_FILE":"$AGENT_CERT_PWD" \
  -H "Accept: text/turtle" \
  --data-urlencode "uri=${target_uri}" \
  "$END_USER_BASE_URL")

case "$content_type" in
  text/turtle*) ;;
  *) exit 1 ;;
esac

# 2. The body must parse as turtle and contain at least one triple with
#    <https://schema.org/WebSite> as its subject — the canonical @id of the
#    schema.org type defined by the JSON-LD embedded in the HTML page.
#    `|| true` keeps the script alive when the pipeline fails (rapper parse error, or
#    grep finding no match); grep -c still prints 0 in that case and the check below fails.

triple_count=$(rapper -q --input turtle --output ntriples "$body_file" - \
  | grep -c "^<${target_uri}>" || true)

if [ "$triple_count" -lt 1 ]; then exit 1; fi
11 changes: 0 additions & 11 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,6 @@
<artifactId>jena-arq</artifactId>
<version>6.1.0</version>
</dependency>
<dependency>
<groupId>com.github.jsonld-java</groupId>
<artifactId>jsonld-java</artifactId>
<version>0.13.4</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>twirl</artifactId>
Expand All @@ -158,12 +153,6 @@
<version>4.3.0</version>
<type>war</type>
</dependency>
<!-- required by jsonld-java - version same as Jersey's HTTP Client -->
<dependency>
<groupId>org.apache.httpcomponents</groupId>
<artifactId>httpclient-cache</artifactId>
<version>4.5.14</version>
</dependency>
<dependency>
<groupId>com.auth0</groupId>
<artifactId>java-jwt</artifactId>
Expand Down
24 changes: 8 additions & 16 deletions src/main/java/com/atomgraph/linkeddatahub/Application.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
import com.atomgraph.linkeddatahub.client.filter.JSONGRDDLFilterProvider;
import com.atomgraph.linkeddatahub.imports.ImportExecutor;
import com.atomgraph.linkeddatahub.io.HtmlJsonLDReaderFactory;
import com.atomgraph.linkeddatahub.io.JsonLDReader;
import com.atomgraph.linkeddatahub.io.SchemaOrgDocumentLoader;
import com.atomgraph.linkeddatahub.listener.EMailListener;
import com.atomgraph.linkeddatahub.writer.ModelXSLTWriter;
import com.atomgraph.linkeddatahub.model.Import;
Expand Down Expand Up @@ -175,8 +175,8 @@
import com.atomgraph.server.mapper.SHACLConstraintViolationExceptionMapper;
import com.atomgraph.server.mapper.SPINConstraintViolationExceptionMapper;
import com.atomgraph.spinrdf.vocabulary.SP;
import com.github.jsonldjava.core.DocumentLoader;
import com.github.jsonldjava.core.JsonLdOptions;
import com.apicatalog.jsonld.JsonLdError;
import com.apicatalog.jsonld.JsonLdOptions;
import java.io.FileOutputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -229,9 +229,6 @@
import org.apache.jena.rdf.model.ResIterator;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.riot.resultset.ResultSetLang;
import org.apache.jena.riot.system.ErrorHandlerFactory;
import org.apache.jena.riot.system.ParserProfile;
import org.apache.jena.riot.system.RiotLib;
import org.apache.jena.sparql.graph.GraphReadOnly;
import org.apache.jena.vocabulary.DCTerms;
import org.apache.jena.vocabulary.LocationMappingVocab;
Expand Down Expand Up @@ -670,23 +667,18 @@ public Application(final ServletConfig servletConfig, final MediaTypes mediaType
RDFLanguages.register(ResultSetLang.RS_None);

// add HTML/JSON-LD reader
DocumentLoader documentLoader = new DocumentLoader();
JsonLdOptions jsonLdOptions = new JsonLdOptions();
try (InputStream contextStream = servletConfig.getServletContext().getResourceAsStream("/WEB-INF/classes/com/atomgraph/linkeddatahub/schema.org.jsonldcontext.json"))
{
String jsonContext = new String(contextStream.readAllBytes(), StandardCharsets.UTF_8);
documentLoader.addInjectedDoc("http://schema.org", jsonContext);
documentLoader.addInjectedDoc("https://schema.org", jsonContext);
jsonLdOptions.setDocumentLoader(documentLoader);
JsonLdOptions jsonLdOptions = new JsonLdOptions();
jsonLdOptions.setDocumentLoader(new SchemaOrgDocumentLoader(jsonContext));

ParserProfile profile = RiotLib.profile(HtmlJsonLDReaderFactory.HTML, null, ErrorHandlerFactory.getDefaultErrorHandler());
RDFLanguages.register(HtmlJsonLDReaderFactory.HTML);
RDFParserRegistry.registerLangTriples(HtmlJsonLDReaderFactory.HTML,
new HtmlJsonLDReaderFactory(new JsonLDReader(Lang.JSONLD, profile, profile.getErrorHandler()), jsonLdOptions));
RDFParserRegistry.registerLangTriples(HtmlJsonLDReaderFactory.HTML, new HtmlJsonLDReaderFactory(jsonLdOptions));
}
catch (IOException ex)
catch (IOException | JsonLdError ex)
{
if (log.isErrorEnabled()) log.error("schema.org @context not found", ex);
if (log.isErrorEnabled()) log.error("schema.org @context not found or invalid", ex);
}

// register plain RDF/XML writer as default
Expand Down
70 changes: 30 additions & 40 deletions src/main/java/com/atomgraph/linkeddatahub/io/HtmlJsonLDReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@
*/
package com.atomgraph.linkeddatahub.io;

import static com.atomgraph.linkeddatahub.io.JsonLDReader.JSONLD_OPTIONS;
import com.github.jsonldjava.core.JsonLdOptions;
import com.apicatalog.jsonld.JsonLdOptions;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import org.apache.jena.atlas.web.ContentType;
import org.apache.jena.riot.Lang;
import static org.apache.jena.riot.Lang.JSONLD;
import org.apache.jena.riot.RDFParser;
import org.apache.jena.riot.ReaderRIOTBase;
import org.apache.jena.riot.RiotParseException;
import org.apache.jena.riot.lang.LangJSONLD11;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.sparql.util.Context;
import org.apache.jena.util.FileUtils;
Expand All @@ -35,35 +35,32 @@

/**
* JSON-LD-in-HTML reader.
* Extracts <code>&lt;script type="application/ld+json"&gt;</code> elements from the HTML input
* and delegates each JSON-LD payload to Jena's stock {@link Lang#JSONLD11} reader (Titanium-backed).
* Can be used to read schema.org data.
*
*
* @author {@literal Martynas Jusevičius <martynas@atomgraph.com>}
*/
public class HtmlJsonLDReader extends ReaderRIOTBase
{

private final JsonLDReader jsonLDReader;
private final JsonLdOptions options;

/**
* Constructs JSON-LD-in-HTML reader.
*
* @param jsonLDReader JSON-LD reader
* Constructs JSON-LD-in-HTML reader without options.
*/
public HtmlJsonLDReader(JsonLDReader jsonLDReader)
public HtmlJsonLDReader()
{
this(jsonLDReader, null);
this(null);
}

/**
* Constructs JSON-LD-in-HTML reader.
*
* @param jsonLDReader JSON-LD reader
* @param options JSON-LD reader options
* Constructs JSON-LD-in-HTML reader with options.
*
* @param options Titanium JSON-LD options
*/
public HtmlJsonLDReader(JsonLDReader jsonLDReader, JsonLdOptions options)
public HtmlJsonLDReader(JsonLdOptions options)
{
this.jsonLDReader = jsonLDReader;
this.options = options;
}

Expand All @@ -72,16 +69,16 @@ public void read(InputStream in, String baseURI, Lang lang, StreamRDF output, Co
{
read(FileUtils.asBufferedUTF8(in), baseURI, output, context);
}

@Override
public void read(Reader in, String baseURI, ContentType ct, StreamRDF output, Context context)
{
read(in, baseURI, output, context);
}

/**
* Reads JSON-LD data from the HTML <code>&lt;script&gt;</code> element.
*
*
* @param in HTML input stream
* @param baseURI base URI
* @param output RDF output stream
Expand All @@ -94,32 +91,25 @@ public void read(Reader in, String baseURI, StreamRDF output, Context context)

if (jsonLdElements.isEmpty()) throw new RiotParseException("<script> element with type=\"application/ld+json\" not found", -1, -1);

context.set(JSONLD_OPTIONS, getJsonLdOptions());

// read from all <script type="application/ld+json"> elements
jsonLdElements.stream().map(element -> element.data()).forEach(jsonLd -> {
getJsonLDReader().read(new StringReader(jsonLd), baseURI, JSONLD.getContentType(), output, context);
});
}
if (getJsonLdOptions() != null) context.set(LangJSONLD11.JSONLD_OPTIONS, getJsonLdOptions());

/**
* Returns JSON-LD reader.
*
* @return reader
*/
public JsonLDReader getJsonLDReader()
{
return jsonLDReader;
jsonLdElements.stream().map(element -> element.data()).forEach(jsonLd ->
RDFParser.create().
source(new StringReader(jsonLd)).
lang(Lang.JSONLD11).
base(baseURI).
context(context).
parse(output));
}

/**
* Returns JSON-LD reader options.
*
*
* @return reader options
*/
public JsonLdOptions getJsonLdOptions()
{
return options;
}
}

}
Loading
Loading