Commit 18bd6478 authored by Thibault Ehrhart

Cleanup and refactor NewsArticle a bit

parent 2199a04d
......@@ -5,6 +5,7 @@ import com.pengyifan.brat.BratDocument;
import com.pengyifan.brat.BratEntity;
import com.pengyifan.brat.io.BratDocumentReader;
import fr.eurecom.asrael.commons.datatypes.Annotation;
import fr.eurecom.asrael.commons.datatypes.NewsArticle;
import fr.eurecom.asrael.commons.datatypes.WikidataEvent;
import fr.eurecom.asrael.commons.utils.UUIDGenerator;
import org.apache.commons.io.FilenameUtils;
......@@ -107,7 +108,7 @@ public class BratAnnotator {
finalModel.add(event.getModel());
finalModel.add(
ResourceFactory.createResource(
"http://asrael.eurecom.fr/news/"
NewsArticle.BASE_URI
+ UUIDGenerator.generateUUID(identifier)),
ResourceFactory.createProperty(
"http://iptc.org/std/rNews/2011-10-07#about"),
......
......@@ -4,10 +4,13 @@ import fr.eurecom.asrael.commons.utils.UUIDGenerator;
import org.apache.jena.datatypes.xsd.XSDDatatype;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.ModelFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.vocabulary.DC_11;
import org.apache.jena.vocabulary.RDF;
import java.net.URI;
import java.net.URISyntaxException;
import java.time.LocalDateTime;
import java.time.Month;
import java.time.format.DateTimeFormatter;
......@@ -219,22 +222,23 @@ public class NewsArticle {
model.setNsPrefixes(prefixes);
final Resource entity = ResourceFactory.createResource(this.getURI());
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
RDF.type,
ResourceFactory.createResource("http://iptc.org/std/rNews/2011-10-07#Article"));
// Identifier
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#identifier"),
ResourceFactory.createPlainLiteral(this.id));
// Dateline
if (!this.dateline.isEmpty()) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#dateline"),
ResourceFactory.createPlainLiteral(this.dateline));
}
......@@ -242,15 +246,14 @@ public class NewsArticle {
// Headline
for (final Map.Entry<String, String> entry : this.headlines.entrySet()) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#headline"),
ResourceFactory.createLangLiteral(entry.getValue(), entry.getKey()));
}
// Date created
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#dateCreated"),
ResourceFactory.createTypedLiteral(
this.dateCreated.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")),
......@@ -258,7 +261,7 @@ public class NewsArticle {
// Date published
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#datePublished"),
ResourceFactory.createTypedLiteral(
this.datePublished.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")),
......@@ -266,7 +269,7 @@ public class NewsArticle {
// Content reference time
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://schema.org/contentReferenceTime"),
ResourceFactory.createTypedLiteral(
this.datePublished.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")),
......@@ -274,7 +277,7 @@ public class NewsArticle {
// Date modified
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#dateModified"),
ResourceFactory.createTypedLiteral(
this.dateModified.format(DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss'Z'")),
......@@ -284,8 +287,7 @@ public class NewsArticle {
if (!this.descriptions.isEmpty()) {
for (final Map.Entry<String, String> entry : this.descriptions.entrySet()) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#articleBody"),
ResourceFactory.createLangLiteral(entry.getValue(), entry.getKey()));
}
......@@ -294,26 +296,20 @@ public class NewsArticle {
// Slug
if (!this.slug.isEmpty()) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#slug"),
ResourceFactory.createPlainLiteral(this.slug));
}
// Subjects
for (final String subject : this.subjects) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
DC_11.subject,
ResourceFactory.createResource(subject));
model.add(entity, DC_11.subject, ResourceFactory.createResource(subject));
}
// Keywords
for (final String keyword : this.keywords) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://schema.org/keywords"),
ResourceFactory.createPlainLiteral(keyword));
}
......@@ -323,8 +319,7 @@ public class NewsArticle {
if (!media.getUrl().isEmpty()) {
model.add(media.getRDFModel());
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#associatedMedia"),
ResourceFactory.createResource(media.getUrl()));
}
......@@ -333,21 +328,20 @@ public class NewsArticle {
// Genre
if (!this.genre.isEmpty()) {
model.add(
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#genre"),
ResourceFactory.createPlainLiteral(this.genre));
}
// Language
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://iptc.org/std/rNews/2011-10-07#inLanguage"),
ResourceFactory.createPlainLiteral(this.language));
// Content location
model.add(
ResourceFactory.createResource(NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id)),
entity,
ResourceFactory.createProperty("http://schema.org/contentLocation"),
ResourceFactory.createResource(
NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id) + "/contentLocation"));
......@@ -380,6 +374,10 @@ public class NewsArticle {
return model;
}
/**
 * Returns the URI identifying this article.
 *
 * <p>The value is {@code NewsArticle.BASE_URI} followed by the result of
 * {@code UUIDGenerator.generateUUID(this.id)}; it is recomputed on every call.
 *
 * @return the article URI as a string
 */
public final String getURI() {
return NewsArticle.BASE_URI + UUIDGenerator.generateUUID(this.id);
}
@Override
public final boolean equals(final Object obj) {
if (this == obj) {
......
......@@ -111,17 +111,20 @@ public class AgenceFrancePresse {
NewsArticle article =
AgenceFrancePresseMethods.parseArticle(
inputFactory.createXMLStreamReader(in));
if (!Collections.disjoint(
article.getKeywords(),
Arrays.asList("Agenda", "Advisory", "COMMUNIQUÉ-BUSINESS-WIRE"))) {
// Ignore articles with certain keywords
System.out.println(
"Skip article with Agenda/Advisory/Business Wire: " + article.getId());
return;
if (article != null) {
if (!Collections.disjoint(
article.getKeywords(),
Arrays.asList("Agenda", "Advisory", "COMMUNIQUÉ-BUSINESS-WIRE"))) {
// Ignore articles with certain keywords
System.out.println(
"Skip article with Agenda/Advisory/Business Wire: " + article.getId());
return;
}
List<ImageObject> images =
AgenceFrancePresseMethods.getArticleImages(article);
images.forEach(article::addAssociatedMedia);
newsArticles.add(article);
}
List<ImageObject> images = AgenceFrancePresseMethods.getArticleImages(article);
images.forEach(article::addAssociatedMedia);
newsArticles.add(article);
} catch (XMLStreamException e) {
AgenceFrancePresse.LOGGER.error(e.toString());
}
......
......@@ -161,6 +161,10 @@ public final class AgenceFrancePresseMethods {
AgenceFrancePresseMethods.LOGGER.error("Error when parsing XML file: " + e.toString());
}
if (news.getId().isEmpty()) {
return null;
}
return news;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment