Commit 7423d71b authored by Bertrand Goupil's avatar Bertrand Goupil
Browse files

Use of the news ID

- Change UUI temp generation with news Id
- Add news id in the json  response
parent 03100854
...@@ -40,9 +40,9 @@ ...@@ -40,9 +40,9 @@
<factorypathentry kind="VARJAR" id="M2_REPO/net/sourceforge/saxon/saxon/9.1.0.8/saxon-9.1.0.8.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="VARJAR" id="M2_REPO/net/sourceforge/saxon/saxon/9.1.0.8/saxon-9.1.0.8.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="VARJAR" id="M2_REPO/xom/xom/1.2.5/xom-1.2.5.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="VARJAR" id="M2_REPO/xom/xom/1.2.5/xom-1.2.5.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="VARJAR" id="M2_REPO/xalan/xalan/2.7.0/xalan-2.7.0.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="VARJAR" id="M2_REPO/xalan/xalan/2.7.0/xalan-2.7.0.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="EXTJAR" id="/Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/lib/jar/stanford-french-corenlp-2016-01-14-models.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="EXTJAR" id="/Users/bertrand/git/code/Limsi-SourceExtractor/lib/jar/stanford-french-corenlp-2016-01-14-models.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="EXTJAR" id="/Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/lib/jar/wapiti-1.5.0-win.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="EXTJAR" id="/Users/bertrand/git/code/Limsi-SourceExtractor/lib/jar/wapiti-1.5.0-win.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="EXTJAR" id="/Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/lib/jar/hfst-ol.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="EXTJAR" id="/Users/bertrand/git/code/Limsi-SourceExtractor/lib/jar/hfst-ol.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="VARJAR" id="M2_REPO/commons-io/commons-io/2.5/commons-io-2.5.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="VARJAR" id="M2_REPO/commons-io/commons-io/2.5/commons-io-2.5.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="VARJAR" id="M2_REPO/org/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="VARJAR" id="M2_REPO/org/apache/commons/commons-lang3/3.4/commons-lang3-3.4.jar" enabled="true" runInBatchMode="false"/>
<factorypathentry kind="VARJAR" id="M2_REPO/commons-cli/commons-cli/1.3.1/commons-cli-1.3.1.jar" enabled="true" runInBatchMode="false"/> <factorypathentry kind="VARJAR" id="M2_REPO/commons-cli/commons-cli/1.3.1/commons-cli-1.3.1.jar" enabled="true" runInBatchMode="false"/>
......
server: server:
context-path: /sourceExtractor context-path: /limsi-sourceExtractor
resource: resource:
#Directory containing the librairies and models #Directory containing the librairies and models
......
...@@ -8,7 +8,7 @@ ...@@ -8,7 +8,7 @@
<parent> <parent>
<groupId>org.springframework.boot</groupId> <groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId> <artifactId>spring-boot-starter-parent</artifactId>
<version>1.5.2.RELEASE</version> <version>1.5.8.RELEASE</version>
</parent> </parent>
<name>LimsiSourceExtractor</name> <name>LimsiSourceExtractor</name>
<properties> <properties>
...@@ -156,6 +156,7 @@ ...@@ -156,6 +156,7 @@
<artifactId>spring-boot-starter-tomcat</artifactId> <artifactId>spring-boot-starter-tomcat</artifactId>
<scope>provided</scope> <scope>provided</scope>
</dependency> </dependency>
<!-- <dependency> --> <!-- <dependency> -->
<!-- <groupId>org.springframework.boot</groupId> --> <!-- <groupId>org.springframework.boot</groupId> -->
<!-- <artifactId>spring-boot-starter-actuator</artifactId> --> <!-- <artifactId>spring-boot-starter-actuator</artifactId> -->
......
...@@ -4,7 +4,6 @@ import java.io.File; ...@@ -4,7 +4,6 @@ import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter; import java.io.StringWriter;
import java.util.UUID;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
...@@ -22,11 +21,12 @@ import fr.limsi.sourceExtractor.training.AProcessSupport; ...@@ -22,11 +21,12 @@ import fr.limsi.sourceExtractor.training.AProcessSupport;
public class SimplePreprocessing { public class SimplePreprocessing {
private Pattern pattern; private Pattern pattern, patternIdentifier;
private AProcessSupport processSupport; private AProcessSupport processSupport;
public SimplePreprocessing(AProcessSupport processSupport) { public SimplePreprocessing(AProcessSupport processSupport) {
this.pattern = Pattern.compile("<p>([^<]+)</p>"); this.pattern = Pattern.compile("<p>([^<]+)</p>");
this.patternIdentifier = Pattern.compile("<PublicIdentifier>([^<]+)</PublicIdentifier>");
this.processSupport = processSupport; this.processSupport = processSupport;
} }
...@@ -36,8 +36,10 @@ public class SimplePreprocessing { ...@@ -36,8 +36,10 @@ public class SimplePreprocessing {
Paths paths = config.getPaths(); Paths paths = config.getPaths();
Tools tools = config.getTools(); Tools tools = config.getTools();
Resources resources = config.getResources(); Resources resources = config.getResources();
String fileId = UUID.randomUUID().toString(); String xmlText = toString(contentStream);
String text = extractTextFromXML(fileId, contentStream, memory); //String fileId = UUID.randomUUID().toString();
String fileId = extractPublicIdentifier(xmlText);
String text = extractTextFromXML(fileId, xmlText, memory);
// we create the files in which we will write the results // we create the files in which we will write the results
File outFilePRIM = DIRUtils.createDirAndFilesWithExt(paths.DIR_TEST_FILES_UNLABELED_PRIM, fileId, ".tag"); File outFilePRIM = DIRUtils.createDirAndFilesWithExt(paths.DIR_TEST_FILES_UNLABELED_PRIM, fileId, ".tag");
File outFileSEC = DIRUtils.createDirAndFilesWithExt(paths.DIR_TEST_FILES_UNLABELED_SEC, fileId, ".tag"); File outFileSEC = DIRUtils.createDirAndFilesWithExt(paths.DIR_TEST_FILES_UNLABELED_SEC, fileId, ".tag");
...@@ -51,10 +53,25 @@ public class SimplePreprocessing { ...@@ -51,10 +53,25 @@ public class SimplePreprocessing {
return fileId; return fileId;
} }
private String extractTextFromXML(String fileId, InputStream contentStream, Memory memory) throws IOException { private String toString(InputStream contentStream) throws IOException {
StringWriter writer = new StringWriter(); StringWriter writer = new StringWriter();
IOUtils.copy(contentStream, writer, Charsets.UTF_8); IOUtils.copy(contentStream, writer, Charsets.UTF_8);
String xmlText = writer.toString(); String xmlText = writer.toString();
return xmlText;
}
private String extractPublicIdentifier(String xmlText) {
Matcher matcher = this.patternIdentifier.matcher(xmlText);
String fileId ="";
while (matcher.find()) {
fileId = matcher.group(1);
}
fileId.replaceAll("\\r", "");
return fileId;
}
private String extractTextFromXML(String fileId, String xmlText, Memory memory) {
Matcher matcher = this.pattern.matcher(xmlText); Matcher matcher = this.pattern.matcher(xmlText);
StringBuilder result = new StringBuilder(); StringBuilder result = new StringBuilder();
while (matcher.find()) { while (matcher.find()) {
......
...@@ -963,7 +963,8 @@ public abstract class AProcess extends AProcessSupport { ...@@ -963,7 +963,8 @@ public abstract class AProcess extends AProcessSupport {
TreeSet<SourceAnnotation> orderedAnnotations = new TreeSet<>(); TreeSet<SourceAnnotation> orderedAnnotations = new TreeSet<>();
orderedAnnotations.addAll(annotations.values()); orderedAnnotations.addAll(annotations.values());
StringBuilder jsonResult = new StringBuilder(); StringBuilder jsonResult = new StringBuilder();
jsonResult.append("{\"source_sentences\":[\n"); jsonResult.append("{\"identifier\":\""+fileId+"\",\n");
jsonResult.append("\"source_sentences\":[\n");
int annIndex = 0; int annIndex = 0;
......
## Directory containing the librairies and models ## Directory containing the librairies and models
LIB_DIR=/Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/lib LIB_DIR=/Users/bertrand/git/code/Limsi-SourceExtractor/lib
## Directory containing the language-dependent resources ## Directory containing the language-dependent resources
RESOURCES_DIR=/Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/resources RESOURCES_DIR=/Users/bertrand/git/code//Limsi-SourceExtractor/resources
## DATA_DIR should only be set correctly for training the models ## DATA_DIR should only be set correctly for training the models
## The directory is useless in production mode ## The directory is useless in production mode
......
...@@ -3,9 +3,9 @@ server: ...@@ -3,9 +3,9 @@ server:
resource: resource:
#Directory containing the librairies and models #Directory containing the librairies and models
lib: /Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/lib lib: /Users/bertrand/git/code/Limsi-SourceExtractor/lib
#Directory containing the language-dependent resources #Directory containing the language-dependent resources
resources: /Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/resources resources: /Users/bertrand/git/code/Limsi-SourceExtractor/resources
#Directory containing trained data #Directory containing trained data
data: /home/xtannier/Recherche/SourceExtractor data: /home/xtannier/Recherche/SourceExtractor
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment