Commit 86f87807 authored by Bertrand Goupil's avatar Bertrand Goupil

First release

parents
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path=".apt_generated">
<attributes>
<attribute name="optional" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>
This diff is collapsed.
/.apt_generated/
/target/
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>Limsi-SourceExtractor</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.wst.common.project.facet.core.builder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.springframework.ide.eclipse.core.springbuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.wst.validation.validationbuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jem.workbench.JavaEMFNature</nature>
<nature>org.eclipse.wst.common.modulecore.ModuleCoreNature</nature>
<nature>org.springframework.ide.eclipse.core.springnature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.wst.common.project.facet.core.nature</nature>
<nature>org.eclipse.wst.jsdt.core.jsNature</nature>
</natures>
</projectDescription>
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry excluding="**/bower_components/*|**/node_modules/*|**/*.min.js" kind="src" path="src/main/webapp"/>
<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.JRE_CONTAINER"/>
<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.WebProject">
<attributes>
<attribute name="hide" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.wst.jsdt.launching.baseBrowserLibrary"/>
<classpathentry kind="output" path=""/>
</classpath>
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8
eclipse.preferences.version=1
org.eclipse.jdt.apt.aptEnabled=true
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.8
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.processAnnotations=enabled
org.eclipse.jdt.core.compiler.source=1.8
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1
<?xml version="1.0" encoding="UTF-8"?><project-modules id="moduleCoreId" project-version="1.5.0">
<wb-module deploy-name="limsiSourceExtractor">
<wb-resource deploy-path="/" source-path="/target/m2e-wtp/web-resources"/>
<wb-resource deploy-path="/" source-path="/src/main/webapp" tag="defaultRootSource"/>
<wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/java"/>
<wb-resource deploy-path="/WEB-INF/classes" source-path="/src/main/resources"/>
<wb-resource deploy-path="/WEB-INF/classes" source-path="/.apt_generated"/>
<property name="java-output-path" value="/Limsi-SourceExtractor/target/classes"/>
<property name="context-root" value="limsiSourceExtractor"/>
</wb-module>
</project-modules>
<?xml version="1.0" encoding="UTF-8"?>
<faceted-project>
<fixed facet="wst.jsdt.web"/>
<installed facet="java" version="1.8"/>
<installed facet="jst.web" version="3.1"/>
<installed facet="wst.jsdt.web" version="1.0"/>
</faceted-project>
org.eclipse.wst.jsdt.launching.baseBrowserLibrary
\ No newline at end of file
Window
\ No newline at end of file
disabled=06target
eclipse.preferences.version=1
# Limsi-SourceExtractor
## Abstract
This is a code refactoring of Limsi's source extractor program in order to expose source extraction as a web service.
This is a Spring Boot application deployed in a Docker image.
## Project requirement
This is a Java/Spring Boot project. All dependencies are declared in the Maven `pom.xml`.
On a macOS or Windows workstation, Docker should be installed with Docker Toolbox: [https://www.docker.com/docker-toolbox](https://www.docker.com/docker-toolbox)
+ Java Oracle JDK8 [http://www.oracle.com/technetwork/java/javase/downloads/index.html](http://www.oracle.com/technetwork/java/javase/downloads/index.html)
+ Apache Maven [https://maven.apache.org/download.cgi](https://maven.apache.org/download.cgi)
+ Docker [https://www.docker.com/docker-toolbox](https://www.docker.com/docker-toolbox)
## Run and Build project
### Run locally
+ Change `application.yml` in `PROJECT_HOME/Limsi-SourceExtractor/src/test/configuration` to point to the local `lib` and `resources` paths (replace PROJECT_HOME). Absolute paths are required.
resource:
#Directory containing the libraries and models
lib: PROJECT_HOME/Limsi-SourceExtractor/lib
#Directory containing the language-dependent resources
resources: PROJECT_HOME/Limsi-SourceExtractor/resources
+ `mvn clean install`
+ Go to the `target` directory
+ Run in a terminal `java -Dspring.config.location=../src/test/configuration/ -jar limsiSourceExtractor.jar`
If running from an IDE (Eclipse), the VM argument `-Dspring.config.location=src/test/configuration/` must be added to the "Run configuration".
#### Troubleshooting
+ The application does not find the `lib` and `resources` paths at startup:
=> check that `application.yml` sets the correct paths for `lib` and `resources`
=> the `-Dspring.config.location` path must end with `/`
#### Bug
+ The service stops on macOS during extraction with Wapiti's message `error: invalid model format` (this does not happen when running inside Eclipse ...)
### Run with Docker
#### Use default resources
Models and resources are embedded by default in the docker image.
+ Build docker image `mvn clean package docker:build`
+ Start default image `docker run -p 8080:8080 -t limsi-source-extractor`
#### Custom resources/lib
The built image can be used with a new training set. In this case it is possible to override the default model configuration.
In the Docker image, volumes have been set up to mount new resources.
+ The `lib` folder should be mounted to `/configuration/lib`
+ The `resources` folder should be mounted to `/configuration/resources`
+ The configuration directory `PROJECT_HOME/Limsi-SourceExtractor/configuration/`, whose `application.yml` points to the Docker volumes, must be used.
+ Run the Docker image with the mounted resources:
docker run -p 8080:8080 -v /PROJECT_HOME/Limsi-SourceExtractor/configuration:/configuration -v /PROJECT_HOME/Limsi-SourceExtractor/lib:/configuration/lib -v /PROJECT_HOME/Limsi-SourceExtractor/resources:/configuration/resources -t limsi-source-extractor
#### Docker cheat sheet
+ List images `docker images`
+ List containers `docker ps -a`
+ Stop container `docker stop <container_name>`
+ Start container `docker start <container_name>`
+ Remove stopped containers `docker rm $(docker ps -a -q)`
+ Remove image `docker rmi <image_name>`
+ Load image from file `docker load < image_file.tar`
+ Save image to file `docker save <image_name> > image_file.tar`
## Test
HTTP GET http://localhost:8080/sourceExtractor/
=>HTTP 200 "Greetings from Spring Boot!"
HTTP POST http://localhost:8080/sourceExtractor/extractNewsML
Content-Type: text/xml
curl -XPOST -H "Content-Type: text/xml" -d @afp.com-20150123T153420Z-TX-PAR-SZS52.xml http://localhost:8080/sourceExtractor/extractNewsML
# Spring Boot configuration for the source extractor service.
server:
# URL prefix for the HTTP endpoints (e.g. http://localhost:8080/sourceExtractor/)
context-path: /sourceExtractor
resource:
# Directory containing the libraries and models
#lib: /Users/bertrand/Documents/eclipse-workspace/afp-asrael/Limsi-SourceExtractor/lib
lib: /configuration/lib
# Directory containing the language-dependent resources
resources: /configuration/resources
# Directory containing trained data
# NOTE(review): developer-specific absolute path, unlike the /configuration
# locations used for lib and resources above; confirm it exists in the target environment
data: /home/xtannier/Recherche/SourceExtractor
logging:
level:
fr.limsi.sourceExtractor: info
\ No newline at end of file
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>Limsi-SourceExtractor</groupId>
<artifactId>Limsi-SourceExtractor</artifactId>
<version>0.0.1-SNAPSHOT</version>
<!-- <packaging>war</packaging> -->
<parent>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-parent</artifactId>
<version>1.5.2.RELEASE</version>
</parent>
<name>LimsiSourceExtractor</name>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<build>
<finalName>limsiSourceExtractor</finalName>
<resources>
<resource>
<directory>src/main/resources</directory>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-maven-plugin</artifactId>
<configuration>
<includeSystemScope>true</includeSystemScope>
</configuration>
</plugin>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>com.spotify</groupId>
<artifactId>docker-maven-plugin</artifactId>
<version>1.0.0</version>
<configuration>
<imageName>limsi-source-extractor</imageName>
<dockerDirectory>src/main/docker</dockerDirectory>
<resources>
<resource>
<targetPath>/</targetPath>
<directory>${project.build.directory}</directory>
<include>${project.build.finalName}.jar</include>
</resource>
<resource>
<targetPath>/</targetPath>
<directory>${project.basedir}</directory>
<includes>
<include>lib/**/*</include>
<include>resources/**/*</include>
</includes>
<excludes>
<exclude>lib/jar/**/*</exclude>
<exclude>**/.*</exclude>
</excludes>
</resource>
</resources>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-corenlp</artifactId>
<version>3.7.0</version>
<exclusions>
<exclusion>
<groupId>com.io7m.xom</groupId>
<artifactId>xom</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.maltparser</groupId>
<artifactId>maltparser</artifactId>
<version>1.9.0</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>21.0</version>
</dependency>
<dependency>
<groupId>org.grobid</groupId>
<artifactId>grobid-core</artifactId>
<version>0.4.1</version>
<exclusions>
<!-- Keep the transitive slf4j-log4j12 artifact off the classpath.
     NOTE(review): presumably to avoid a second SLF4J binding next to the
     logging brought in by the Spring Boot starters; confirm. -->
<exclusion>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Dependencies resolved from jar files inside the project (scope "system")
     instead of a Maven repository. Each jar must be present under
     ${project.basedir}/lib/jar before building. The spring-boot-maven-plugin
     configuration in this POM sets includeSystemScope=true, and the Docker
     resource copy in this POM excludes lib/jar. -->
<!-- Stanford CoreNLP French models, local jar. -->
<dependency>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-french-corenlp-2016-01-14-models</artifactId>
<version>3.7.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jar/stanford-french-corenlp-2016-01-14-models.jar</systemPath>
</dependency>
<!-- Wapiti 1.5.0, local jar.
     NOTE(review): the artifact and file names carry a "win" suffix; confirm
     this is the right build for the platform the service runs on. -->
<dependency>
<groupId>fr.limsi.wapiti</groupId>
<artifactId>wapiti-win</artifactId>
<version>1.5.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jar/wapiti-1.5.0-win.jar</systemPath>
</dependency>
<!-- HFST (hfst-ol.jar), local jar. -->
<dependency>
<groupId>net.sf</groupId>
<artifactId>hfst</artifactId>
<version>1.0</version>
<scope>system</scope>
<systemPath>${project.basedir}/lib/jar/hfst-ol.jar</systemPath>
</dependency>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.5</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.3.1</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-collections4</artifactId>
<version>4.1</version>
</dependency>
<!-- Service layer dependencies -->
<!-- Version is not declared here; the POM inherits from spring-boot-starter-parent. -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-tomcat</artifactId>
<scope>provided</scope>
</dependency>
<!-- <dependency> -->
<!-- <groupId>org.springframework.boot</groupId> -->
<!-- <artifactId>spring-boot-starter-actuator</artifactId> -->
<!-- </dependency> -->
</dependencies>
</project>
\ No newline at end of file
car===car;
quand===quand;
parce que===parce que;parceque;
pourquoi===pourquoi;pourquoy;
néanmoins===néanmoins;
soit===soit;
tandis===tandis;
d'où===d'où;
puis===puis;
cependant===cependant;
quoique===quoique;quoiqu';
aussi===aussi;
et/ou===et/ou;
ou===ou;
&===&;
or===or;
mais===mais;
alors===alors;
comme===comme;
si===si;
non===non;
sinon===sinon;
afin===afin;
lorsque===lorsque;lorsqu';
pis===pis;
ni===ni;
et===et;
pi===pi;
puisque===puisque;puisqu';
de===des;
son===ses;
ce===cet;
de===de;
son===leur;
ledit===ledit;
mon===ma;
mon===mes;
ton===ta;
ton===ton;
votre===vos;
leur===leurs;
le===les;
mon===mon;
votre===votre;
son===son;
un===un;
notre===notre;
notre===nos;
le===l';
le===le;
son===sa;
un===une;
le===la;
ce===ce;
ton===tes;
tu===tu;
ne===ne;
-ci===-ci;
-là===-là;
ô===ô;
parce===parce;
voici===voici;
depuis===depuis;
avant===avant;
ca===ca;
jusqu'à===jusqu'à;
vs===vs;
selon===selon;
par===par;
de===de;
sur===sur;
excepté===excepté;
pour===pour;
par-delà===par-delà;
entre===entre;
derrière===derrière;
jusque===jusque;jusques;
circa===circa;
lez===lez;
devant===devant;
après===après;
parmi===parmi;
auprès===auprès;
sous===sous;
s/===s/;
avec===avec;
sauf===sauf;
au-delà===au-delà;
durant===durant;
ci-contre===ci-contre;
dans===dans;
hormis===hormis;
dès===dès;
à===à;
contre===contre;
au-dessus===au-dessus;
concernant===concernant;
au-dessous===au-dessous;
moins===moins;
pendant===pendant;
dedans===dedans;
sans===sans;
devers===devers;
malgré===malgré;
vers===vers;
en===en;
ès===ès;
autour===autour;
dessous===dessous;
lequel===lesquels;
quelqu'un===quelqu'un;quelqu'une;
celui===celuy;
quiconque===quiconque;
moi-même===moi-même;
rien===rien;
lequel===lequel;
tel===tel;
te===te;
toi-même===toi-même;
chacun===chacun;
quel===quelles;
elle===elles;
cela===cela;
vôtre===vôtre;
lui===lui;
quelque===quelque;
leur===leur;leurs;
votre===vostre;
on===on;
quoi===quoy;
aucun===aucun;aucune;
lui-même===lui-même;
ceci===ceci;
notre===nostre;
mien===miens;
moi===moi;
certain===certains;
ça===ça;
se===se;
celui-ci===celui-ci;
nous-même===nous-mêmes;
celui===celui;
grand-chose===grand-chose;
autrui===autrui;
mien===mien;
soi===soi;
le===le;
dont===dont;
celui===icelui;
tien===tien;tiens;
le===la;
quid===quid;
ce===ce;
soi-même===soi-même;
celui-là===celui-là;celle-là;
lui-même===eux-mêmes;
toi===toi;
personne===personne;
vous===vous;
nôtre===nôtre;
auquel===auquel;auxquels;
nawak===nawak;
quelle===quelle;
que===que;
me===me;
celui-là===ceux-là;
celle-ci===celles-ci;
qui===qui;
vôtre===vôtres;
celui-ci===ceux-ci;
celle-là===celles-là;
nôtre===nôtres;
il===il;
vous-même===vous-mêmes;
un===un;
celle-ci===celle-ci;
elle===elle;
nul===nul;
je===je;
maint===maint;
nous===nous;
tu===tu;
vous-même===vous-même;
ce===ceste;
quoi===quoi;
ej===ej;
chacun===chacune;
en===en;
il===ils;
tous===tous;
y===y;
tout===tout;
sien===sien;siens;
elle-même===elle-même;
il===-t-il
il===-t-ils
il===-ils
il===-il
elle===-t-elles
elle===-t-elle
elle===-elles
elle===-elle
on===-on
on===-t-on
nous===-nous
vous===-vous
je===-je
tu===-tu
The files in this repository contain mappings from treebank specific tagsets
to a set of 12 universal part-of-speech tags. The 12 universal tags are:
VERB - verbs (all tenses and modes)
NOUN - nouns (common and proper)
PRON - pronouns
ADJ - adjectives
ADV - adverbs
ADP - adpositions (prepositions and postpositions)
CONJ - conjunctions
DET - determiners
NUM - cardinal numbers
PRT - particles or other function words
X - other: foreign words, typos, abbreviations
. - punctuation