Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
source-extractor
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Operations
Operations
Environments
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
asrael
source-extractor
Commits
5887cba1
Commit
5887cba1
authored
May 04, 2018
by
Bertrand Goupil
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Bug fix: clean Memory object to avoid memory leak.
Add support for JAVA_OPTS in Docker instances.
parent
f9a72390
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
25 additions
and
11 deletions
+25
-11
src/main/docker/Dockerfile
src/main/docker/Dockerfile
+3
-2
src/main/java/fr/limsi/sourceExtractor/Memory.java
src/main/java/fr/limsi/sourceExtractor/Memory.java
+6
-0
src/main/java/fr/limsi/sourceExtractor/application/process/SimpleExtractSource.java
...rceExtractor/application/process/SimpleExtractSource.java
+3
-0
src/main/java/fr/limsi/sourceExtractor/training/AProcess.java
...main/java/fr/limsi/sourceExtractor/training/AProcess.java
+6
-3
src/main/java/fr/limsi/sourceExtractor/training/AProcessSupport.java
...va/fr/limsi/sourceExtractor/training/AProcessSupport.java
+7
-6
No files found.
src/main/docker/Dockerfile
View file @
5887cba1
...
...
@@ -20,5 +20,6 @@ ADD limsiSourceExtractor.jar limsiSourceExtractor.jar
EXPOSE
8080
RUN
sh
-c
'touch /limsiSourceExtractor.jar'
ENV
JAVA_OPTS="-Dspring.config.location=/configuration/"
ENTRYPOINT
[ "sh", "-c", "java $JAVA_OPTS -Djava.security.egd=file:/dev/./urandom -jar /limsiSourceExtractor.jar" ]
\ No newline at end of file
#ENV JAVA_OPTS="-Dspring.config.location=/configuration/"
#ENTRYPOINT [ "sh", "-c", "java $JAVA_OPTS -Dspring.config.location=/configuration/ -Djava.security.egd=file:/dev/./urandom -jar /limsiSourceExtractor.jar" ]
ENTRYPOINT
exec java $JAVA_OPTS -Dspring.config.location=/configuration/ -Djava.security.egd=file:/dev/./urandom -jar /limsiSourceExtractor.jar
\ No newline at end of file
src/main/java/fr/limsi/sourceExtractor/Memory.java
View file @
5887cba1
...
...
@@ -15,5 +15,11 @@ public class Memory {
this
.
docTexts
=
docTexts
;
this
.
sentenceOffsetsByFile
=
sentenceOffsetsByFile
;
}
/**
 * Discards everything this object holds for one file.
 *
 * <p>Calls {@code remove(fileId)} on {@code parsedTexts}, {@code docTexts} and
 * {@code sentenceOffsetsByFile}, in that order, so that no per-file data for
 * {@code fileId} stays referenced from this instance.
 *
 * @param fileId key of the file whose entries are removed from the three stores
 */
public void cleanEntry(String fileId) {
    parsedTexts.remove(fileId);
    docTexts.remove(fileId);
    sentenceOffsetsByFile.remove(fileId);
}
}
src/main/java/fr/limsi/sourceExtractor/application/process/SimpleExtractSource.java
View file @
5887cba1
...
...
@@ -10,6 +10,7 @@ import org.slf4j.Logger;
import
org.slf4j.LoggerFactory
;
import
fr.limsi.sourceExtractor.DIRUtils
;
import
fr.limsi.sourceExtractor.Memory
;
import
fr.limsi.sourceExtractor.Paths
;
import
fr.limsi.sourceExtractor.application.configuration.SourceExtractorConfig
;
import
fr.limsi.sourceExtractor.application.configuration.SourceExtractorConstant
;
...
...
@@ -66,6 +67,8 @@ public class SimpleExtractSource extends TrainingUnLabel {
String
response
=
parseFileToAddAnnotationToBrat
(
fileId
,
annContent
);
logger
.
debug
(
"output: {}"
,
response
);
cleanDirContent
(
this
.
paths
.
DATA_UNLABELED_DIR
,
fileId
);
//Remove Memory entries
extractorConfig
.
getMemory
().
cleanEntry
(
fileId
);
return
response
;
}
...
...
src/main/java/fr/limsi/sourceExtractor/training/AProcess.java
View file @
5887cba1
...
...
@@ -29,6 +29,7 @@ import org.apache.commons.collections4.CollectionUtils;
import
com.google.common.base.Charsets
;
import
com.google.common.io.Files
;
import
edu.stanford.nlp.pipeline.Annotation
;
import
fr.limsi.sourceExtractor.DIRUtils
;
import
fr.limsi.sourceExtractor.Memory
;
import
fr.limsi.sourceExtractor.Pair
;
...
...
@@ -159,7 +160,9 @@ public abstract class AProcess extends AProcessSupport {
//inputTxtFile = new File(this.paths.DIR_INPUT_UNLABELED, fileId + ".txt");
}
//le fichier n'est jamais utilisé => remplacement par null (à tester possible effet de bord)
taggedTextOnlyOffset
=
parseText
(
fileId
,
SourceExtractorUtil
.
getDocText
(
fileId
,
null
,
memory
),
tools
,
resources
,
memory
);
String
text
=
SourceExtractorUtil
.
getDocText
(
fileId
,
null
,
memory
);
Annotation
annotation
=
new
Annotation
(
text
);
taggedTextOnlyOffset
=
parseText
(
annotation
,
fileId
,
text
,
tools
,
resources
,
memory
);
HashMap
<
String
,
String
>
retrieveOffsetsOfSource
=
new
HashMap
<>();
...
...
@@ -446,8 +449,8 @@ public abstract class AProcess extends AProcessSupport {
text
=
SourceExtractorUtil
.
getDocText
(
fileId
,
new
File
(
paths
.
DIR_INPUT_UNLABELED
,
fileId
+
".txt"
),
memory
);
}
String
tagWithOffset
=
parseText
(
fileId
,
text
,
tools
,
resources
,
memory
);
Annotation
annotation
=
new
Annotation
(
text
);
String
tagWithOffset
=
parseText
(
annotation
,
fileId
,
text
,
tools
,
resources
,
memory
);
String
resultWapiti
=
""
;
String
line
;
...
...
src/main/java/fr/limsi/sourceExtractor/training/AProcessSupport.java
View file @
5887cba1
...
...
@@ -80,10 +80,11 @@ public abstract class AProcessSupport {
public
String
tagAndConvert
(
String
fileId
,
String
text
,
File
outFileSEC
,
String
ann
,
Tools
tools
,
Resources
resources
,
Memory
memory
)
throws
IOException
{
// we use malt parser to keep the subjects found
String
maltParsertext
=
parseTextForMaltParser
(
text
,
tools
,
resources
);
Annotation
document
=
new
Annotation
(
text
);
String
maltParsertext
=
parseTextForMaltParser
(
document
,
text
,
tools
,
resources
);
// we parse the text to tag each word of this text.
String
parsedText
=
parseText
(
fileId
,
text
,
tools
,
resources
,
memory
);
String
parsedText
=
parseText
(
document
,
fileId
,
text
,
tools
,
resources
,
memory
);
// we join both parse text
String
taggedText
=
tagFromMaltParserAndParseText
(
parsedText
,
maltParsertext
,
resources
);
...
...
@@ -126,14 +127,14 @@ public abstract class AProcessSupport {
* Le text à tag.
* @return Le StringBuilder créé avec toutes les informations nécessaires. (6 champs)
*/
protected
String
parseText
(
String
fileId
,
String
text
,
Tools
tools
,
Resources
resources
,
Memory
memory
)
{
protected
String
parseText
(
Annotation
document
,
String
fileId
,
String
text
,
Tools
tools
,
Resources
resources
,
Memory
memory
)
{
String
parsedText
=
memory
.
parsedTexts
.
get
(
fileId
);
if
(
parsedText
!=
null
)
{
return
parsedText
;
}
// create an empty Annotation just with the given text
Annotation
document
=
new
Annotation
(
text
);
// Annotation document = new Annotation(text);
// run all Annotators on this text
tools
.
getCoreNLPParser
().
annotate
(
document
);
...
...
@@ -516,9 +517,9 @@ public abstract class AProcessSupport {
* @return
* @throws IOException
*/
private
String
parseTextForMaltParser
(
String
text
,
Tools
tools
,
Resources
resources
)
throws
IOException
{
private
String
parseTextForMaltParser
(
Annotation
document
,
String
text
,
Tools
tools
,
Resources
resources
)
throws
IOException
{
// create an empty Annotation just with the given text
Annotation
document
=
new
Annotation
(
text
);
// Annotation document = new Annotation(text);
// run all Annotators on this text
tools
.
coreNLPParser
.
annotate
(
document
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment