-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathclassify-train.sh
More file actions
executable file
·37 lines (24 loc) · 1.17 KB
/
classify-train.sh
File metadata and controls
executable file
·37 lines (24 loc) · 1.17 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
#!/bin/sh
# Train a Weka RandomForest model for every input file matching /tmp/fr_*.
#
# An input named /tmp/fr_<tid>_<anything> is copied to arffs/<tid>.arff and then
# pushed through four Weka stages, each writing its own intermediate file:
#
#   StringToWordVector -> arffs/<tid>-wordvec.arff
#   Reorder            -> arffs/<tid>-reorder.arff
#   SpreadSubsample    -> arffs/<tid>-spread.arff
#   RandomForest       -> models/<tid>-rf.model.xml and models/<tid>-rf.model
#
# Paths under arffs/ and models/ are relative to the current directory; both
# directories are created if missing. If any stage fails for an input, the
# remaining stages for that input are skipped (so they never run on a missing
# or stale intermediate file), a message is written to stderr, and processing
# moves on to the next input.
#
# Exit status: 0 if every input was processed successfully, 1 otherwise.

weka_jar=/usr/share/java/weka.jar
status=0

# Run one Weka class; "$@" is the class name followed by its options.
weka() {
  java -cp "$weka_jar" "$@"
}

# Report that stage $1 failed for the current $tid and remember the failure.
fail() {
  echo "classify-train: $tid: $1 failed" >&2
  status=1
}

mkdir -p arffs models || exit 1

# Glob directly instead of parsing `ls`, so names survive word splitting.
for f in /tmp/fr_*; do
  # Skips the literal pattern when nothing matches, and anything that is not
  # a regular file.
  [ -f "$f" ] || continue

  # /tmp/fr_<tid>_<rest> -> <tid>: drop the prefix, then everything from the
  # first underscore onwards.
  tid=${f#/tmp/fr_}
  tid=${tid%%_*}

  echo "TID: $tid"
  cp -- "$f" "arffs/$tid.arff" || { fail "copy"; continue; }

  echo "String to word vector"
  # The -delimiters value is escaped for the shell AND for Weka's own option
  # parser; keep it exactly as written.
  weka weka.filters.unsupervised.attribute.StringToWordVector \
    -S -R 2 -P title- -W 1000 -prune-rate -1.0 -C -N 0 -L \
    -stemmer weka.core.stemmers.LovinsStemmer -M 1 \
    -tokenizer "weka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:\\\'\\\"()?\!\"" \
    -i "arffs/$tid.arff" -o "arffs/$tid-wordvec.arff" ||
    { fail "StringToWordVector"; continue; }

  echo "Reordering class label"
  # Moves attributes 1-2 behind all the others; the later stages address the
  # class as "last" (-c last below).
  weka weka.filters.unsupervised.attribute.Reorder \
    -R 3-last,1-2 -i "arffs/$tid-wordvec.arff" -o "arffs/$tid-reorder.arff" ||
    { fail "Reorder"; continue; }

  echo "Spread subsample"
  weka weka.filters.supervised.instance.SpreadSubsample \
    -M 1.0 -X 0.0 -S 1 -c last \
    -i "arffs/$tid-reorder.arff" -o "arffs/$tid-spread.arff" ||
    { fail "SpreadSubsample"; continue; }

  echo "Random Forest"
  # The forest is trained twice so the model is saved under both file names.
  # NOTE(review): presumably the .xml extension selects Weka's XML
  # serialization - confirm before collapsing these into one run.
  weka weka.classifiers.trees.RandomForest \
    -I 20 -K 0 -t "arffs/$tid-spread.arff" -v -d "models/$tid-rf.model.xml" ||
    { fail "RandomForest (.model.xml)"; continue; }
  weka weka.classifiers.trees.RandomForest \
    -I 20 -K 0 -t "arffs/$tid-spread.arff" -v -d "models/$tid-rf.model" ||
    { fail "RandomForest (.model)"; continue; }
done

exit "$status"