Skip to content

Commit 5124c90

Browse files
committed
add support for download progress bar for pta:install:package. Made API for Stanford taggers easier
1 parent 7842ea9 commit 5124c90

File tree

11 files changed

+134
-54
lines changed

11 files changed

+134
-54
lines changed

README.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,25 +16,36 @@ composer require yooper/php-text-analysis
1616
Documentation for the library resides in the wiki.
1717
https://github.com/yooper/php-text-analysis/wiki
1818

19+
20+
21+
1922
Dictionary Installation
2023
=============
2124

2225
Not required unless you use the dictionary stemmers
2326

24-
*For Ubuntu*
27+
*For Ubuntu < 16*
2528
```
2629
sudo apt-get install libpspell-dev
2730
sudo apt-get install php5-pspell
2831
sudo apt-get install aspell-en
2932
sudo apt-get install php5-enchant
3033
```
34+
*For Ubuntu >= 16*
35+
```
36+
sudo apt-get install libpspell-dev php7.0-pspell aspell-en php7.0-enchant
37+
```
38+
39+
3140
*For CentOS*
3241
```
3342
sudo yum install php5-pspell
3443
sudo yum install aspell-en
3544
sudo yum install php5-enchant
3645
```
3746

47+
*PHP PECL Stem* is not currently available in PHP 7.0.
48+
3849

3950
Tokenize
4051
=============

src/Console/Commands/NltkPackageInstallCommand.php

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,18 +6,30 @@
66
use Symfony\Component\Console\Input\InputArgument;
77
use Symfony\Component\Console\Input\InputInterface;
88
use Symfony\Component\Console\Output\OutputInterface;
9+
use Symfony\Component\Console\Helper\ProgressBar;
910

1011
use TextAnalysis\Downloaders\DownloadPackageFactory as DPF;
1112
use TextAnalysis\Downloaders\NltkCorporaIndexDownloader;
1213

1314

15+
1416
/**
1517
* Installs the selected nltk corpus package
1618
*
1719
* @author yooper
1820
*/
1921
class NltkPackageInstallCommand extends Command
2022
{
23+
/**
24+
* @var ProgressBar
25+
*/
26+
protected $progressBar = null;
27+
28+
/**
29+
* @var \Symfony\Component\Console\Output\OutputInterface
30+
*/
31+
private $output;
32+
2133
protected function configure()
2234
{
2335
$this->setName('pta:install:package')
@@ -31,25 +43,88 @@ protected function configure()
3143

3244
/**
 * Installs the nltk package named by the 'package' argument.
 *
 * Looks the package up in the corpora index, streams it to the download
 * path while a progress bar is driven by the stream notification callback,
 * then verifies the checksum and unpacks it.
 *
 * @param InputInterface $input   supplies the 'package' argument
 * @param OutputInterface $output receives status messages and the progress bar
 * @return int|null 1 when the download, save or checksum step fails, nothing otherwise
 */
protected function execute(InputInterface $input, OutputInterface $output)
{
    $this->output = $output;
    $packageId = $input->getArgument('package');

    $listPackages = (new NltkCorporaIndexDownloader())->getPackages();

    $packageFound = null;

    foreach($listPackages as $package)
    {
        if($packageId == $package->getId()) {
            $packageFound = $package;
            break;
        }
    }

    if(!$packageFound) {
        $output->writeln("Package {$packageId} was not found, try textconsole pta:list, to see the available packages");
        return;
    }

    // use the matched package explicitly instead of the leaked loop variable
    $download = DPF::download($packageFound);

    // Create stream context; progress() is invoked for every stream notification.
    $context = stream_context_create([], ['notification' => [$this, 'progress']]);

    // Open the remote side first so a failed request never truncates an existing local file.
    $resource = fopen($packageFound->getUrl(), 'r', false, $context);
    if (!$resource) {
        $output->writeln("Package {$packageFound->getId()} - {$packageFound->getName()} install failed, could not open {$packageFound->getUrl()}");
        return 1;
    }

    $stream = fopen($download->getDownloadFullPath(), 'w+');
    if (!$stream) {
        fclose($resource);
        $output->writeln("Package {$packageFound->getId()} - {$packageFound->getName()} install failed, permission denied to create file into {$download->getDownloadFullPath()}");
        return 1;
    }

    // Pipe file.
    $copied = stream_copy_to_stream($resource, $stream);
    fclose($resource);
    $saved = fclose($stream);

    // End output. The bar only exists if at least one size/progress notification arrived.
    if ($this->progressBar) {
        $this->progressBar->finish();
    }

    if ($copied === false || !$saved) {
        $output->writeln("Could not save file {$download->getDownloadFullPath()}");
        return 1;
    }

    if(!$download->verifyChecksum()) {
        $output->writeln("Bad checksum for the downloaded package {$packageFound->getId()}");
        // return a failure status instead of exit, which ended the process with status 0
        return 1;
    }

    $download->unpackPackage();
    $output->writeln(PHP_EOL);
    $output->writeln("Package {$packageFound->getId()} - {$packageFound->getName()} was installed into {$download->getInstallDir()}");
}
94+
95+
/**
 * Stream notification callback that keeps the console progress bar in
 * sync with the download. Registered through the 'notification' context
 * parameter, so PHP calls it with the standard notification arguments.
 *
 * @param int $notificationCode one of the STREAM_NOTIFY_* constants
 * @param int $severity
 * @param string $message
 * @param int $messageCode
 * @param int $bytesTransferred
 * @param int $bytesMax
 */
public function progress($notificationCode, $severity, $message, $messageCode, $bytesTransferred, $bytesMax)
{
    if (STREAM_NOTIFY_REDIRECTED === $notificationCode) {
        // the bar may not exist yet when the redirect arrives; guard before clearing
        if ($this->progressBar) {
            $this->progressBar->clear();
        }
        $this->progressBar = null;
        return;
    }

    if (STREAM_NOTIFY_FILE_SIZE_IS === $notificationCode) {
        if ($this->progressBar) {
            $this->progressBar->clear();
        }
        // the size is now known, so start a bar with a real maximum
        $this->progressBar = new ProgressBar($this->output, $bytesMax);
    }

    if (STREAM_NOTIFY_PROGRESS === $notificationCode) {
        if (is_null($this->progressBar)) {
            // no size was announced; fall back to a bar without a maximum
            $this->progressBar = new ProgressBar($this->output);
        }
        $this->progressBar->setProgress($bytesTransferred);
    }

    if (STREAM_NOTIFY_COMPLETED === $notificationCode) {
        // the command has no finish() method of its own; complete the bar directly
        if ($this->progressBar) {
            $this->progressBar->finish();
        }
    }
}
129+
55130
}

src/Downloaders/DownloadPackageFactory.php

Lines changed: 2 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -31,14 +31,7 @@ protected function __construct(Package $package)
3131
if(file_exists($this->getDownloadFullPath()) && $this->verifyChecksum()) {
3232
return;
3333
}
34-
35-
$this->downloadRemoteFile();
36-
37-
if($this->verifyChecksum()) {
38-
throw new Exception("Bad checksum for the downloaded package {$this->getPackage()->getId()}");
39-
}
40-
41-
$this->unpackPackage();
34+
4235
}
4336

4437
/**
@@ -67,7 +60,7 @@ public function verifyChecksum()
6760
* de-compress the downloaded corpus into the install directory, or
6861
* copy the files into the install directory
6962
*/
70-
protected function unpackPackage()
63+
public function unpackPackage()
7164
{
7265
// it is zipped, we must unzip it
7366
if($this->getPackage()->getUnzip()) {
@@ -150,21 +143,6 @@ public function initialize()
150143

151144
}
152145

153-
/**
154-
* @todo improve downloader code, make it more robust
155-
*/
156-
protected function downloadRemoteFile()
157-
{
158-
$handle = fopen($this->getPackage()->getUrl(), "rb");
159-
$fp = fopen($this->getDownloadFullPath(), 'w');
160-
$content = '';
161-
while (!feof($handle)) {
162-
$content = fread($handle, 8192);
163-
fwrite($fp, $content);
164-
}
165-
fclose($handle);
166-
fclose($fp);
167-
}
168146

169147
/**
170148
* Has the full path to where the download should go

src/Taggers/StanfordAbstract.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ public function getPathToJava()
160160
if(getenv('JAVA_HOME')) {
161161
return getenv('JAVA_HOME');
162162
} else {
163-
return 'java';
163+
throw new RuntimeException('env JAVA_HOME must be set.');
164164
}
165165
}
166166

src/Taggers/StanfordNerTagger.php

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,17 @@
1212
*/
1313
class StanfordNerTagger extends StanfordAbstract
1414
{
15-
public function __construct($jarPath, $classifierPath, $javaOptions = array(), $separator = '/') {
15+
public function __construct($jarPath = null, $classifierPath = null, $javaOptions = array(), $separator = '/') {
16+
$nerPath = 'taggers/stanford-ner-2015-12-09';
17+
18+
if(!$jarPath) {
19+
$jarPath = get_storage_path($nerPath).'stanford-ner.jar';
20+
}
21+
22+
if(!$classifierPath) {
23+
$classifierPath = get_storage_path($nerPath.DIRECTORY_SEPARATOR."classifiers")."english.all.3class.distsim.crf.ser.gz";
24+
}
25+
1626
parent::__construct($jarPath, $classifierPath, $javaOptions, $separator);
1727
// created the temp file
1828
$this->tmpFilePath = tempnam(sys_get_temp_dir(), "stanford_ner_");

src/Taggers/StanfordPosTagger.php

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,18 @@
1212
*/
1313
class StanfordPosTagger extends StanfordAbstract
1414
{
15-
public function __construct($jarPath, $classifierPath, $javaOptions = array(), $separator = '/') {
16-
parent::__construct($jarPath, $classifierPath, $javaOptions, $separator);
15+
/**
 * Builds the POS tagger; any falsy path argument is replaced by a default
 * located through get_storage_path() under the stanford-postagger-2015-12-09
 * directory.
 *
 * @param string|null $jarPath     path to the tagger jar, or null for the default
 * @param string|null $modelPath   path to the tagger model, or null for the default
 * @param array $javaOptions       passed through to the parent constructor
 * @param string $separator        passed through to the parent constructor
 */
public function __construct($jarPath = null, $modelPath = null, $javaOptions = array(), $separator = '/')
{
    $baseDir = 'taggers/stanford-postagger-2015-12-09';

    // get_storage_path() is only evaluated when the caller gave no path
    $jarPath = $jarPath ?: get_storage_path($baseDir).'stanford-postagger-3.6.0.jar';
    $modelPath = $modelPath ?: get_storage_path($baseDir.DIRECTORY_SEPARATOR."models")."english-left3words-distsim.tagger";

    parent::__construct($jarPath, $modelPath, $javaOptions, $separator);

    // create the temp file
    $this->tmpFilePath = tempnam(sys_get_temp_dir(), "stanford_pos_");
}

tests/TextAnalysis/Adapters/EnchantAdapterTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public function testSpelling()
1717
}
1818
$adapter = new EnchantAdapter();
1919
$this->assertEquals('run', $adapter->suggest("runn")[0]);
20-
$this->assertEquals('cooper', $adapter->suggest("yooper")[0]);
20+
$this->assertEquals('looper', $adapter->suggest("yooper")[0]);
2121
$this->assertEquals('flute', $adapter->suggest("flute")[0]);
2222
}
2323
}

tests/TextAnalysis/Stemmers/DictionaryStemmerTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ class DictionaryStemmerTest extends \PHPUnit_Framework_TestCase
1414
{
1515
public function testPspell()
1616
{
17-
if( getenv('SKIP_TEST')) {
17+
if( getenv('SKIP_TEST') || !extension_loaded('stem')) {
1818
return;
1919
}
2020

tests/TextAnalysis/Stemmers/SnowballStemmerTest.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ class SnowballStemmerTest extends \PHPUnit_Framework_TestCase
1212
{
1313
public function testDefaultEnglish()
1414
{
15-
if( getenv('SKIP_TEST')) {
15+
if( getenv('SKIP_TEST') || !extension_loaded('stem')) {
1616
return;
1717
}
1818
$stemmer = new SnowballStemmer('english', $exceptions = ['universities' => 'university']);
@@ -27,7 +27,7 @@ public function testDefaultEnglish()
2727

2828
public function testSwedish()
2929
{
30-
if( getenv('SKIP_TEST')) {
30+
if( getenv('SKIP_TEST') || !extension_loaded('stem')) {
3131
return;
3232
}
3333
$stemmer = new SnowballStemmer('swedish');
@@ -36,7 +36,7 @@ public function testSwedish()
3636

3737
public function testException()
3838
{
39-
if( getenv('SKIP_TEST')) {
39+
if( getenv('SKIP_TEST') || !extension_loaded('stem')) {
4040
return;
4141
}
4242
$this->setExpectedException('Exception');

tests/TextAnalysis/Taggers/StanfordNerTaggerTest.php

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
*/
1515
class StanfordNerTaggerTest extends \PHPUnit_Framework_TestCase
1616
{
17+
protected $nerPath = 'taggers/stanford-ner-2015-12-09';
18+
1719
protected $text = "Marquette County is a county located in the Upper Peninsula of the US state of Michigan. As of the 2010 census, the population was 67,077.";
1820

1921
public function testJarNotFound()
@@ -29,7 +31,7 @@ public function testClassiferNotFound()
2931
return;
3032
}
3133

32-
$tagger = new StanfordNerTagger(get_storage_path('ner').'stanford-ner.jar', "classifier.gz");
34+
$tagger = new StanfordNerTagger(get_storage_path($this->nerPath).'stanford-ner.jar', "classifier.gz");
3335
$this->setExpectedException('RuntimeException', 'Classifier not found classifier.gz');
3436
$tagger->tag([]);
3537
}
@@ -51,20 +53,16 @@ public function testTempCreatedFile()
5153

5254
public function testStanfordNer()
5355
{
54-
if( getenv('SKIP_TEST') || !getenv('JAVA_HOME')) {
56+
if( getenv('SKIP_TEST')) {
5557
return;
5658
}
57-
59+
5860
$document = new TokensDocument((new WhitespaceTokenizer())->tokenize($this->text));
59-
60-
$jarPath = get_storage_path('ner').'stanford-ner.jar';
61-
$classiferPath = get_storage_path('ner'.DIRECTORY_SEPARATOR."classifiers")."english.all.3class.distsim.crf.ser.gz";
62-
63-
$tagger = new StanfordNerTagger($jarPath, $classiferPath);
61+
$tagger = new StanfordNerTagger();
6462
$output = $tagger->tag($document->getDocumentData());
6563

6664
$this->assertFileExists($tagger->getTmpFilePath());
67-
$this->assertEquals(138, filesize($tagger->getTmpFilePath()));
65+
$this->assertEquals(138, filesize($tagger->getTmpFilePath()));
6866
$this->assertEquals(['Michigan','LOCATION'], $output[15], "Did you set JAVA_HOME env variable?");
6967
}
7068

0 commit comments

Comments
 (0)