Skip to content

Commit ffd7b5f

Browse files
committed
refactoring & readme
1 parent 9503adb commit ffd7b5f

File tree

7 files changed

+97
-24
lines changed

7 files changed

+97
-24
lines changed

README.md

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,21 @@ NLP Tasks Available through Microsoft Labs API:
2121
composer require web64/php-nlp-client
2222
```
2323

24+
## Entity Extraction - Named Entity Recognition
25+
This package provides access to three different methods for entity extraction.
26+
27+
| Library | Language Support | Programming Lang. | API Access |
28+
| ------------- | ------------- | ------------- | ------------- |
29+
| Polyglot | 40 languages | Python | NLP Server |
30+
| CoreNLP | 6 languages | Java | CoreNLP Standalone server |
31+
| Spacy.io | 7 languages | Python | NLP Server |
32+
33+
If you are dealing with text in English or one of the major European languages, you will get the best results with CoreNLP or Spacy.io.
34+
35+
The quality of extracted entities with Polyglot is not great, but for many languages it is the only available option at the moment.
36+
37+
Polyglot and Spacy NER are accessible through the NLP Server; CoreNLP requires its own standalone Java server.
38+
2439
## Usage
2540

2641
### Language detection:
@@ -35,11 +50,11 @@ $detected_lang = $nlp->language( "The quick brown fox jumps over the lazy dog" )
3550
```php
3651
// From URL
3752
$nlp = new \Web64\Nlp\NlpClient('http://localhost:6400/');
38-
$newspaper = $nlp->newspaperUrl('https://github.com/web64/nlpserver');
53+
$newspaper = $nlp->newspaper('https://github.com/web64/nlpserver');
3954

4055
// or from HTML
4156
$html = file_get_contents( 'https://github.com/web64/nlpserver' );
42-
$newspaper = $nlp->newspaperHtml( $html );
57+
$newspaper = $nlp->newspaper_html( $html );
4358

4459
Array
4560
(
@@ -56,10 +71,10 @@ Array
5671
)
5772
```
5873

59-
### Entity Extraction & Sentiment Analysis
74+
### Entity Extraction & Sentiment Analysis (Polyglot)
6075
```php
6176
$nlp = new \Web64\Nlp\NlpClient('http://localhost:6400/');
62-
$polyglot = $nlp->polyglot( $text, 'en' );
77+
$polyglot = $nlp->polyglot_entities( $text, 'en' );
6378

6479
$entities = $polyglot->getEntities();
6580
$sentiment = $polyglot->getSentiment();
@@ -138,11 +153,11 @@ Article Extraction using python port of Readability.js
138153
$nlp = new \Web64\Nlp\NlpClient( 'http://localhost:6400/' );
139154

140155
// From URL:
141-
$article = $nlp->readabilityUrl('https://github.com/web64/nlpserver');
156+
$article = $nlp->readability('https://github.com/web64/nlpserver');
142157

143158
// From HTML:
144159
$html = file_get_contents( 'https://github.com/web64/nlpserver' );
145-
$article = $nlp->readabilityHtml( $html );
160+
$article = $nlp->readability_html( $html );
146161

147162
/*
148163
Array
@@ -161,10 +176,10 @@ This uses the Polyglot multilingual NLP library to return entities and a sentime
161176
Ensure the models for the required languages are downloaded for Polyglot.
162177

163178
```php
164-
$polyglot = $nlp->polyglot( $text );
179+
$polyglot = $nlp->polyglot_entities( $text );
165180

166181
// Specify language
167-
$polyglot = $nlp->polyglot( $text, 'no' );
182+
$polyglot = $nlp->polyglot_entities( $text, 'no' );
168183

169184
$polyglot->getSentiment(); // -1
170185

@@ -190,8 +205,6 @@ $polyglot->getLocations(); // Array of Locations
190205
$polyglot->getOrganizations(); // Array of organisations
191206
$polyglot->getPersons(); // Array of people
192207

193-
194-
195208
$polyglot->getEntities();
196209
/*
197210
Returns combined array of all entities
@@ -204,6 +217,18 @@ Array
204217
)
205218
*/
206219
```
220+
### Sentiment Analysis
221+
222+
```php
223+
$sentiment = $nlp->sentiment( "This is the worst product ever" );
224+
// -1
225+
226+
$sentiment = $nlp->sentiment( "This is great! " );
227+
// 1
228+
229+
// specify language in second parameter for non-english
230+
$sentiment = $nlp->sentiment( $french_text, 'fr' );
231+
```
207232

208233
## CoreNLP - Entity Extraction (NER)
209234
CoreNLP has much better NER quality than Polyglot, but only supports a few languages, including English, French, German and Spanish.

src/NlpClient.php

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,15 @@ public function spacy_entities( $text, $lang = 'en' )
4545
*/
4646
public function summarize( $text, $word_count = null )
4747
{
48-
$data = $this->post_call('/summarize', ['text' => $text, 'word_count' => $word_count ] );
48+
$data = $this->post_call('/gensim/summarize', ['text' => $text, 'word_count' => $word_count ] );
4949

5050
return ( !empty($data['summarize']) ) ? $data['summarize'] : null;
5151
}
5252

5353
/**
5454
* Article Extraction from HTML
5555
*/
56-
public function newspaperHtml( $html )
56+
public function newspaper_html( $html )
5757
{
5858
$data = $this->post_call('/newspaper', ['html' => $html ] );
5959

@@ -63,7 +63,7 @@ public function newspaperHtml( $html )
6363
/**
6464
* Article Extraction from URL
6565
*/
66-
public function newspaperUrl( $url )
66+
public function newspaper( $url )
6767
{
6868
$data = $this->get_call('/newspaper', ['url' => $url ] );
6969

@@ -74,7 +74,7 @@ public function newspaperUrl( $url )
7474
/**
7575
* Readability Article Extraction from URL
7676
*/
77-
public function readabilityUrl( $url )
77+
public function readability( $url )
7878
{
7979
$data = $this->get_call('/readability', ['url' => $url ] );
8080

@@ -84,29 +84,39 @@ public function readabilityUrl( $url )
8484
/**
8585
* Readability Article Extraction from HTML
8686
*/
87-
public function readabilityHTML( $html )
87+
public function readability_html( $html )
8888
{
8989
$data = $this->post_call('/readability', ['html' => $html ] );
9090

9191
return ( !empty($data['readability']) ) ? $data['readability'] : null;
9292
}
9393

94+
/**
95+
* Sentiment Analysis by Polyglot
96+
*/
97+
public function sentiment( $text )
98+
{
99+
$data = $this->post_call('/polyglot/sentiment', ['text' => $text ] );
100+
101+
return ( isset($data['sentiment']) ) ? $data['sentiment'] : null;
102+
}
103+
94104
/**
95105
* Get neighbouring words
96106
*/
97107
public function neighbours( $word, $lang = 'en')
98108
{
99-
$data = $this->get_call('/neighbours', ['word' => $word, 'lang' => $lang ] );
109+
$data = $this->get_call('/polyglot/neighbours', ['word' => $word, 'lang' => $lang ] );
100110

101111
return ( !empty($data['neighbours']) ) ? $data['neighbours'] : null;
102112
}
103113

104114
/**
105115
* Get entities and sentiment analysis of text
106116
*/
107-
public function polyglot( $text, $language = null )
117+
public function polyglot_entities( $text, $language = null )
108118
{
109-
$data = $this->post_call('/polyglot', ['text' => $text, 'lang' => $language] );
119+
$data = $this->post_call('/polyglot/entities', ['text' => $text, 'lang' => $language] );
110120
$this->msg( $data );
111121
return new \Web64\Nlp\Classes\PolyglotResponse( $data['polyglot'] );
112122
}

tests/TestCase.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ public function setUp()
1515
'http://localhost:6400/',
1616
'http://localhost:6400/',
1717
],
18-
'debug' => true,
18+
'debug' => false,
1919
];
2020
}
2121

tests/Unit/NewspaperTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public function url_article_extraction()
1111
{
1212
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
1313

14-
$newspaper = $nlp->newspaperUrl('https://github.com/web64/nlpserver');
14+
$newspaper = $nlp->newspaper('https://github.com/web64/nlpserver');
1515

1616
$this->msg( $newspaper );
1717
$this->assertNotEmpty($newspaper);
@@ -23,7 +23,7 @@ public function html_article_extraction()
2323
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
2424

2525
$html = file_get_contents( 'https://github.com/web64/nlpserver' );
26-
$newspaper = $nlp->newspaperHtml( $html );
26+
$newspaper = $nlp->newspaper_html( $html );
2727

2828
$this->msg( $newspaper );
2929

tests/Unit/PolyglotTest.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ public function entity_extraction()
1616
The origin of the bell's nickname is open to question; it may be named after Sir Benjamin Hall, who oversaw its installation, or heavyweight boxing champion Benjamin Caunt.
1717
Four quarter bells chime at 15, 30 and 45 minutes past the hour and just before Big Ben tolls on the hour. The clock uses its original Victorian mechanism, but an electric motor can be used as a backup.";
1818

19-
$polyglot = $nlp->polyglot( $text, 'en' );
19+
$polyglot = $nlp->polyglot_entities( $text, 'en' );
2020

2121
$this->msg( $polyglot );
2222
$this->msg( $polyglot->getEntities() );

tests/Unit/ReadabilityTest.php

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ public function readability_url_article_extraction()
1111
{
1212
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
1313

14-
$article = $nlp->readabilityUrl('https://github.com/web64/nlpserver');
14+
$article = $nlp->readability('https://github.com/web64/nlpserver');
1515

1616
//$this->msg( $article );
1717
$this->assertNotEmpty($article);
@@ -23,7 +23,7 @@ public function readability_html_article_extraction()
2323
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
2424

2525
$html = file_get_contents( 'https://github.com/web64/nlpserver' );
26-
$article = $nlp->readabilityHtml( $html );
26+
$article = $nlp->readability_html( $html );
2727

2828
$this->msg( $article );
2929

tests/Unit/SentimentTest.php

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
<?php
2+
3+
namespace Tests\Unit;
4+
5+
use Tests\TestCase;
6+
7+
class SentimentTest extends TestCase
8+
{
9+
10+
/** @test */
11+
public function sentiment_test()
12+
{
13+
$nlp = new \Web64\Nlp\NlpClient( $this->nlpserver_config['hosts'], $this->nlpserver_config['debug'] );
14+
15+
$text = "Big Ben is the largest of five bells and weighs 13.7 tonnes. It was the largest bell in the United Kingdom for 23 years.
16+
The origin of the bell's nickname is open to question; it may be named after Sir Benjamin Hall, who oversaw its installation, or heavyweight boxing champion Benjamin Caunt.
17+
Four quarter bells chime at 15, 30 and 45 minutes past the hour and just before Big Ben tolls on the hour. The clock uses its original Victorian mechanism, but an electric motor can be used as a backup.";
18+
19+
$sentiment = $nlp->sentiment( $text, 'en' );
20+
$this->msg( "EN Sentiment:" . $sentiment );
21+
22+
$sentiment = $nlp->sentiment( "This is the worst product ever" );
23+
$this->msg( "EN -Sentiment:" . $sentiment );
24+
25+
$sentiment = $nlp->sentiment( "This is great! " );
26+
$this->msg( "EN +Sentiment:" . $sentiment );
27+
28+
29+
// $sentiment = $nlp->sentiment( "Detter er helt forferdelig og skrekkelig og uverdig dårlig værst uhyggelig jævlig. hater dette. ", 'no' );
30+
// $this->msg( "NO -Sentiment:" . $sentiment );
31+
32+
// $sentiment = $nlp->sentiment( "Detter er helt fantastisk bra, det beste jeg har sett. Imponert over den gode kvaliteten. sexy, spesiell, sær, trygg, nummen, takknemmlig, tilfreds, forelsket, modig, søt, snill, hjelpsom, fantastisk, ambisjonsrik, omtenksom, macho, fin, morsom, tålmodig, familieorientert, arbeidsom, iderik, kreativ, intelligent! S", 'no' );
33+
// $this->msg( "NO +Sentiment:" . $sentiment );
34+
35+
$this->assertNotNull( $sentiment );
36+
}
37+
38+
}

0 commit comments

Comments
 (0)