VideoIndexer/getTranscript.py at master · carmatthews/VideoIndexer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Prints the transcript lines of an indexed video by parsing the response of the Video Indexer "Get Video Index" API.
# API reference: https://api-portal.videoindexer.ai/docs/services/Operations/operations/Get-Video-Index
# Description of the returned JSON: https://docs.microsoft.com/en-us/azure/media-services/video-indexer/video-indexer-output-json-v2

import requests


##### CONFIGURE YOUR ACCOUNTS & ACCESS HERE

# Id of the video to read. It is assigned when the file is uploaded; if you
# didn't catch it, use getListVideos.py to see all of your files.
video_indexer_video_id = "REPLACE"

# Configure access to Video Indexer

# Account id: see account settings in the Video Indexer Portal
# (https://www.videoindexer.ai/settings/account).
video_indexer_account_id = "REPLACE"

# Region: shown at the top of the Video Indexer Portal
# (https://www.videoindexer.ai); may be trial, WestUS etc.
video_indexer_region = "REPLACE"

# Subscription key: see Profile in the Video Indexer Developer Portal
# (https://api-portal.videoindexer.ai/developer).
video_indexer_key = "REPLACE"

#######  END CONFIGURATION SECTION

# Get authorization token for Video Indexer API
auth_uri = 'https://api.videoindexer.ai/auth/{}/Accounts/{}/AccessToken'.format(video_indexer_region, video_indexer_account_id)
auth_params = {'allowEdit': 'true'}
auth_header = {'Ocp-Apim-Subscription-Key': video_indexer_key}

# Get indexed results for Video/Audio file
check_uri = 'https://api.videoindexer.ai/{}/Accounts/{}/Videos/{}/Index'.format(video_indexer_region, video_indexer_account_id, video_indexer_video_id)
check_header = {'x-ms-client-request-id': ''}

# Seconds to wait on each request. Without a timeout, requests.get() can block
# forever if the service stops answering; a timeout surfaces as a
# RequestException, which is reported by the handler at the bottom.
request_timeout = 30


def _is_success(response):
    """Return True when the response status code is 2xx."""
    return response.status_code // 100 == 2


def _decode_body(response):
    """Return the JSON-decoded body, or the raw text if the body is not JSON.

    Error responses are not guaranteed to be JSON (e.g. an HTML page from a
    gateway), so decoding must not be allowed to crash the error report.
    """
    try:
        return response.json()
    except ValueError:
        return response.text


def _print_transcript(index):
    """Print one line per transcript entry of every video in *index*.

    *index* is the decoded body of the index request. Keys that are absent
    are treated as empty instead of raising, so a video without a transcript
    simply prints nothing.
    """
    for video in index.get('videos') or []:
        #print("Video id: {} insights are: {}".format(video.get('id'), video.get('insights')))
        insights = video.get('insights') or {}
        for transcript in insights.get('transcript') or []:
            #print(transcript)  #use this to view all transcript data available
            # Only the first instance's time range is reported; fall back to
            # an empty instance so a missing/empty list prints None-None.
            instances = transcript.get('instances') or [{}]
            first_instance = instances[0]
            print("Line {}: {}-{} Speaker#{} in Language {} - {}".format(
                transcript.get('id'),
                first_instance.get('start'),
                first_instance.get('end'),
                transcript.get('speakerId'),
                transcript.get('language'),
                transcript.get('text')))


try:
    # The token request lives inside the try block so that connection
    # problems here are reported the same way as for the index request.
    auth_response = requests.get(auth_uri, headers=auth_header, params=auth_params, timeout=request_timeout)

    if not _is_success(auth_response):
        # Stop here: the body is an error description, not a usable token.
        print("Error: {} {}".format(auth_response.status_code, _decode_body(auth_response)))
    else:
        # The token comes back wrapped in double quotes; strip them.
        auth_token = auth_response.text.replace('"', '')

        check_params = {
            'accessToken': auth_token,
            #'language': '{string}',  #Note this could be used to translate a transcript into another language set to 'en-US' or 'es-ES'
            #'reTranslate': 'False',
            #'includeStreamingUrls': 'True'
            }

        response = requests.get(check_uri, headers=check_header, params=check_params, timeout=request_timeout)
        response_body = _decode_body(response)

        # Consider any status other than 2xx an error; a 2xx body that is not
        # a JSON object cannot be an index either.
        if not _is_success(response) or not isinstance(response_body, dict):
            print("Error: {} {}".format(response.status_code, response_body))
        elif response_body.get('state') == 'Processed':  #The video has been indexed and transcript should be available
            _print_transcript(response_body)
        else:
            print('Video Id: {} has state: {}.'.format(response_body.get('id'), response_body.get('state')))

except requests.exceptions.RequestException as e:
    # A serious problem happened, like an SSLError, InvalidURL or a timeout
    print("Error: {}".format(e))