spectroplot/data_reader.py at main · physicien/spectroplot · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/python3

import sys                          #sys files processing
import re                           #regex
from pathlib import Path            #path processing (replace os)
from global_constants import specstring_start, specstring_end

class SpectrumData(object):
    """
    Object which will contain the spectrum data extracted from the
    orca.out/experiment.asc/orca.spectrum/orca.spectrum.rootX file.

    All attributes are filled in when the object is constructed:

    path       -- the path given by the caller
    name       -- file name with up to two trailing suffixes removed
    filetype   -- every suffix of the file name joined into one string
    rootnumber -- X for a ``.spectrum.rootX`` file, otherwise 0
    data       -- ``[xlist, ylist]``, two lists of floats
    """

    # ``.spectrum.rootX`` at the very end of the joined suffixes; group 1 is X.
    # The dots are escaped and the pattern is anchored so that neither
    # ``.xspectrumxroot1`` nor ``.spectrum.root1.bak`` is accepted by mistake.
    _ROOT_SUFFIX_RE = re.compile(r"\.spectrum\.root(\d+)$")
    # Version number on the "Program Version" line; group 1 is the major part.
    _VERSION_RE = re.compile(r"(\d+)\.\d+\.\d+")
    # A digit, some whitespace, then a digit: the line holds numeric columns.
    _DATA_LINE_RE = re.compile(r"\d\s+\d")

    def __init__(self, path):
        self.path = path
        self.name = self.read_name()
        self.filetype = self.read_ext()
        self.rootnumber = self.read_root()
        self.data = self.read_data()

    def read_name(self):
        """Return the file name stripped of (at most) its last two suffixes."""
        # Two passes so that "orca.spectrum.root1" becomes "orca".
        return Path(Path(self.path).stem).stem

    def read_ext(self):
        """Return all suffixes of the file name joined, e.g. '.spectrum.root1'."""
        return "".join(Path(self.path).suffixes)

    def read_root(self):
        """Return the root number of a ``.spectrum.rootX`` file, else 0."""
        match = self._ROOT_SUFFIX_RE.search(self.filetype)
        if match is None:
            return 0
        return int(match.group(1))

    def read_out_abs(self):
        """
        Extract the absorption table from an ORCA output file.

        The table is the text between the first line containing
        ``specstring_start`` and the next line containing ``specstring_end``
        (or the end of the file). Only lines matching ``_DATA_LINE_RE`` are
        parsed. The columns read depend on the ORCA major version found on
        the "Program Version" line: 1 and 3 below version 6, 4 and 7
        otherwise.

        Returns:
            (energylist, intenslist) -- energies (cm-1) and intensities (fosc).

        Exits the program with status 1 if ``specstring_start`` never occurs.
        Lets ``OSError`` propagate if the file cannot be opened.
        """
        found_uv_section = False
        energylist = []       # energy cm-1
        intenslist = []       # fosc
        # Fall back to the pre-ORCA-6 column layout when no usable version
        # line precedes the table, instead of failing on unbound names.
        energy_col, intens_col = 1, 3

        with open(self.path, 'r') as file:
            for line in file:
                # detect ORCA version
                if "Program Version" in line:
                    version = self._VERSION_RE.search(line)
                    if version is not None:
                        if int(version.group(1)) < 6:
                            energy_col, intens_col = 1, 3
                        else:
                            energy_col, intens_col = 4, 7

                if specstring_start not in line:
                    continue

                # found UV data in orca.out: consume lines from the same
                # file iterator until the end marker (or end of file)
                found_uv_section = True
                for table_line in file:
                    if specstring_end in table_line:
                        break
                    if self._DATA_LINE_RE.search(table_line):
                        fields = table_line.split()
                        energylist.append(float(fields[energy_col]))
                        intenslist.append(float(fields[intens_col]))
                # Only the first table is read.
                break

        # no UV data in orca.out -> exit here
        if not found_uv_section:
            print(f"'{specstring_start}' not found in '{self.path}'")
            sys.exit(1)

        return energylist, intenslist

    def read_out(self):
        """Read an ORCA output file; currently only the absorption table."""
        return self.read_out_abs()  # Temp solution before adding IR

    def read_asc(self):
        """
        Read a two-column ``.asc`` file.

        Every non-blank line must start with two whitespace-separated
        numbers; blank lines are skipped.

        Returns:
            (wavelengthlist, intenslist) -- first and second column as floats.
        """
        wavelengthlist = []
        intenslist = []
        with open(self.path, 'r') as file:
            for line in file:
                fields = line.split()
                if not fields:
                    # tolerate empty / trailing blank lines
                    continue
                wavelengthlist.append(float(fields[0]))
                intenslist.append(float(fields[1]))

        return wavelengthlist, intenslist

    def read_spectrum(self):
        """
        Read an ORCA ``.spectrum`` / ``.spectrum.rootX`` file.

        Only lines matching ``_DATA_LINE_RE`` are parsed; anything else
        (e.g. a header line) is ignored.

        Returns:
            (energylist, intenslist) -- first and second column as floats.
        """
        energylist = []
        intenslist = []
        with open(self.path, 'r') as file:
            for line in file:
                if self._DATA_LINE_RE.search(line):
                    fields = line.split()
                    energylist.append(float(fields[0]))
                    intenslist.append(float(fields[1]))

        return energylist, intenslist

    def read_data(self):
        """
        Pick the reader matching ``self.filetype`` and return ``[xlist, ylist]``.

        Exits the program with status 1 when the file type is not supported
        or the file cannot be read.
        """
        fpath = self.path
        fext = self.filetype

        if fext == '.out':
            reader = self.read_out
        elif fext == '.asc':
            reader = self.read_asc
        elif fext == '.spectrum' or self._ROOT_SUFFIX_RE.search(fext):
            reader = self.read_spectrum
        else:
            print(r"warning! The file %s couldn't be opened." % fpath)
            sys.exit(1)

        try:
            xlist, ylist = reader()
        except FileNotFoundError:
            # file not found -> exit here
            print(f"'{fpath}'" + " not found")
            sys.exit(1)
        except OSError as err:
            # any other I/O failure (permissions, directory, ...) -> exit here
            print(f"'{fpath}' could not be read: {err}")
            sys.exit(1)

        return [xlist, ylist]