1
|
|
2
|
|
3
|
|
4
|
|
5
|
|
6
|
|
7
|
|
8
|
|
9
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14
|
|
15
|
|
16
|
|
17
|
|
18
|
|
19
|
|
20
|
|
21
|
'''
|
22
|
This script writes/updates a table into a PostgreSQL Database
|
23
|
reading data and columns format directly from a FITS file.
|
24
|
|
25
|
The database can be local or on a remote server.
|
26
|
|
27
|
Its syntax is:
|
28
|
|
29
|
$ python ingest_dataset_from_FITS.py <file>.fits
|
30
|
|
31
|
@author: Alessandro NASTASI for IAS - IDOC
|
32
|
@date: 24/04/2015
|
33
|
'''
|
34
|
|
35
|
__author__ = "Alessandro Nastasi"
|
36
|
__credits__ = ["Alessandro Nastasi", "Karin Dassas"]
|
37
|
__license__ = "GPL"
|
38
|
__version__ = "1.0"
|
39
|
__date__ = "24/04/2015"
|
40
|
|
41
|
import psycopg2
|
42
|
import pyfits
|
43
|
|
44
|
import numpy as np
|
45
|
import os, sys, re, math
|
46
|
from time import time
|
47
|
|
48
|
class bcolors:
    """ANSI terminal escape sequences used to colour console messages."""
    # Reset sequence -- restores the default terminal attributes.
    ENDC = '\033[0m'
    # Message-type colours.
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
|
55
|
|
56
|
# Mapping from FITS binary-table format codes (TFORM) to PostgreSQL column
# definitions. Codes without an entry here (X, B, C, M, P, Q) are scanned by
# convert_into_SQL_format but deliberately produce no column type.
# NOTE(review): 32/64-bit integer codes J/K are stored as 'real', which loses
# precision for large integers -- confirm this is intended.
PSQL_FORMAT = {
    'L' : 'boolean DEFAULT false',
    'I' : 'integer DEFAULT (-1) NOT NULL',
    'J' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
    'K' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
    'E' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
    'D' : 'real DEFAULT (- (1.6375E+30::numeric)::real)',
    'A' : 'character varying(1027)'
}

def convert_into_SQL_format(fits_format):
    """Convert from FITS to PSQL formats.

    Scans *fits_format* (e.g. '16A', 'E', 'J') for known FITS type codes and
    returns the matching PostgreSQL column definition, or '' when no mapped
    code is present. If several mapped codes occur, the last one in the scan
    order wins (same behaviour as the original loop).
    """
    formats = 'LXBIJKAEDCMPQ'
    psql_format = ''
    for char in formats:
        # 'char in fits_format' replaces the old len(split(char)) > 1 test.
        if char in fits_format:
            # Bug fix: X/B/C/M/P/Q are scanned but have no mapping; the
            # original PSQL_FORMAT[char] raised KeyError on such formats.
            psql_format = PSQL_FORMAT.get(char, psql_format)
    return psql_format
|
80
|
|
81
|
def RADECtoXYZ(RA, DEC):
    """Convert an equatorial pointing (RA, DEC, degrees) to Cartesian [X, Y, Z]
    on the unit sphere."""
    ra_rad = math.radians(RA)
    dec_rad = math.radians(DEC)
    # cos(DEC) is shared by the X and Y components, so compute it once.
    cos_dec = math.cos(dec_rad)
    return [cos_dec * math.cos(ra_rad),
            cos_dec * math.sin(ra_rad),
            math.sin(dec_rad)]
|
91
|
|
92
|
dbname = "'<database_name>'"
|
93
|
|
94
|
if (len(sys.argv) > 1):
|
95
|
filename = sys.argv[1]
|
96
|
else:
|
97
|
print bcolors.WARNING + "\n\tSintax:\t$ python ingest_dataset_from_FITS.py <file>.fits\n" + bcolors.ENDC
|
98
|
os._exit(0)
|
99
|
|
100
|
host = raw_input("\n> Where is the dataset to update/create? (enter 0 to exit)\n\t- localhost [1]\n\t- remote server "+bcolors.WARNING+'<server_name>'+bcolors.ENDC+" [2]\n\t--> ")
|
101
|
choice = False
|
102
|
|
103
|
while not choice:
|
104
|
if host=='1':
|
105
|
|
106
|
user = "'postgres'"
|
107
|
host = "'localhost'"
|
108
|
pwd = "''"
|
109
|
choice = True
|
110
|
|
111
|
elif host=='2':
|
112
|
|
113
|
user = "'<username>'"
|
114
|
host = "'<server_name>'"
|
115
|
pwd = "''"
|
116
|
choice = True
|
117
|
|
118
|
elif host=='0':
|
119
|
print '\nExit.\n'; os._exit(0)
|
120
|
|
121
|
else:
|
122
|
print bcolors.WARNING+'\n!! Choice not valid !!'+ bcolors.ENDC
|
123
|
host = raw_input('\n\t> Please enter 1, 2 or 0: ')
|
124
|
|
125
|
fileInput = pyfits.open(filename)
|
126
|
fileExtension = filename.split('.')[1].strip()
|
127
|
|
128
|
input_mode = 'fits'
|
129
|
|
130
|
|
131
|
'''
|
132
|
if fileExtension == 'fits': input_mode = 'fits'
|
133
|
else: input_mode = 'csv'
|
134
|
'''
|
135
|
|
136
|
dataset = raw_input('\n> Please enter the name of the new dataset to create into the %s database: ' % dbname)
|
137
|
|
138
|
|
139
|
conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+pwd+"")
|
140
|
|
141
|
|
142
|
cur = conn.cursor()
|
143
|
|
144
|
|
145
|
if input_mode == 'fits':
|
146
|
print '\n- Reading/storing data from FITS table (it may take some time. Please wait...)'
|
147
|
data = fileInput[1].data
|
148
|
|
149
|
table_size = data.size
|
150
|
|
151
|
fields = (data.names)
|
152
|
fields_format = (data.formats)
|
153
|
data2D = [[data[field][i] for field in fields] for i in range(table_size)]
|
154
|
|
155
|
|
156
|
table = False
|
157
|
|
158
|
|
159
|
cur.execute("select exists(select * from information_schema.tables where table_name=%s)", (dataset,))
|
160
|
table = cur.fetchone()[0]
|
161
|
drop_cascade = 'n'
|
162
|
|
163
|
if table:
|
164
|
print '\n- Dataset already exists. Dropping it...'
|
165
|
try:
|
166
|
cur.execute("DROP TABLE "+dataset+";")
|
167
|
except:
|
168
|
|
169
|
print bcolors.WARNING+"\n>> Impossible to drop the table, possibly because other elements (e.g. VIEWS) depend on it. <<"+bcolors.ENDC
|
170
|
drop_cascade = raw_input("\n\t> Do you want to use the 'DROP ... CASCADE' option to delete them too? [y/n]: ")
|
171
|
if drop_cascade in 'YESyes1':
|
172
|
|
173
|
conn = psycopg2.connect("dbname="+dbname+" user="+user+" host="+host+" password="+pwd+"")
|
174
|
cur = conn.cursor()
|
175
|
cur.execute("DROP TABLE "+dataset+" CASCADE;")
|
176
|
print '\n\t- Dataset %s and all his dependencies successfully dropped.' % dataset
|
177
|
else:
|
178
|
print "\n- Exit.\n"; os._exit(0)
|
179
|
|
180
|
else:
|
181
|
|
182
|
print '\n- Table does not exist. A new one will be created...'
|
183
|
|
184
|
|
185
|
if ('RA' in fields) and ('DEC' in fields):
|
186
|
print '\n- Found RA and DEC. The Cartesian coordinates x,y,z will be computed and appended to the dataset as additional columns...'
|
187
|
fields.extend(['x','y','z'])
|
188
|
fields_format.extend(['E','E','E'])
|
189
|
xyz = [RADECtoXYZ(data['RA'][j], data['DEC'][j]) for j in range(len(data2D))]
|
190
|
data2D = np.column_stack( [ data2D , xyz ] )
|
191
|
|
192
|
|
193
|
fields_string_length = [len(field.split('A')) for field in fields_format]
|
194
|
|
195
|
print '\n- Creating/updating the table...\n'
|
196
|
createTable_cmd = "CREATE TABLE %s (id integer PRIMARY KEY" % dataset
|
197
|
for j, name in enumerate(fields):
|
198
|
|
199
|
createTable_cmd += ", %s %s" % (name, convert_into_SQL_format(fields_format[j]) )
|
200
|
|
201
|
createTable_cmd +=");"
|
202
|
cur.execute(createTable_cmd)
|
203
|
|
204
|
|
205
|
for i in range(table_size):
|
206
|
sys.stdout.write("- Filling the %sth row of the table...\r" % i)
|
207
|
sys.stdout.flush()
|
208
|
toExecute = "INSERT INTO %s (id " % dataset
|
209
|
for field in fields: toExecute += ", %s" % field
|
210
|
toExecute += ") VALUES (%s " % i
|
211
|
for j, field in enumerate(fields):
|
212
|
|
213
|
if fields_string_length[j]>1:
|
214
|
toExecute += ", '%s'" % data2D[i][j]
|
215
|
else:
|
216
|
if str(data2D[i][j]) == 'nan': toExecute += ", NULL"
|
217
|
else: toExecute += ", %s" % data2D[i][j]
|
218
|
|
219
|
toExecute +=")"
|
220
|
cur.execute(toExecute)
|
221
|
if i == table_size-1: sys.stdout.write("- Filling the %sth row of the table..." % i +bcolors.OKGREEN+"\t[OK]"+bcolors.ENDC+"\r")
|
222
|
|
223
|
|
224
|
conn.commit()
|
225
|
|
226
|
|
227
|
cur.close()
|
228
|
conn.close()
|
229
|
|
230
|
print "\n--> The dataset "+bcolors.OKGREEN+"'%s'" % dataset+bcolors.ENDC+" has been updated/created into the "+bcolors.OKBLUE+dbname+bcolors.ENDC+" database in host: "+bcolors.OKBLUE+host+bcolors.ENDC+".\n"
|
231
|
|