-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcollaboration.py
More file actions
128 lines (107 loc) · 6.2 KB
/
collaboration.py
File metadata and controls
128 lines (107 loc) · 6.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import requests
import json
import time
# Short OpenAlex IDs of the Puglia institutions. Lineage entries returned by
# the API are compared against these after being reduced to their short,
# upper-case form.
PUGLIA_SHORT_IDS = [
    "I68618741",
    "I142910587",
    "I106296451",
    "I5561750",
    "I4210108634",
    "I4210166808",
    "I4210144761",
    "I4210099368",
    "I4210148743",
    "I4210131270",
    "I4210117053",
    "I4210122493",
]

# OpenAlex endpoint that is queried for works.
BASE_URL = "https://api.openalex.org/works"

# Original filter (the grant clause was removed for speed and stability).
# It is a comma-separated list of OpenAlex filter clauses:
#   1. a boolean title/abstract search over the blue-economy vocabulary,
#   2. at least one Italian authorship,
#   3. publication years 2014-2024,
#   4. at least one institution whose lineage contains a Puglia ID
#      (derived from PUGLIA_SHORT_IDS so the two can never drift apart),
#   5. exclusion of lineage i4210155236.
# NOTE(review): two terms carry a trailing `*`; confirm that the OpenAlex
# search syntax honours wildcards rather than stripping them.
QUERY_FILTER = ",".join(
    (
        "title_and_abstract.search:"
        + " OR ".join(
            (
                '"blue economy"',
                '"blue growth"',
                '"ocean economy"',
                '"marine economy"',
                '"economia blu"',
                '"crescita blu"',
                '"crescita del blu"',
                "fisheries",
                "fishing",
                "aquaculture",
                "mariculture",
                '"fish farming"',
                "seafood",
                '"marine biotechnology"',
                "microalgae",
                "macroalgae",
                "seaweed",
                "algae",
                '"offshore wind"',
                '"marine renewable energy"',
                '"wave energy"',
                '"tidal energy"',
                '"blue energy"',
                "desalination",
                '"seawater desalination"',
                '"marine spatial planning"',
                '"blue carbon"',
                "seagrass",
                "posidonia",
                "benthic",
                '"marine ecology"',
                '"marine biodiversity"',
                '"maritime transport"',
                "shipping",
                '"port infrastructure"',
                "shipbuilding",
                '"ship repair"',
                '"naval architecture"',
                "boatbuilding",
                "yacht*",
                '"coastal management"',
                '"coastal erosion"',
                "shoreline",
                '"coastal zone"',
                '"coastal tourism"',
                '"marine tourism"',
                '"nautical tourism"',
                '"cruise tourism"',
                '"microplastic*"',
                '"marine littering"',
                '"marine pollution"',
                '"port"',
            )
        ),
        "authorships.countries:IT",
        "publication_year:2014-2024",
        "authorships.institutions.lineage:"
        + "|".join(short_id.lower() for short_id in PUGLIA_SHORT_IDS),
        "authorships.institutions.lineage:!i4210155236",
    )
)
def _fetch_page(cursor, timeout=30):
    """Request one cursor page of works from OpenAlex and return the decoded JSON.

    Raises ``requests.HTTPError`` on a non-2xx answer so that an API error can
    never be mistaken for "no more results" and silently truncate the dataset.
    """
    params = {
        "filter": QUERY_FILTER,
        "select": "id,title,authorships",
        "per_page": 200,
        "cursor": cursor,
    }
    response = requests.get(BASE_URL, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


def _record_work(collab_map, work, puglia_ids):
    """Fold one work into ``collab_map``, counting each institution pair once.

    A Puglia institution is any institution whose lineage contains one of
    ``puglia_ids``. Its role in the work is "IN" when at least one of its
    authors is first or last author, otherwise "OUT".
    """
    institutions = {}  # institution id -> descriptive fields, one per work
    puglia_roles = {}  # Puglia institution id -> "IN" / "OUT" for this work

    for authorship in work.get("authorships") or []:
        position = authorship.get("author_position")
        role = "IN" if position in ("first", "last") else "OUT"
        countries = authorship.get("countries") or []

        for inst in authorship.get("institutions") or []:
            inst_id = inst.get("id")
            if not inst_id:
                # Nothing to key the institution on.
                continue

            # Lineage entries are reduced to their last path segment, upper-cased.
            lineage = {
                str(url).rsplit("/", 1)[-1].upper()
                for url in inst.get("lineage") or []
            }
            in_puglia = not puglia_ids.isdisjoint(lineage)

            if inst_id not in institutions:
                institutions[inst_id] = {
                    "nome": inst.get("display_name"),
                    # NOTE(review): partners inside Puglia are labelled
                    # "Inter-regionale"; "Intra-regionale" may have been
                    # intended. Left unchanged because consumers of the JSON
                    # may match on the label - confirm before renaming.
                    "ambito": "Inter-regionale" if in_puglia else "Extra-regionale",
                    # Prefer the institution's own country; the authorship
                    # country list mixes all affiliations of the author.
                    "paese": inst.get("country_code")
                    or (countries[0] if countries else "N/A"),
                }

            # First/last authorship wins over a middle authorship of the
            # same institution within the same work.
            if in_puglia and puglia_roles.get(inst_id) != "IN":
                puglia_roles[inst_id] = role

    for puglia_id, role in puglia_roles.items():
        entry = collab_map.setdefault(
            puglia_id,
            {"name": institutions[puglia_id]["nome"], "partners": {}},
        )
        for partner_id, info in institutions.items():
            if partner_id == puglia_id:
                continue  # an institution is not its own partner
            partner = entry["partners"].setdefault(
                partner_id,
                {**info, "count_IN": 0, "count_OUT": 0},
            )
            partner[f"count_{role}"] += 1


def _print_report(collab_map, n_works):
    """Print, per Puglia institution, the IN/OUT totals and its top 15 partners."""
    print(f"\nAnalisi completata su {n_works} articoli.")
    for data in collab_map.values():
        partners = data["partners"]
        sum_in = sum(p["count_IN"] for p in partners.values())
        sum_out = sum(p["count_OUT"] for p in partners.values())
        print(f"\nISTITUZIONE: {data['name']}")
        print(f"Totale Collaborazioni IN (Puglia First/Last): {sum_in}")
        print(f"Totale Collaborazioni OUT (Puglia Middle): {sum_out}")
        print(f"{'Partner':<45} | {'Ambito':<18} | {'Tipo':<6} | {'Paese':<6} | {'Tot'}")
        print("-" * 90)

        # Rank partners by total number of shared works.
        ranked = sorted(
            partners.values(),
            key=lambda p: p["count_IN"] + p["count_OUT"],
            reverse=True,
        )
        for partner in ranked[:15]:
            tot = partner["count_IN"] + partner["count_OUT"]
            pref_tipo = "IN" if partner["count_IN"] >= partner["count_OUT"] else "OUT"
            # display_name may be null in the API payload.
            nome = (partner["nome"] or "N/A")[:43]
            paese = partner["paese"] or "N/A"
            print(f"{nome:<45} | {partner['ambito']:<18} | {pref_tipo:<6} | {paese:<6} | {tot}")


def run_analysis():
    """Download the works matching ``QUERY_FILTER`` and map Puglia collaborations.

    Pages through ``BASE_URL`` with cursor pagination, and for every work
    records which Puglia institutions (lineage in ``PUGLIA_SHORT_IDS``)
    co-authored with which other institutions. Each institution pair is
    counted once per work, as "IN" when the Puglia institution holds a first
    or last authorship and as "OUT" otherwise.

    Side effects: writes ``dataset_completo.json`` (all downloaded works) and
    ``mappa_collaborazioni.json`` (the collaboration map) to the current
    directory and prints progress plus a summary table to stdout.

    Raises:
        requests.HTTPError: if OpenAlex answers with an error status.
    """
    puglia_ids = frozenset(PUGLIA_SHORT_IDS)
    all_works = []
    # {puglia_id: {"name": ..., "partners": {partner_id: {...counters...}}}}
    collab_map = {}
    cursor = "*"

    print("Interrogazione OpenAlex in corso...")
    while cursor:
        page = _fetch_page(cursor)
        results = page.get("results") or []
        if not results:
            break
        all_works.extend(results)
        for work in results:
            _record_work(collab_map, work, puglia_ids)

        # A null next_cursor marks the last page and ends the loop.
        cursor = (page.get("meta") or {}).get("next_cursor")
        print(f"Scaricati {len(all_works)} lavori...")
        time.sleep(0.1)  # short pause between consecutive requests

    # Persist both the raw works and the aggregated map.
    outputs = (
        ("dataset_completo.json", all_works),
        ("mappa_collaborazioni.json", collab_map),
    )
    for path, payload in outputs:
        with open(path, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, indent=4, ensure_ascii=False)

    _print_report(collab_map, len(all_works))
# Run the analysis only when the file is executed as a script, so that
# importing the module does not trigger network requests and file writes.
if __name__ == "__main__":
    run_analysis()