import itertools
import pandas as pd
import numpy as np
# Import the necessary libraries
import plotly.offline as pio
import plotly.graph_objs as go
import plotly.express as px
# [jdf] no need to specify the renderer but, for interactive use, init_notebook should be called
# pio.renderers.default = "jupyterlab"
# Set notebook mode to work in offline
# pio.init_notebook_mode()
# pio.init_notebook_mode(connected=True)
width = 750
import sqlite3
#### Data Preparation
# static data – codes -> names etc.
staticdata = dict(
decision = {
'C': 'Confer vs. cond Accept', # relevant for the 2020 and 2021 data have a different meaning
'A': 'Accept', # for the 2020 data
'A2': 'Accept', # after the second round, should be 120 in 2022
'R': 'Reject', # reject after the first round -- should be 322 in 2022
'R2': 'Reject in round 2', # reject after the second round -- should be 2 in 2022
'R-2nd': 'Reject in round 2',
'DR-S': 'Desk Reject (Scope)', # should be 7 in 2022
'DR-P': 'Desk Reject (Plagiarism)', # should be 4 in 2022
'AR-P': 'Admin Reject (Plagiarism)', # should be 1 in 2022
'DR-F': 'Desk Reject (Format)', # should be 4 in 2022
'R-Strong': 'Reject Strong', # cannot resubmit to TVCG for a year
'T': 'Reject TVCG fasttrack', # Explicitly invited to resubmit to TVCG, status in major revision
},
FinalDecision = { # Just flatten to Accept and Reject
'C': 'Accept',
'A': 'Accept', # for the 2020 data
'A2': 'Accept', # after the second round, should be 120 in 2022
'R': 'Reject', # reject after the first round -- should be 322 in 2022
'R2': 'Reject', # reject after the second round -- should be 2 in 2022
'R-2nd': 'Reject',
'DR-S': 'Reject', # should be 7 in 2022
'DR-P': 'Reject', # should be 4 in 2022
'AR-P': 'Reject', # should be 1 in 2022
'DR-F': 'Reject', # should be 4 in 2022
'R-Strong': 'Reject',
'T': 'Reject',
},
area = {
'T&E': 'Theoretical & Empirical',
'App': 'Applications',
'S&R': 'Systems & Rendering',
'R&I': 'Representations & Interaction',
'DTr': 'Data Transformations',
'A&D': 'Analytics & Decisions',
},
bid = {
0: 'no bid',
1: 'want',
2: 'willing',
3: 'reluctant',
4: 'conflict'
},
stat = {
'Prim': 'Primary',
'Seco': 'Secondary'
},
keywords = pd.read_csv("../data/2021/keywords.csv", sep=';'), # 2021 is correct as there was no new keywords file in 2022
colnames = {
'confsubid': 'Paper ID',
'rid': 'Reviewer',
'decision': 'Decision',
'area': 'Area',
'stat': 'Role',
'bid': 'Bid'
}
)
dbcon = sqlite3.connect('../data/vis-area-chair.db') #[jdf] assume data is in ..
submissions_raw20 = pd.read_sql_query('SELECT * from submissions WHERE year = 2020', dbcon, 'sid')
submissions_raw21 = pd.read_sql_query('SELECT * from submissions WHERE year = 2021', dbcon, 'sid')
submissions_raw22 = pd.read_sql_query('SELECT * from submissions WHERE year = 2022', dbcon, 'sid')
submissions_raw23 = pd.read_sql_query('SELECT * from submissions WHERE year = 2023', dbcon, 'sid')
submissions_raw24 = pd.read_sql_query('SELECT * from submissions WHERE year = 2024', dbcon, 'sid')
submissions_raw = pd.read_sql_query('SELECT * from submissions', dbcon, 'sid')
#print(submissions_raw24)
submissions = (submissions_raw
.join(
pd.read_sql_query('SELECT * from areas', dbcon, 'aid'),
on='aid'
)
.assign(Keywords = lambda df: (pd
.read_sql_query('SELECT * FROM submissionkeywords', dbcon, 'sid')
.loc[df.index]
.join(
pd.read_sql_query('SELECT * FROM keywords', dbcon, 'kid'),
on='kid'
)
.keyword
.groupby('sid')
.apply(list)
))
.assign(**{'# Keywords': lambda df: df.Keywords.apply(len)})
.assign(**{'FinalDecision': lambda df: df['decision']})
.replace(staticdata)
.rename(columns = staticdata['colnames'])
.drop(columns = ['legacy', 'aid'])
# .set_index('sid')
# .set_index('Paper ID')
# note -- I changed the index, since 'Paper ID' was not unique for multiple years.
# By not setting the index to 'Paper ID' the index remains with 'sid'.
# However, 'sid' is used as a unique index in the creation of the database anyways.
)
# replace the old 'Paper ID' with a unique identifier, so that the code from 2021 will work
submissions = submissions.rename(columns = {'Paper ID':'Old Paper ID'})
submissions.reset_index(inplace=True)
submissions['Paper ID'] = submissions['sid']
submissions = submissions.set_index('Paper ID')
#submissions colums: (index), sid (unique id), Paper ID (unique), Old Paper ID, Decision, year, Area, Keywords (as a list), # Keywords
all_years = submissions['year'].unique()
#rates_decision computes the acceptance rates (and total number of papers) per year
#rates_decision: (index), Decision, year, count, Percentage
rates_decision = (submissions
.value_counts(['Decision', 'year'])
.reset_index()
# .rename(columns = {0: 'count'})
)
rates_decision['Percentage'] = rates_decision.groupby(['year'])['count'].transform(lambda x: x/x.sum()*100)
rates_decision = rates_decision.round({'Percentage': 1})
#rates_decision computes the acceptance rates (and total number of papers) per year
#rates_decision: (index), Decision, year, count, Percentage
rates_decision_final = (submissions
.value_counts(['FinalDecision', 'year'])
.reset_index()
# .rename(columns = {0: 'count'})
)
rates_decision_final['Percentage'] = rates_decision_final.groupby(['year'])['count'].transform(lambda x: x/x.sum()*100)
rates_decision_final = rates_decision_final.round({'Percentage': 1})
#submissions
#bids_raw: (index), Reviewer ID, sid (unique paper identifier over mult years), match score, bid of the reviewer, role of the reviewer, Paper ID
bids_raw = (pd
.read_sql_query('SELECT * from reviewerbids', dbcon)
.merge(submissions_raw['confsubid'], on='sid')
.replace(staticdata)
.rename(columns = staticdata['colnames'])
)
#bids_raw
## Renaming Paper ID to Old Paper ID, setting Paper ID to sid, keeping all 3 for now...
bids_raw = bids_raw.rename(columns = {'Paper ID':'Old Paper ID'})
bids_raw['Paper ID'] = bids_raw['sid']
# bids = Reviewer, sid, Bid (how the reviewer bid on this paper)
# doesn't include review/sid that were not bid for [.query('Bid != "no bid"')]
bids = (bids_raw
.query('Bid != "no bid"')
# Paper ID is not unique over multiple years!
# .drop(columns = ['sid'])
# [['Reviewer','Paper ID', 'Bid']]
[['Reviewer','sid', 'Paper ID', 'Bid']]
.reset_index(drop = True)
)
# matchscores becomes a table to reviewer/sid with the match scores
# many of these will be "NaN" since we now have multiple years together.
# we need to check whether the reviewer IDs remain unique across the years!
matchscores = (bids_raw
# Paper ID is not unique over multiple years!
# [['Reviewer','Paper ID','match']]
[['Reviewer','sid','Paper ID','match']]
# Paper ID is not unique over multiple years!
# .set_index(['Reviewer', 'Paper ID'])
.set_index(['Reviewer', 'Paper ID'])
.match
.unstack(level=1)
)
# assignments = Reviewer, sid, Role (primary, secondary)
# doesn't include review/sid that were not assigned [.query('Role != ""')]
assignments = (bids_raw
.query('Role != ""')
# Paper ID is not unique over multiple years!
# [['Reviewer', 'Paper ID', 'Role']]
[['Reviewer', 'sid', 'Paper ID', 'Role']]
.reset_index(drop = True)
)
del dbcon
#### Plot Defaults
acc_template = go.layout.Template()
acc_template.layout = dict(
font = dict(
family='Fira Sans',
color = 'black',
size = 13
),
title_font_size = 14,
plot_bgcolor = 'rgba(255,255,255,0)',
paper_bgcolor = 'rgba(255,255,255,0)',
margin = dict(pad=10),
xaxis = dict(
title = dict(
font = dict( family='Fira Sans Medium', size=13 ),
standoff = 10
),
gridcolor='lightgray',
gridwidth=1,
automargin = True,
fixedrange = True,
),
yaxis = dict(
title = dict(
font = dict( family='Fira Sans Medium', size=13 ),
standoff = 10,
),
gridcolor='lightgray',
gridwidth=1,
automargin = True,
fixedrange = True,
),
legend=dict(
title_font_family="Fira Sans Medium",
),
colorway = px.colors.qualitative.T10,
hovermode = 'closest',
hoverlabel=dict(
bgcolor="white",
bordercolor='lightgray',
font_color = 'black',
font_family = 'Fira Sans'
),
)
acc_template.data.bar = [dict(
textposition = 'inside',
insidetextanchor='middle',
textfont_size = 12,
)]
px.defaults.template = acc_template
px.defaults.category_orders = {
'Decision': list(staticdata['decision'].values()),
'FinalDecision': list(staticdata['FinalDecision'].values()),
'Area': list(staticdata['area'].values()),
'Short Name': staticdata['keywords']['Short Name'].tolist(),
}
config = dict(
displayModeBar = False,
scrollZoom = False,
responsive = False
)