%% Antes de processar este arquivo LaTeX (LaTeX2e) deve
%% verificar que o arquivo TEMA.cls estah no mesmo
%% diretorio. O arquivo TEMA.cls pode ser obtido do
%% endereco www.sbmac.org.br/tema.

\documentclass{TEMA}

%\usepackage[brazil]{babel}      % para texto em Português
\usepackage[english]{babel}    % para texto em Inglês

%\usepackage[latin1]{inputenc}   % para acentuação em Português
%\input{P-margin.inf}

\usepackage[dvips]{graphics}
\usepackage{subfigure}
\usepackage{graphicx}
\usepackage{epsfig}

\usepackage{url}
\usepackage{times, amsmath, amsfonts, amssymb, amsthm}
\setcounter{MaxMatrixCols}{11}

% \newcommand{\B}{{\tt\symbol{92}}}
% \newcommand{\til}{{\tt\symbol{126}}}
% \newcommand{\chap}{{\tt\symbol{94}}}
% \newcommand{\agud}{{\tt\symbol{13}}}
% \newcommand{\crav}{{\tt\symbol{18}}}

\newcommand{\R}{\mathbb{R}}
\newcommand{\F}{\mathbb{F}}
\newcommand{\Z}{\mathbb{Z}}
\newcommand{\E}{\mathbb{E}}
\newcommand{\G}{\mathbb{G}}
\newcommand{\B}{\mathbb{B}}
\newcommand{\N}{\mathbb{N}}
\newcommand{\LL}{\mathbb{L}}
\newcommand{\M}{\mathbb{M}}
\newcommand{\A}{\mathbf{A}}
\newcommand{\X}{\mathbf{X}}
\newcommand{\Y}{\mathbf{Y}}
\newcommand{\FU}{\mathcal{F}(U)}
\newcommand{\FV}{\mathcal{F}(V)}
\newcommand{\vetx}{\mathbf{x}}
\newcommand{\vety}{\mathbf{y}}
\newcommand{\vetz}{\mathbf{z}}
\newcommand{\ima}{\mathbf{a}}
\newcommand{\imb}{\mathbf{b}}
\newcommand{\imc}{\mathbf{c}}
\newcommand{\ims}{\mathbf{s}}
\newcommand{\impulse}{\mathbf{i}_{\mathbf{h},v}}
\newcommand{\PX}{\mathcal{P}(\mathbf{X})}

\newcommand{\bb}{\begin{equation}}
\newcommand{\ee}{\end{equation}}
\newcommand{\bbb}{\begin{eqnarray}}
\newcommand{\eee}{\end{eqnarray}}
\newcommand{\benu}{\begin{enumerate}}
\newcommand{\eenu}{\end{enumerate}}
\newcommand{\tr}{\mbox{tr}}

\newcommand{\score}[1]{\mathcal{S}(#1)}

\newcommand{\vetw}{{\bf w}}
\newcommand{\vetn}{{\bf n}}
\newcommand{\tn}{\,\mathrm{t}\,}
\newcommand{\sn}{\,\mathrm{s}\,}
\newcommand{\ag}{\,\mathrm{a}\,}
\newcommand{\bpm}{\begin{bmatrix}}
\newcommand{\epm}{\end{bmatrix}}
\newcommand{\alphav}{\mbox{\boldmath$\alpha$}}
\newcommand{\thetav}{\mbox{\boldmath$\theta$}}
\newcommand{\lambdav}{\mbox{\boldmath$\lambda$}}
\newcommand{\gammav}{\mbox{\boldmath$\gamma$}}
\newcommand{\varthetav}{\mbox{\boldmath$\vartheta$}}
\newcommand{\betav}{\mbox{\boldmath$\beta$}}
\newcommand{\phiv}{\mbox{\boldmath$\phi$}}
\newcommand{\Phiv}{\mbox{\boldmath$\Phi$}}

\begin{document}

%********************************************************
\title
    {On the Criteria for Receiving a Research Productivity Fellowship from the Brazilian National Council for Scientific and Technological Development in Mathematics and Statistics\thanks{}}
%\thanks{This work was supported in part by Funda\c{c}\~ao Arauc{\'a}ria/SETI and by CNPq under grant no. 304240/2011-7.}}

\author{}
%     {M.E. VALLE. %
%      Department of Mathematics, State University of Londrina.\\ CEP 86051-990, Londrina - PR, Brazil. E-mail: valle@uel.br
%      \\ \\
%      F. SAKURAY. %
%      Department of Computer Science, State University of Londrina. \\CEP 86051-990, Londrina - PR, Brazil. E-mail:sakuray@uel.br}

\criartitulo

\runningheads{}{}% {M.E. Valle and F. Sakuray}{On the Criteria for Receiving a Research Productivity Fellowship from CNPq}

% \begin{abstract}
% {\bf Resumo}. Este documento, preparado usando-se a classe
% especial {\em TEMA.cls}, fornece algumas informações importantes
% para os autores que pretendem submeter trabalhos (artigos)
% completos para a série TEMA.
% 
% {\bf Palavras-chave}. Palavra-chave 1, palavra-chave 2,
% palavra-chave 3.
% \end{abstract}


\begin{abstract}
{\bf Abstract.} The fellowship of research productivity (PQ) granted by the national council for scientific and technological development (CNPq), besides the financial support, renders a significant status among Brazilian researchers of all areas of knowledge. Consequently, both the profile and the criteria for holding a PQ fellowship become of interest to the entire Brazilian scientific community. In this paper, we model the decision criteria as a weighted sum of the scientific production and the supervisory experience of an applicant for PQ fellowship. The scientific production is measured as the number of publications grouped according to  the QUALIS system provided by the Brazilian federal agency for the improvement of higher education (CAPES). The Lattes curricula of the successful applicants for the PQ fellowship in the field of mathematics and statistics, along with the curricula of many non-PQ fellows of similar institutions, were used to estimate the criteria adopted in the last call for PQ 
fellowship in category 2. By allowing a certain tolerance, the model reproduced the decision criteria within acceptable bounds using the previous and the current QUALIS systems over a database composed of 234 curricula. As a consequence, the model may help to decide whether a researcher applicant is worthy to receive a PQ fellowship. Furthermore, it confirmed the recognized value of publications in journals with an elevated rank in the QUALIS system.

{\bf Keywords:} Scientific production, research productivity fellowship, mathematics and statistics, support vector machines, quadratic programming problem. 
% CNPq, qualis system, apoio a pesquisa, produção científica, maquinas de vetor de suporte.
\end{abstract} 

\newsec{Introduction}

The {\em national council for scientific and technological development} (CNPq -- Conselho Nacional de Desenvolvimento Cient\'ifico e Tecnol\'ogico, in Portuguese), linked to the Brazilian {\em ministry of science and technology}, is one of the major public institutions for the support of science, technology, and innovation in Brazil. Besides funds for research projects, grants for the purchasing of equipment, and programs for the production of scientific knowledge and the establishment of research networks, CNPq also offers fellowships in Brazil and abroad. In particular, the so-called {\em fellowship of research productivity} (PQ) was conceived in the 1970s as a way to encourage researchers with outstanding scientific production in their fields. 
Currently, Brazilian researchers of all areas of knowledge desire the PQ fellowship due to the status that comes with it \cite{arruda09,oliveira11,santos10}. 
As a consequence, both the profile and the criteria for holding a PQ fellowship become of interest to the entire scientific community. 
%Some accounts on science in Brazil can be found in \cite{coutinho12,helene11,leite11,leta06,petherick10,regalado10}.

The profile and the scientific production of research productivity fellows in various areas of knowledge, including communication science \cite{toffoli11}, medicine \cite{barata03,oliveira11,oliveira12,scarpelli08}, chemistry \cite{santos10}, and zoology \cite{neves07}, have been investigated in the last years. However, as far as we know, there is no study concerning the profile and the production of PQ fellows in the field of mathematics and statistics (the two disciplines are evaluated by the same council at CNPq). Also, we noted that most papers in the literature provide demographic or statistical information -- such as the mean scientific production or the distribution based on gender, region, or institution -- of the research productivity fellows. In contrast, this paper provides a quantitative study on the criteria for receiving a PQ fellowship. Specifically, we provide a rule which helps to decide whether a researcher applicant is worthy to receive a PQ fellowship in mathematics and statistics.  

First of all, we would like to recall that the fellowship of research productivity is divided in two categories, called PQ1 and PQ2. The first category is subdivided in four levels, which are referred to as A, B, C, and D, while the category PQ2 has no subdivisions. Non-fellow applicants are eligible only for the second category and, therefore, we will focus only on the criteria for receiving a PQ2 fellowship.

According to Appendix I of the document that specifies the rules for individual fellowship of CNPq, the resolution RN-016/2006\footnote{Website of CNPq. Available at: \url{http://www.cnpq.br}. Accessed on May, 2012.}, the criteria for receiving a PQ2 fellowship must comprise, besides the evaluation of a research project by ad-hoc referees, the scientific production as well as the development of human resources in the last five years. Evidently, the scientific production should be relevant to the disciplines of mathematics and statistics. In order to measure the relevance of a publication, we classified the scientific production of a researcher according to the QUALIS system provided by the {\em Brazilian federal agency for the improvement of higher education} (CAPES -- Coordena\c{c}\~ao de Aperfei\c{c}oamento de Pessoal de N\'ivel Superior, in Portuguese). 

The QUALIS system was created to assess the scientific production of graduate programs in Brazilian institutions and it is used, in part, to manage the distribution of financial resources. In general terms, journals are ranked into the classes A1, A2, B1, B2, B3, B4, B5, and C, where A1 and C correspond respectively to the highest and lowest ranks in the QUALIS system\footnote{Website of CAPES. Available at: \url{http://www.capes.gov.br/avaliacao/qualis}. Accessed on May and June, 2012.}. Moreover, the QUALIS is based on information collected by CAPES, which is concerned with the scientific production of graduate programs. As a consequence, a journal that has not published an article linked to the scientific production of a graduate program may not be ranked in the QUALIS yet. In this case, we attributed the label ``N'', which refers to a non-classified journal.

Before June 2012, the QUALIS system in the disciplines of mathematics and statistics covered the scientific production of graduate programs from 2007 to 2009. The previous QUALIS was based on the {\em impact factor} and the {\em cited half-life} in {\em Thomson Reuters Journal Citation Reports}. However, in view of some nonexistent or inadequate indexes, it also considered subjective information available at the journal website \cite{Qualis2009}. %Also, in order to measure the nature and relevance of the applicability of a scientific production, the previous QUALIS in mathematics and statistics also provided a label which distinguished journals in the discipline of mathematics, referred to as {\em core}, from the others \cite{Qualis2009}. 
There was no journal classified as C in the previous QUALIS system.

Currently, besides the impact factor and the cited half-life, the QUALIS ranking also takes into account the {\em article influence score} (AIS) provided by {\em Thomson Reuters} \cite{Qualis2012}. As before, subjective information such as the editorial board, aims and scope of the journal, and samples of articles has been adopted in case of nonexistent impact factor or cited half-life index. %The label ``core'', for journals specific to the discipline of mathematics, have been removed from the new QUALIS ranking.

In sum, let us suppose that we can attribute to each applicant a score based on the scientific production and the formation of human resources since 2007. The scientific production is measured according to the number of papers published in each class of the QUALIS in mathematics and statistics, including the non-classified group. 
%The total number of publications classified as core is also considered if the previous QUALIS system is used to rank the scientific production. 
The formation of human resources is measured according to the number of masters and doctors who received their degrees under the supervision of the applicant. All the necessary information can be obtained from the Lattes curriculum of a researcher, which is publicly available at the Lattes platform maintained by CNPq. 

In mathematical terms, the score of a researcher is given by the following weighted sum
\bbb \score{\vetx} =& \alpha_{A1} x_{A1}+ \alpha_{A2} x_{A2}+ \alpha_{B1} x_{B1}+ \alpha_{B2} x_{B2}+ \alpha_{B3} x_{B3}+ \alpha_{B4} x_{B4} & \nonumber \\ & + \alpha_{B5} x_{B5} + \alpha_{C} x_{C} + \alpha_{N} x_{N} + \alpha_{M} x_{M}+ \alpha_{D} x_{D}, & \label{eq:scores_w} \eee
where $x_{A1}, x_{A2}, x_{B1}, x_{B2}, x_{B3}, x_{B4}, x_{B5}, x_{C}$, and $x_{N}$ denote respectively the number of papers published in journals classified as $A1$, $A2$, $B1$, $B2$, $B3$, $B4$, $B5$, $C$, and journals not listed in the QUALIS system. The values $x_{M}$ and $x_{D}$ correspond respectively to the number of masters and doctors supervised by the applicant. Alternatively, \eqref{eq:scores_w} can be written compactly as follows, where $\vetx=[x_{A1},x_{A2},\ldots,x_{D}]^T$ and $\alphav=[\alpha_{A1}, \alpha_{A2},\ldots,\alpha_{D}]^T$ are column vectors in $\R^{11}$:
\bb  \score{\vetx} = \alphav^T \vetx. \label{eq:scores} \ee 

The score $\score{\vetx}$ yields, in some sense, a quantitative measure of the worthiness of an applicant $\vetx$ to receive a PQ2 fellowship in mathematics and statistics.
Specifically, an objective decision criterion can be formulated as follows, where $\theta$ denotes the threshold for receiving a PQ2 fellowship:
\bb \begin{cases}
     \mbox{The applicant $\vetx$ receives a PQ2 fellowship if }\, \score{\vetx} \geq \theta,  \\
     \mbox{The applicant $\vetx$ does not receive a PQ2 fellowship if}\, \score{\vetx} < \theta. \\
    \end{cases}
\label{eq:decision}
\ee
The next section discusses the methodology used to estimate both the weight vector $\alphav$ and the threshold $\theta$. An estimation of the decision criteria adopted by the administrative council for the mathematical and statistical sciences of the CNPq in the last call for PQ2 fellowship is given in Section~\ref{sec:results}. In this paper, we consider the previous as well as the current QUALIS systems. A discussion of the results is given subsequently in Section~\ref{sec:discussion}. The paper finishes with some concluding remarks in Section~\ref{sec:conclusion}. This final section also provides a soft version of the decision criteria given by \eqref{eq:decision}. 
%We would like to point out that the reader who is primarily interested in the results may choose to skip Section \ref{sec:material} or read it later.

\newsec{Material and Methods} \label{sec:material}

Shortly after the publication of the list of researchers who won a PQ2 fellowship in the beginning of 2012, we collected the Lattes curricula of the 64 successful applicants. Moreover, for each name in the list, we collected the Lattes curricula of at least 2 members of the same institution who do not hold a fellowship of research productivity. In order to avoid a biased experiment, the non-PQ fellows have been selected without any specific criteria by an undergraduate student of computer science. Indeed, our non-mathematician student collected some curricula with no scientific production since 2007. Because a non-productive academic will hardly apply for a PQ2 fellowship, we removed these Lattes curricula from our database and collected some others. 

Summarizing, we elaborated a database comprising 234 Lattes curricula, in which 64 correspond to successful applicants for PQ2 fellowship in 2012 and the remaining 170 curricula represent non-PQ fellows with at least one scientific paper published since 2007. This database has been processed by software implemented in the {\tt python} programming language. Briefly, the software gathers both the scientific production and the formation of human resources from a Lattes curriculum. The scientific production is ranked either using the previous or the current QUALIS system. In both cases, the software also adds a label for the successful applicants. 

In mathematical terms, the software yielded a set $\{(\vetx_1,d_1), (\vetx_2,d_2),\ldots,(\vetx_{234},d_{234})\}$, where $\vetx_\xi=[x_{\xi,A1},\ldots,x_{\xi,D}]^T \in \R^{11}$ and $d_\xi \in \{-1,+1\}$ for all $\xi=1,\ldots,234$. The components of $\vetx_\xi$ correspond respectively to the numbers of publications sorted according to the groups $A1, A2, B1, B2, B3, B4, B5, C$, and $N$, followed by the number of masters and doctors under the supervision of the researcher $\vetx_\xi$. We have $d_\xi=+1$ if $\vetx_\xi$ corresponds to a successful PQ2 fellowship applicant and $d_\xi=-1$ if $\vetx_\xi$ represents a non-PQ fellow.

In principle, a decision criterion based on \eqref{eq:scores} and \eqref{eq:decision} should satisfy the following inequalities for all $\xi=1,2,\ldots,234$:
\bb \label{ineq1} \begin{cases} \alphav^T \vetx_\xi - \theta \geq +\rho \quad \mbox{ if } \quad d_\xi=+1,\\
     \alphav^T \vetx_\xi - \theta \leq -\rho \quad \mbox{ if } \quad d_\xi=-1,
    \end{cases} \ee
where $\rho>0$ denotes the margin of separation between the two classes: the class of successful applicants and the class of non-PQ fellows. Furthermore, the best decision criterion is given by the weight vector $\alphav$ and the threshold $\theta$ that maximize $\rho$. This problem is known in the literature as the linear {\em support vector machine} (SVM) \cite{haykin09,vapnik98,vapnik99}.

We would like to remark that the two inequalities in \eqref{ineq1} can be combined as follows
\bb \label{ineq2} d_\xi \left( \vetw^T \vetx_\xi + b\right) \geq 1, \quad \forall \, \xi=1,\ldots,234,\ee
where $\vetw =\alphav/\rho$ and $b=-\theta/\rho$. Also, it is not hard to show that maximizing the margin of separation $\rho$ is equivalent to minimizing the Euclidean norm of the weight vector $\vetw= [w_1,\ldots,w_{11}]^T \in \R^{11}$ \cite{haykin09}. Thus, in theory, the optimal decision criteria based on \eqref{eq:scores} and \eqref{eq:decision} can be obtained by solving the quadratic programming problem:
\begin{equation} \label{eq:svm}
\left\{ \begin{aligned}
& \underset{\vetw}{\text{minimize}}
& & \frac{1}{2} \vetw^T \vetw \\
& \text{subject to}
& &  d_\xi (\vetw^T \vetx_\xi+b) \geq 1, \quad \forall \xi=1,\ldots,234.
\end{aligned} \right.
\end{equation}

In practice, however, the quadratic problem \eqref{eq:svm} does not admit a solution given the Lattes curricula database. Precisely, no weight vector $\vetw$ and threshold $b$ satisfy all the constraints in \eqref{eq:svm}. This remark confirms that the decision criteria given by \eqref{eq:scores} and \eqref{eq:decision} have not been strictly adopted by the administrative council for the mathematical sciences at CNPq. Indeed, on one hand, the administrative council for the mathematical sciences takes into account the regularity of the scientific production. Also, young researchers with potential for leadership are considered by this council. On the other hand, the quantitative model given by \eqref{eq:scores} and \eqref{eq:decision} does not take into account these subjective or hard to measure criteria. Furthermore, we have not considered the merit of the research project that must be submitted by a PQ fellowship applicant. In fact, we implicitly assumed that the research projects of all applicants have been 
approved by the ad-hoc referees.

In view of the remarks in the preceding paragraph, let us introduce non-negative variables $s_1,s_2,\ldots,s_{234}$, called {\em slack variables} in the literature, and replace the constraints \eqref{ineq2} by 
\bb \label{ineq3} d_\xi \left( \vetw^T \vetx_\xi + b\right) \geq 1 - s_\xi, \quad \forall \, \xi=1,\ldots,234.\ee
Now, the goal is to find a weight vector $\vetw$ and a threshold $b$ for which the classification error, averaged over all the Lattes curricula in our database, is minimized. Formally, the quadratic programming problem \eqref{eq:svm} is substituted by
\begin{equation} \label{eq:svm2}
\left\{ \begin{aligned}
& \underset{\vetw,\mathbf{s}}{\text{minimize}}
& & \frac{1}{2} \vetw^T \vetw + \frac{1}{234} \sum_{\xi=1}^{234} s_\xi\\
& \text{subject to}
& &  d_\xi (\vetw^T \vetx_\xi+b) \geq 1 - s_\xi, \quad \forall \xi=1,\ldots,234,\\
& \text{and}& & \mathbf{s}=[s_1,s_2,\ldots,s_{234}]^T \in \R_+^{234},
\end{aligned} \right.
\end{equation}
where $\R_+$ denotes the set of non-negative real numbers. 

We solved the quadratic problem \eqref{eq:svm2} using the {\em optimized cutting plane algorithm} (OCAS) proposed by Franc and Sonnenburg \cite{franc09}, which is freely available at the website of the {\em shogun machine learning toolbox}\footnote{Available at: \url{http://www.shogun-toolbox.org/}. Accessed on May, 2012.} \cite{sonnenburg10} for the {\tt GNU Octave}\footnote{Available at: \url{http://www.gnu.org/software/octave/}. Accessed on May, 2012.}. Also, in order to simplify the interpretation of the results, we defined $\theta=100$ and $\alphav=- \theta \vetw^*/b^*$, where $\vetw^*$ and $b^*$ denote the optimal solutions of \eqref{eq:svm2}.

% \bb \left\{ \begin{tabular}{lr} 
%  & \min \frac{1}{2} \vetw^T \vetw \\
% \mbox{s.t.} & d_\xi (\vetw^T \vetx_\xi+b) \geq 1, \quad \forall \xi=1,\ldots,234. \end{tabular} \right. \ee

\newsec{Results} \label{sec:results}

The quadratic problem \eqref{eq:svm2} yielded the threshold $\theta=100$ and the weight vector
$\alphav=[\alpha_{A1},\alpha_{A2},\ldots,\alpha_{D}]^T$ shown in Table \ref{tab:alpha} as an estimation of the decision criteria for a PQ2 fellowship in the beginning of 2012. 
Using the weights and threshold obtained from the previous QUALIS system, the score number based on the objective criteria given by \eqref{eq:decision} resulted in 29 misclassified curricula, which corresponds to an error of 12.4\%. Similarly, the objective criteria obtained using the current QUALIS yielded an error of 12.8\%, i.e. 30 misclassified curricula. Figures \ref{fig:hist2009} and \ref{fig:hist2012} display the distribution of the scores of both successful PQ2 applicants and non-PQ fellows using respectively the previous and current QUALIS ranking.

\begin{table}
\begin{center}
 \begin{tabular}{||c|cccccc||} \hline \hline
 QUALIS & A1 & A2 & B1 & B2 & B3 & B4 \\ \hline
 Previous: & 42.48 &  21.66  & 16.18  & -3.64  &  0.89 & -13.34   \\
 Current: & 35.94 &  14.27 &  11.89  & -8.46  &  9.82  &  0.24   \\
\hline \hline
 \end{tabular}
 \begin{tabular}{||c|ccccc||} \hline \hline
 QUALIS & B5 & C & N  & M & D  \\ \hline
 Previous: & 5.24 &  --- &  -8.40  &  6.97 &   5.59   \\
 Current: & -11.66 &  43.84  & -3.45 &   8.70 &   4.31  \\
\hline \hline
 \end{tabular}
\end{center}
 \caption{Weights for computing the score of a researcher in the field of mathematics and statistics based on the previous and current QUALIS.} \label{tab:alpha}
\end{table}

According to the model based on the current QUALIS, 12 non-PQ researchers are worthy to become PQ2 fellows since their scores are greater than 100. Also, 18 applicants would not be contemplated with a fellowship because their scores are less than 100. Precisely, on one side there are 6 non-PQ fellows with score between 100 and 150. On the other side there are 12 successful applicants with score between 50 and 100. Similar numbers have been obtained using the previous QUALIS system. These remarks show that the administrative council may have had some problem to decide who deserved the PQ2 fellowship. 
%Also, we may suppose that approximately 10\% of the total number of applicants have competitive curricula. 
As we shall see below, the classification errors decrease considerably if we allow some level of tolerance. 

% \begin{figure}[t]
% \begin{center}
%   \resizebox{\columnwidth}{!}{\input{m-files/histogram1}}
%   \caption{Distribution of the scores of successful PQ2 applicants (dark-gray) and non-PQ fellows (gray).} \label{fig:hist}
% \end{center}
% \end{figure}

\begin{figure}[t]
\begin{center}
% \includegraphics[width=0.9\columnwidth]{Analise2009/histogram2009.png}
 \includegraphics[width=1\columnwidth]{histogram2009}
\end{center}
\caption{Distribution of the scores of successful PQ2 applicants (dark-gray) and non-PQ fellows (gray) based on the previous QUALIS.} \label{fig:hist2009}
\end{figure}

\begin{figure}[t]
\begin{center}
 %\includegraphics[width=0.9\columnwidth]{Analise2012/histogram2012.png}
  \includegraphics[width=1\columnwidth]{histogram2012}
\end{center}
\caption{Distribution of the scores of successful PQ2 applicants (dark-gray) and non-PQ fellows (gray) based on the current QUALIS.} \label{fig:hist2012}
\end{figure}

First, we would like to point out that we have collected the Lattes curricula approximately 6 months after they had been analyzed by the administrative council of CNPq. Some curricula certainly have been modified during this period of time. Also, we may assume that the administrative council will surely consider a publication in an influential journal, such as {\em Nature} or {\em Science}. However, we computed the score of a researcher using the QUALIS system and, thus, it is possible that such an influential journal has not been classified in the field of mathematics and statistics yet. As a consequence, the score of the applicant may deviate 50 points, which corresponds approximately to the difference between the largest and the lowest weights. If we allow a tolerance of 50 points in the criteria \eqref{eq:decision} based on the previous QUALIS then only 7 curricula are misclassified, which corresponds to an error of 3.0\%. 
Similarly, a tolerance of 50 points in the criteria \eqref{eq:decision} based on the current QUALIS yields an error of 5.1\%. Specifically, 6 non-PQ fellows are worthy to receive a PQ2 fellowship while 6 successful applicants would not be contemplated with the PQ-fellowship. 
We would like to remark that we have not pursued an explanation for these misclassified curricula -- we only confirmed that the python software has counted correctly their scientific production as well as the formation of human resources. Concluding, we believe that the score-based model reproduced the PQ2 decision criteria within acceptable bounds using either the previous or the current QUALIS systems.

\newsec{Discussion} \label{sec:discussion}

Let us now turn our attention to the values of the weights of the score of a researcher. Precisely, based on our {\em a posteriori} analysis, we will formulate some conjectures on the criteria for receiving a PQ2 fellowship in the field of mathematics and statistics using the previous or the current QUALIS systems.

\subsection{The Criteria Based on the Previous QUALIS}

First, note that the largest weight shown in Table \ref{tab:alpha} is attained for the scientific papers classified as $A1$ in the previous QUALIS system. Indeed, three $A1$ papers are sufficient to obtain a score greater than 100. As expected, the second and third largest weights refer to the number of papers classified as $A2$ and $B1$. However, an $A1$-paper corresponds approximately to two $A2$-papers or four $B1$-papers. In other words, the number of papers published in the groups $A1$, $A2$, and $B1$ are weighted approximately in a geometric progression with common ratio 1/2.

The weights $\alpha_{B2}$, $\alpha_{B4}$ and $\alpha_{N}$ are negative while the remaining weights, including $\alpha_{B3}$ and $\alpha_{B5}$, are positive. Therefore, the criteria adopted by the administrative council for PQ2 fellowship are not totally compatible with the ranking of journals in the previous QUALIS system, which is implicitly based on the inequalities $\alpha_{B1} \geq \alpha_{B2} \geq \alpha_{B3} \geq \alpha_{B4} \geq \alpha_{B5} \geq \alpha_{N}$.
%Therefore, journals which are classified as $B2$, $B4$ or are not listed in the QUALIS system are not recommended for those who intent to maintain or receive a PQ2 fellowship. 
%In particular, since $\alpha_{B4}=-15.18$, we conclude that the administrative council of mathematics and statistics do not look kindly upon publications on journals with low impact factor or low cited half-life. In contrast, the weight $\alpha_{B3}$ is close to zero and, thus, publication on journals classified as $B3$ do not contribute significantly for holding a PQ2 fellowship. In addition, the weight $\alpha_{B5}=7.4$, besides being positive, it is approximately $\alpha_{B1}/2$. We believe that this weight value reflects an incentive for publishing in Brazilian journals.

The weight $\alpha_{N}=-8.40$ shows that an undistinguished publication -- especially if it refers to an unknown journal not related to the disciplines of mathematics and statistics -- is not favored for holding a PQ2 fellowship. Indeed, we observed that applicants with many papers classified as $N$ published in journals of disciplines such as education, health and agrarian sciences. %Notwithstanding, since $\alpha_{O}=2.84$, there is no significant discrimination between journals specific to the discipline of mathematics (core) and journals that cover statistics or applied mathematics (non-core). 

The supervisory experiences in master and doctor programs have similar weights.
In fact, a supervision in a graduate program contributes to the score
approximately as much as a paper published in a journal classified as B5 in the previous QUALIS
system. This remark should not discourage researchers from collaborating on graduate
programs. Indeed, doctoral studies, as well as some master supervisions, usually
result in publications which will possibly increase the score of a researcher.

Finally, we owe the reader an explanation on the negative scores of some non-PQ fellows displayed in Figure \ref{fig:hist2009}. 
On one hand, the score of a researcher that published only one paper in a journal classified as $B4$ is $-13.34$. On the other hand, a researcher with no scientific production has score $0$, which is greater than the score of the former. Erroneously, we may think that publishing in a journal classified as $B4$ is worse than not publishing. 
This conclusion is wrong because the weights have been determined by minimizing the
classification error over the entire curricula database. As a 
consequence, the weights $\alpha_{B2}, \alpha_{B4}$, and $\alpha_{N}$ are negative in order
to discriminate successful PQ2 applicants from non-PQ fellows. For example,
consider two researchers of the same institution characterized respectively
by the following vectors and labels on our database (since no journal is ranked as C in the previous QUALIS, we omitted the component $x_{\xi,C}$ of the vectors):
$$
\vetx_{21} = \big[ \underbrace{0}_{A1},
\underbrace{2}_{  A2}, 
\underbrace{0}_{  B1},  
\underbrace{2}_{  B2},  
\underbrace{4}_{  B3},  
\underbrace{2}_{  B4},  
\underbrace{1}_{  B5},  
\underbrace{0}_{  N},  
\underbrace{3}_{  M}, 
\underbrace{3}_{  D} \big]^T, \; d_{21}=-1,
$$ 
and
$$
\vetx_{22} = \big[
\underbrace{2}_{  A1},  
\underbrace{1}_{  A2},  
\underbrace{1}_{  B1},  
\underbrace{1}_{  B2},  
\underbrace{0}_{  B3},  
\underbrace{0}_{  B4},  
\underbrace{0}_{  B5},  
\underbrace{1}_{  N},  
\underbrace{2}_{  M}, 
\underbrace{4}_{  D} \big]^T, \; d_{22}=+1.
$$ 
The first researcher has a total of 11 papers -- in which 10 are classified as core and all of them have been
published in journals ranked in the previous QUALIS system -- but he is not a PQ-fellow ($d_{21}=-1$).
In contrast, the second researcher has 6 papers published  --
about half of the scientific production of the first researcher -- and he
won a PQ2 fellowship. Accordingly, these two researchers have respectively the
scores $\score{\vetx_{21}}=55.84 < 100$ and $\score{\vetx_{22}}=147.06 \geq 100$.
The distinction between the two researchers follows from the fact that the
former has many papers published in journals classified as $B2, B3, B4$, and
$B5$ while the latter published mainly in journals ranked as $A1$, $A2$, and
$B1$. Nevertheless, both researchers have a significant number of publications. 

\subsection{The Criteria Based on the Current QUALIS}

In contrast to the previous QUALIS, the largest weight in Table \ref{tab:alpha} is attained by the journals classified as C in the current QUALIS system. We believe that the curious value of $\alpha_{C}$ resulted from the fact that only four researchers in our database have papers classified as $C$. These researchers are characterized by the following vectors and labels:
\begin{eqnarray*}
\vetx_{39} = \big[ \underbrace{0}_{A1},  
\underbrace{0}_{  A2},
\underbrace{0}_{  B1},  
\underbrace{0}_{  B2},  
\underbrace{1}_{  B3},  
\underbrace{0}_{  B4},  
\underbrace{0}_{  B5},  
\underbrace{1}_{  C}, 
\underbrace{1}_{  N}, 
\underbrace{0}_{  M}, 
\underbrace{0}_{  D} \big]^T, \; d_{39}=-1, \\
\vetx_{88} = \big[ \underbrace{0}_{A1},  
\underbrace{0}_{  A2},
\underbrace{0}_{  B1},  
\underbrace{0}_{  B2},  
\underbrace{0}_{  B3},  
\underbrace{0}_{  B4},  
\underbrace{0}_{  B5},  
\underbrace{1}_{  C}, 
\underbrace{2}_{  N}, 
\underbrace{0}_{  M}, 
\underbrace{0}_{  D} \big]^T, \; d_{88}=-1, \\
\vetx_{162} = \big[ \underbrace{0}_{A1},
\underbrace{1}_{  A2},
\underbrace{8}_{  B1},  
\underbrace{0}_{  B2},  
\underbrace{0}_{  B3},  
\underbrace{0}_{  B4},  
\underbrace{0}_{  B5},  
\underbrace{1}_{  C}, 
\underbrace{1}_{  N}, 
\underbrace{0}_{  M}, 
\underbrace{0}_{  D} \big]^T, \; d_{162}=+1,
\end{eqnarray*}
and
\begin{equation*}
\vetx_{206} = \big[ \underbrace{1}_{A1},  
\underbrace{1}_{  A2},
\underbrace{1}_{  B1},  
\underbrace{0}_{  B2},  
\underbrace{0}_{  B3},  
\underbrace{0}_{  B4},  
\underbrace{0}_{  B5},  
\underbrace{1}_{  C}, 
\underbrace{0}_{  N}, 
\underbrace{4}_{  M}, 
\underbrace{0}_{  D} \big]^T, \; d_{206}=+1.
\end{equation*}
Moreover, their scores are $\score{\vetx_{39}}=50.21, \score{\vetx_{88}}=36.93, \score{\vetx_{162}}=149.79$, and $\score{\vetx_{206}}=140.73$. If the weight $\alpha_C$ is replaced by $\tilde{\alpha}_C=3.2$, we obtain the scores $\tilde{\mathcal{S}}(\vetx_{39})=90.85,   \tilde{\mathcal{S}}(\vetx_{88})=77.57,   \tilde{\mathcal{S}}(\vetx_{162})=109.15$, and $ \tilde{\mathcal{S}}(\vetx_{206})=100.09$. Note that the four researchers are correctly classified using $\tilde{\alpha}_C$. However, the decision criteria based on the score $\tilde{\mathcal{S}}$ has a lower margin of separation between the groups of successful applicants and non-PQ fellows. Thus, although the weight $\alpha_{C}=43.84$ maximizes the margin of separation of the two classes, it may represent an artificial situation since it can be replaced by $\tilde{\alpha}_C = 3.2$. 

Excluding the weight $\alpha_C$, the largest weights in Table \ref{tab:alpha} are respectively $\alpha_{A1}$, $\alpha_{A2}$, and $\alpha_{B1}$. Furthermore, as in the previous QUALIS, $\alpha_{A1}$ is about twice the weight $\alpha_{A2}$. However, the weights $\alpha_{A2}$ and $\alpha_{B1}$ are very similar in the current QUALIS. 

Note that the weights $\alpha_{B2}$, $\alpha_{B5}$, and $\alpha_{N}$ are negative while $\alpha_{B3}$ and $\alpha_{B4}$ are positive. Thus, analogously to the previous discussion, the criteria adopted by the administrative council is not totally compatible with the ranking of journals in the current QUALIS. At this point, we would like to call the reader's attention to the variations in the weights $\alpha_{B3}$, $\alpha_{B4}$, and $\alpha_{B5}$. Apparently, the classes $B3$, $B4$, and $B5$ have been messed up from the previous to the current QUALIS. Notwithstanding, we conclude that the administrative council of mathematics and statistics does not look kindly upon publications in journals with low impact factor, low cited half-life, or low AIS. 

Finally, note that the supervisory experience in a master's program weighs about as much as two doctoral supervisions in the current QUALIS.
Also, the supervision of a master's student contributes to the score approximately as much as a paper published in a journal classified as B3. 


% \bb
% \mathbf{p}_{21} = \bpm 0 &  2  &  2  &  4  &  8 &  10 &  11  \\ \epm^T 
% \quad \mbox{and} \quad
% \mathbf{p}_{22} = \bpm 2 &  3  &  4  &  5  &  5 &  5 &  6  \\ \epm^T,
% \ee 


\newsec{Conclusion} \label{sec:conclusion}

In this paper, we modeled the decision criteria for receiving a PQ2 fellowship from CNPq in the field of mathematics and statistics. The model is based on a weighted sum of the number of publications, classified according to either the previous or the current QUALIS system, and the supervisory experience in graduate programs. The weights were obtained by solving a quadratic programming problem whose objective was to minimize the classification error subject to the information available at the Lattes curricula of both successful PQ2 applicants and non-PQ fellows. By allowing a certain tolerance, the model yielded errors of 3.0\% and 5.1\% for the previous and current QUALIS, respectively. Briefly, the criteria based on the weights shown in Table \ref{tab:alpha} can be translated into the following rule, which corresponds to a soft version of \eqref{eq:decision}:
\bb \begin{cases}
     \mbox{If }\, 150 < \score{\vetx}\, \mbox{then the applicant $\vetx$ deserves to receive a PQ2 fellowship},  \\
     \mbox{If }\, 50 \leq \score{\vetx} \leq 150\, \mbox{then it is possible that the applicant receives a PQ2 fellowship},\\
     \mbox{If }\, \score{\vetx} < 50\, \mbox{then the applicant $\vetx$ does not deserve to receive a PQ2 fellowship}.
    \end{cases}
\label{eq:decision_soft}
\ee

The solution of the quadratic programming problem revealed that the merit of a PQ2 applicant is determined mainly by the scientific production in the groups $A1, A2$, and $B1$. Furthermore, a paper published in a journal classified as $A1$ weighs about as much as two papers published in a journal classified as $A2$, in both QUALIS systems. Also, it is hard to formulate a formal statement concerning journals classified as $B3$, $B4$ and $B5$ in view of the variations in the weights $\alpha_{B3}, \alpha_{B4}$, and $\alpha_{B5}$ from the previous to the current QUALIS. In other words, it is not clear what the contribution of journals with low impact factor, low cited half-life, or low article influence score (AIS) is toward receiving a PQ2 fellowship.

%Furthermore, the number of paper in each of these groups is weighted approximately in geometric progression with common ration 1/2. In addition, a supervision in a graduate program -- master or doctorate -- contribute as much as a $B5$ paper. 
%Publication on journals classified as $B2$, $B4$ or not listed in the QUALIS system are not encouraged for those who intent to hold a PQ2 fellowship.

In the future, we plan to investigate the temporal variations of the weights used to compute the score of a PQ2 applicant. 
Also, we encourage researchers of other disciplines to perform a similar study. In fact, since the threshold for receiving a PQ2 fellowship can be fixed at 100, the score number can be adopted in many situations to compare the productivity of researchers of different areas of knowledge.

% \section*{Acknowledgment}
% 
% We would like to thank Cezar Bastos Filho for collecting the Lattes curricula. 

% \bibliographystyle{abbrv}  
% \bibliography{/media/MARCOS/references}

\begin{thebibliography}{10}

\bibitem{arruda09}
D.~Arruda, F.~Bezerra, V.~A. Neris, P.~R. de~Toro, and J.~Wainer.
\newblock Brazilian computer science research: Gender and regional
  distributions.
\newblock {\em Scientometrics}, 79(3):651--665, 2009.

\bibitem{barata03}
R.~B. Barata and M.~Goldbaum.
\newblock Perfil dos pesquisadores com bolsa de produtividade em pesquisa do
  {CNPq} da {\'a}rea de sa{\'u}de coletiva.
\newblock {\em Cadernos de Sa{\'u}de P{\'u}blica}, 19(6):1863--1876, 2003.

\bibitem{franc09}
V.~Franc and S.~Sonnenburg.
\newblock Optimized cutting plane algorithm for large-scale risk minimization.
\newblock {\em Journal of Machine Learning Research}, 10:2157--2192, Oct. 2009.

\bibitem{Qualis2012}
N.~Garcia and L.~Casado.
\newblock {C}omunicado 003/2012 -- {\'a}rea de {M}atem{\'a}tica,
  {P}robabilidade e {E}stat{\'i}stica: Atualiza\c{c}{\~a}o do webqualis.
\newblock Available at:
  \url{http://www.capes.gov.br/images/stories/download/avaliacao/Qualis_-_Mate%
matica.pdf}, 2012.
\newblock Accessed on June, 2012.

\bibitem{haykin09}
S.~Haykin.
\newblock {\em Neural Networks and Learning Machines}.
\newblock Prentice-Hall, Upper Saddle River, NJ, 3rd edition, 2009.

\bibitem{oliveira12}
E.~A. {Oliveira}, E.~A. Colosimo, D.~R. Martelli, I.~G. Quirino, M.~C.~L.
  Oliveira, L.~S. Lima, A.~C.~S. Silva, and H.~Martelli-Junior.
\newblock Comparison of {B}razilian researchers in clinical medicine: are
  criteria for ranking well-adjusted?
\newblock {\em Scientometrics}, 90(2):429--443, 2012.

\bibitem{oliveira11}
E.~A. {Oliveira}, A.~L.~P. Ribeiro, I.~G. Quirino, M.~C.~L. Oliveira, D.~R.
  Martelli, L.~S. Lima, E.~A. Colosimo, T.~J. Lopes, A.~C.~S. Silva, and
  H.~Martelli-Junior.
\newblock Profile and scientific production of {CNPq} researchers in
  cardiology.
\newblock {\em Arquivos Brasileiros de Cardiologia}, 97(3):186--193, 2011.

\bibitem{santos10}
N.~C. Santos, L.~F. C{\^a}ndido, and C.~L. Kuppens.
\newblock Produtividade em pesquisa do {CNPq}: An{\'a}lise do perfil dos
  pesquisadores da qu{\'i}mica.
\newblock {\em Qu{\'i}mica Nova}, 33(2):489--495, 2010.

\bibitem{scarpelli08}
A.~C. Scarpelli, F.~Sardenberg, D.~Goursand, S.~M. Paiva, and I.~A. Pordeus.
\newblock Academic trajectories of dental researchers receiving {CNPq}'s
  productivity grants.
\newblock {\em Brazilian Dental Journal}, 19(3):252--256, 2008.

\bibitem{neves07}
M.~A. {Silveira e Pereira Neves}, R.~C. Antunes, E.~S.~R. J{\'u}nior, and C.~Q.
  Gorgati.
\newblock Evolu\c{c}{\~a}o das bolsas de produtividade em pesquisa e dos
  editais universais do {CNPq} no programa b{\'a}sico de zootecnia: 2002 a
  2006.
\newblock {\em Revista Brasileira de Zootecnia}, 36(0):369--376, 2007.

\bibitem{sonnenburg10}
S.~Sonnenburg, G.~R{\"a}tsch, S.~Henschel, C.~Widmer, J.~Behr, A.~Zien,
  F.~de~Bona, A.~Binder, C.~Gehl, and V.~Franc.
\newblock The shogun machine learning toolbox.
\newblock {\em Journal of Machine Learning Research}, 11:1799--1802, Aug. 2010.

\bibitem{toffoli11}
G.~A. Toffoli and S.~M. S.~P. Ferreira.
\newblock Mapeamento da produ\c{c}{\~a}o cient{\'i}fica de pesquisadores
  brasileiros de ci{\^e}ncias da comunica\c{c}{\~a}o: per{\'i}odo de 2000 a
  2009.
\newblock {\em Psicologia {USP}}, 22(2):399--422, 2011.

\bibitem{vapnik98}
V.~N. Vapnik.
\newblock {\em Statistical Learning Theory}.
\newblock John Wiley and Sons, New York, NY, USA, 1998.

\bibitem{vapnik99}
V.~N. Vapnik.
\newblock {\em The Nature of Statistical Learning Theory}.
\newblock Springer, 2nd edition, 1999.

\bibitem{Qualis2009}
M.~Viana and M.~Soares.
\newblock {D}ocumento de {\'a}rea 2009 -- {M}atem{\'a}tica, {P}robabilidade e
  {E}stat{\'i}stica.
\newblock Available at:
  \url{http://qualis.capes.gov.br/arquivos/avaliacao/webqualis/criterios2007_2%
009/Criterios_Qualis_2008_01.pdf}, 2009.
\newblock Accessed on May, 2012.

\end{thebibliography}

% \begin{abstract}
% {\bf Abstract}. This document, which was prepared using the class
% file {\em TEMA.cls}, provides some important information for the
% authors who intend to submit papers for TEMA.
% \end{abstract}


\end{document}
\newpage
$ \  \  $  \thispagestyle{myheadings}  \markboth{      }{   }
